In [4]:
import boto3
import re
import os
import numpy as np
import pandas as pd
from sagemaker import get_execution_role
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import CSVDeserializer
import roles
import sagemaker as sage
from time import gmtime, strftime

ModuleNotFoundError: No module named 'boto3'

In [5]:
# Set to True to train a new model before deploying; when False, the notebook
# reuses an existing model artifact and attaches to an existing endpoint instead.
TRAIN = False

### AWS connection startup

In [6]:
# Build the IAM role ARN from the account ID and role name exposed by the
# imported `roles` module.
role = f"arn:aws:iam::{roles.account_ID}:role/{roles.SageMakerExecutionRole}"
# SageMaker session used by the cells below (data upload, bucket lookup,
# account/region lookup and training).
sess = sage.Session()

NameError: name 'roles' is not defined

---

### Sample data upload to S3

In [None]:
# Local folder that holds the sample data, and the S3 key prefix it is uploaded under.
WORK_DIRECTORY = "../src/data"
prefix = "DEMO-DATA"

# Upload the folder through the session; the value returned here is what the
# training cell later hands to `fit`.
data_location = sess.upload_data(path=WORK_DIRECTORY, key_prefix=prefix)

---

### TRAINING

To create the estimator we need:
* an ECR image
* a role
* an instance_type
* an output_path
* a session

In [None]:
# The ECR image URI is assembled from the caller's account ID (looked up via
# STS) and the region of the session.
sts_client = sess.boto_session.client("sts")
account = sts_client.get_caller_identity()["Account"]
region = sess.boto_session.region_name
image = "{}.dkr.ecr.{}.amazonaws.com/sagemaker-deploy-terraform:latest".format(account, region)

# Instance type handed to the estimator, and the output location under the
# session's default bucket.
instance_type = "ml.c4.2xlarge"
output_path = "s3://{}/output".format(sess.default_bucket())

In [None]:
# # we use the Hyperparameter Tuner
# from sagemaker.tuner import IntegerParameter

# # Define exploration boundaries
# hyperparameter_ranges = {
#     "n-estimators": IntegerParameter(20, 100),
#     "min-samples-leaf": IntegerParameter(2, 6),
# }

# # create Optimizer
# Optimizer = sagemaker.tuner.HyperparameterTuner(
#     estimator=sagemaker_model,
#     hyperparameter_ranges=hyperparameter_ranges,
#     base_tuning_job_name="RF-tuner",
#     objective_type="Minimize",
#     objective_metric_name="median-AE",
#     metric_definitions=[
#         {"Name": "median-AE", "Regex": "AE-at-50th-percentile: ([0-9.]+).*$"} # TODO: the training script must print this metric
#     ],  # extract tracked metric from logs with regexp
#     max_jobs=10,
#     max_parallel_jobs=2,
# )


# Optimizer.fit({"train": trainpath, "test": testpath})


# get tuner results in a df
# results = Optimizer.analytics().dataframe()
# while results.empty:
#     time.sleep(1)
#     results = Optimizer.analytics().dataframe()
# results.head()

In [None]:
if TRAIN:
    # Train a fresh model from the uploaded data using the custom ECR image.
    # Keyword arguments make it explicit which value is the instance count.
    sagemaker_model = sage.estimator.Estimator(
        image_uri=image,
        role=role,
        instance_count=1,
        instance_type=instance_type,
        output_path=output_path,
        sagemaker_session=sess,
        container_port=8080,
    )
    sagemaker_model.fit(data_location)
    # Named channels can be passed instead of a single location, e.g.:
    # sagemaker_model.fit({"training": "s3://bucket/path/to/training/data",
    #                      "testing": "s3://bucket/path/to/testing/data"})
else:
    # Reuse an already-trained model artifact instead of training again.
    model_artifact = 's3://sagemaker-us-east-2-169385451286/output/sagemaker-deploy-terraform-2023-09-06-00-09-13-615/output/model.tar.gz'

    # Create a SageMaker Model object around the existing artifact.
    sagemaker_model = sage.Model(
        image_uri=image,
        model_data=model_artifact,
        role=role,
        # Use the same session as the rest of the notebook rather than letting
        # the Model create its own default one.
        sagemaker_session=sess,
        # Without a predictor class, Model.deploy() returns None, which would
        # leave `predictor` unusable after the hosting cell.
        predictor_cls=sage.predictor.Predictor,
    )

---

### HOSTING

Deployment may take several minutes; you can check its status in the AWS SageMaker console.

In [None]:
# Host the model on one ml.m4.xlarge instance; requests and responses are
# exchanged as CSV.
predictor = sagemaker_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
    serializer=CSVSerializer(),
    deserializer=CSVDeserializer(),
)

---

### Sample trial

In [None]:
# Load the sample CSV; header=None means the file has no header row, so the
# columns are numbered positionally.
shape = pd.read_csv("../src/data/iris.csv", header=None)
# Preview three random rows.
shape.sample(3)

In [None]:
# Drop the label column (the first column) so that only the remaining columns
# are sent for prediction. The result is rebound instead of using
# inplace=True, so the frame object is not mutated in place.
# NOTE: re-running this cell drops one more column each time; re-run the
# loading cell first.
shape = shape.drop(columns=shape.columns[[0]])
shape.sample(3)

In [None]:
import itertools

# Block starts 0, 50, 100 and in-block offsets 40..49.
# NOTE(review): presumably each 50-row block is one class of the data set - confirm.
a = list(range(0, 150, 50))
b = list(range(40, 50))

# Every start/offset combination, in start-major order.
indices = [start + offset for start, offset in itertools.product(a, b)]

# Select those rows by position, leaving out the very last index.
test_data = shape.iloc[indices[:-1]]

In [None]:
if not TRAIN:
    # Attach to an endpoint that is already running.
    endpoint_name = 'sagemaker-deploy-terraform-2023-09-06-19-19-03-606'
    # Configure the predictor like the one in the hosting cell: same session,
    # CSV in and CSV out. With the default serializer the CSV payload would be
    # sent without the text/csv content type.
    predictor = sage.predictor.Predictor(
        endpoint_name,
        sagemaker_session=sess,
        serializer=CSVSerializer(),
        deserializer=CSVDeserializer(),
    )
else:
    # Keep `endpoint_name` defined for the later cells that reference it.
    endpoint_name = predictor.endpoint_name

In [None]:
# Serialize the selected rows as CSV text without header or index, send them
# to the endpoint and print what comes back.
csv_payload = test_data.to_csv(sep=",", header=False, index=False)
prediction = predictor.predict(csv_payload)
print(prediction)  # a raw bytes response would additionally need .decode("utf-8")
# To inspect the rows that were sent: test_data.values

In [None]:
import boto3
import json

# Create the SageMaker runtime client from the notebook's session, as the STS
# client is, so that the request uses the same credentials and region as the
# rest of the notebook instead of boto3's default session.
sagemaker_runtime = sess.boto_session.client('sagemaker-runtime')

In [None]:
# Content type announced to the endpoint; adjust it to your model's input format.
content_type = 'text/csv'  # Adjust this based on your model's input format

# Input data: the selected rows rendered as CSV text without header or index,
# stored under the "data" key (modify to match your model's input format).
# NOTE(review): the CSV is wrapped in a dict while content_type declares
# text/csv - confirm what the request should actually carry.
csv_rows = test_data.to_csv(sep=",", header=False, index=False)
input_data = {"data": csv_rows}

In [None]:
# Send the prediction request
response = sagemaker_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Body=json.dumps(input_data)
)

---

### Clean up

In [None]:
#sess.delete_endpoint(predictor.endpoint_name)