# Step 2 - Deploy del modelo

## Seteos iniciales

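Configuramos los loggers `sagemaker.config` y `sagemaker.experiments.run` para que sólo registren mensajes de nivel warning o superior (warning y error).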

In [1]:
import logging

# Raise the threshold of these two SageMaker loggers to WARNING so that
# lower-severity records (INFO, DEBUG) are dropped.
for _logger_name in ("sagemaker.config", "sagemaker.experiments.run"):
    logging.getLogger(_logger_name).setLevel(logging.WARNING)

Mostramos las versiones de las librerías de Python importantes para el proyecto

In [2]:
import awscli
import boto3
import numpy
import pandas
import sagemaker

# Report the installed version of each key library, one per line.
# Each label carries its own tab padding so the version column lines up.
for _label, _module in (
    ("sagemaker\t", sagemaker),
    ("pandas\t\t", pandas),
    ("numpy\t\t", numpy),
    ("boto3\t\t", boto3),
    ("awscli\t\t", awscli),
):
    print(_label, _module.__version__)

sagemaker	 2.215.0
pandas		 2.2.2
numpy		 1.26.4
boto3		 1.34.84
awscli		 1.32.84


In [None]:
# Resolve the AWS context (session, execution role, region) used by the rest of the notebook.
sage_session = sagemaker.Session()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

# Base S3 URI built from the session's default bucket and the project prefix.
prefix = "australia-rain"
bucket_name = sage_session.default_bucket()
project_fd = f"s3://{bucket_name}/{prefix}"

print(f"Region: {region}")
print(f"Rol: {role}")
print("Info S3:", f"- Bucket: {bucket_name}", f"- Prefix: {prefix}", sep="\n")

Region: us-east-1
Rol: arn:aws:iam::335415446667:role/service-role/AmazonSageMakerExecutionRole-sagemaker-stack
Info S3:
- Bucket: sagemaker-us-east-1-335415446667
- Prefix: train_val


## Modelo

In [None]:
from notebook_utilities import (
    get_latest_training_job_name,
    get_training_job_s3_model_artifacts,
)

# Resolve the model artifacts to deploy from the training-job base name.
# NOTE(review): presumably the helpers return the newest job with this base name
# and the S3 URI of its model artifacts — confirm in notebook_utilities.
train_base_job_name = f"{prefix}-sm-xgb"
latest_train_job_name = get_latest_training_job_name(train_base_job_name)
model_path = get_training_job_s3_model_artifacts(latest_train_job_name)

print("Model path:", model_path)

In [None]:
import time
from sagemaker.xgboost import XGBoostModel

# S3 prefix handed to the SDK as code_location for the inference source.
code_location = f"{project_fd}/code"
# The epoch-seconds suffix makes the model name differ between runs.
model_name = f"{train_base_job_name}-model-{int(time.time())}"

# Model object backed by the trained artifacts and the custom inference script.
xgboost_model = XGBoostModel(
    model_data=model_path,
    role=role,
    entry_point="inference.py",
    source_dir="xgboost_source_dir/",
    framework_version="0.90-2",
    py_version="py3",
    name=model_name,
    code_location=code_location,
    sagemaker_session=sage_session,
)

## Deploy del modelo

In [None]:
import time

from sagemaker.model_monitor import DataCaptureConfig

# S3 location that the data-capture configuration points at.
s3_capture_upload_path = f"{project_fd}/monitoring/datacapture"
print(f"The endpoint will upload captured data to {s3_capture_upload_path}")

# The epoch-seconds suffix makes the endpoint name differ between runs.
endpoint_name = f"{prefix}-sm-endpoint-{int(time.time())}"

print(f"\n*** Endpoint Name ***\n\n{endpoint_name}")

# Capture is enabled with a sampling percentage of 100.
capture_config = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=100,
    destination_s3_uri=s3_capture_upload_path,
)

xgboost_model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.m5.xlarge",
    initial_instance_count=1,
    data_capture_config=capture_config,
)

## Inferencia

In [None]:
from sagemaker.deserializers import CSVDeserializer
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer

# Client for the endpoint named above, configured with CSV (de)serializers.
csv_serializer = CSVSerializer()
csv_deserializer = CSVDeserializer()

predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sage_session,
    serializer=csv_serializer,
    deserializer=csv_deserializer,
)

In [None]:
# We use rows 0, 1, 8 & 9 of the train_val data (F, F, T, T).
# NOTE(review): (F, F, T, T) are presumably the true labels of those rows — confirm.
# Each key is a feature column; position i in every list belongs to example i.
_feature_columns = {
    "MinTemp": [1.211401, -0.153948, 0.280482, -1.736512],
    "MaxTemp": [1.152702, -0.720034, -0.622205, -0.133057],
    "Rainfall": [-0.229017, -0.277102, -0.108804, -0.277102],
    "WindGustSpeed": [-0.080519, -0.895915, 0.734877, 0.067735],
    "WindSpeed9am": [0.309775, -0.837656, 0.080289, -0.608170],
    "WindSpeed3pm": [-1.141891, -0.909887, 0.830141, -0.445880],
    "Humidity9am": [0.028559, -0.544163, 1.121937, 0.080625],
    "Humidity3pm": [0.192704, 0.528628, 0.672595, -1.103001],
    "Pressure9am": [-1.371232, 0.800877, -0.214654, -0.398014],
    "Pressure3pm": [-1.383499, 0.781688, -0.215437, -0.941915],
    "Temp9am": [1.247170, -0.416770, -0.355708, -0.676284],
    "Temp3pm": [1.162131, -0.642591, -0.513682, -0.098310],
    "RainToday": [-0.531932, -0.531932, 1.879924, -0.531932],
    "WindGustDir_east": [1.324919, -1.270054, -0.510004, 1.324919],
    "WindGustDir_north": [-0.474859, 0.618431, -1.247932, 0.618431],
    "WindDir9am_east": [1.405556, -1.060554, -1.373707, 1.295592],
    "WindDir9am_north": [-0.013734, -0.994312, -0.544419, 0.516952],
    "WindDir3pm_east": [1.334753, 1.029443, -0.5054534, 1.334753],
    "WindDir3pm_north": [0.605610, -0.945701, -1.254276, 0.6056103],
}

# One DataFrame row per example, columns in the insertion order of the dict above.
examples = pandas.DataFrame(_feature_columns)
examples

Unnamed: 0,MinTemp,MaxTemp,Rainfall,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Temp9am,Temp3pm,RainToday,WindGustDir_east,WindGustDir_north,WindDir9am_east,WindDir9am_north,WindDir3pm_east,WindDir3pm_north
0,1.211401,1.152702,-0.229017,-0.080519,0.309775,-1.141891,0.028559,0.192704,-1.371232,-1.383499,1.24717,1.162131,-0.531932,1.324919,-0.474859,1.405556,-0.013734,1.334753,0.60561
1,-0.153948,-0.720034,-0.277102,-0.895915,-0.837656,-0.909887,-0.544163,0.528628,0.800877,0.781688,-0.41677,-0.642591,-0.531932,-1.270054,0.618431,-1.060554,-0.994312,1.029443,-0.945701
2,0.280482,-0.622205,-0.108804,0.734877,0.080289,0.830141,1.121937,0.672595,-0.214654,-0.215437,-0.355708,-0.513682,1.879924,-0.510004,-1.247932,-1.373707,-0.544419,-0.505453,-1.254276
3,-1.736512,-0.133057,-0.277102,0.067735,-0.60817,-0.44588,0.080625,-1.103001,-0.398014,-0.941915,-0.676284,-0.09831,-0.531932,1.324919,0.618431,1.295592,0.516952,1.334753,0.60561


In [None]:

# Send every example to the endpoint as one comma-separated line and print
# both the payload and the returned prediction.
for _, example_row in examples.iterrows():
    payload = ",".join(map(str, example_row))
    print(payload)
    print("Prediction:", predictor.predict(payload))
    print("-" * 10)

### Data capturada

In [None]:
import time

# Leave a 2-minute delay so that S3 can receive the captured data.
capture_wait_seconds = 120
time.sleep(capture_wait_seconds)

In [None]:
# List the objects stored under this endpoint's data-capture prefix.
s3_client = boto3.Session().client("s3")
current_endpoint_capture_prefix = f"{prefix}/monitoring/datacapture/{endpoint_name}"

result = s3_client.list_objects(Bucket=bucket_name, Prefix=current_endpoint_capture_prefix)

# The response carries no "Contents" key when nothing matches the prefix
# (e.g. the capture has not reached S3 yet). Default to an empty list so the
# comprehension does not fail with "'NoneType' object is not iterable".
capture_files = [
    f"s3://{bucket_name}/{capture_file['Key']}"
    for capture_file in result.get("Contents", [])
]

if capture_files:
    print("Capture Files: ")
    print("\n ".join(capture_files))
else:
    # Tell the reader what to do instead of failing with an opaque traceback.
    print(
        f"No capture files found yet under s3://{bucket_name}/{current_endpoint_capture_prefix}. "
        "Wait a little longer and re-run this cell."
    )

In [None]:
# Copy the first listed capture file from S3 to a local path, then show the
# beginning of the downloaded file. Requires capture_files to be non-empty.
!aws s3 cp {capture_files[0]} datacapture/captured_data_example.jsonl
!head datacapture/captured_data_example.jsonl

In [None]:
import json

with open("datacapture/captured_data_example.jsonl", "r") as capture_fh:
    data = capture_fh.read()

# Only the first line of the file is parsed as JSON and pretty-printed.
first_line, _, _ = data.partition("\n")
first_record = json.loads(first_line)
print(json.dumps(first_record, indent=2))