In [3]:
!pip install --upgrade sagemaker

Collecting sagemaker
  Downloading sagemaker-2.247.1-py3-none-any.whl.metadata (17 kB)
Downloading sagemaker-2.247.1-py3-none-any.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m50.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sagemaker
  Attempting uninstall: sagemaker
    Found existing installation: sagemaker 2.247.0
    Uninstalling sagemaker-2.247.0:
      Successfully uninstalled sagemaker-2.247.0
Successfully installed sagemaker-2.247.1


In [4]:
import sagemaker
import boto3
from sagemaker.estimator import Estimator
from sagemaker.model import Model

  from pandas.core.computation.check import NUMEXPR_INSTALLED


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


# Training with own container

In [5]:
# --- 1. Inicialización ---
# Obtener la sesión de SageMaker, el rol de ejecución y la región actual
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

In [6]:
bucket_name = 'machine-learning-serviciosnutresa-featurestore-lab' 

In [7]:
s3_input_train_path = f's3://{bucket_name}/data/tests/titanic/train/'

In [8]:
# El nombre de la imagen que creaste en el Paso 1
image_name = 'byco2-sklearn-1-5' 

In [9]:
# Construir la URI completa de tu imagen en ECR
account_id = boto3.client('sts').get_caller_identity().get('Account')
image_uri = f'{account_id}.dkr.ecr.{region}.amazonaws.com/{image_name}:latest'

In [10]:
source_dir = './src/'
entry_point = 'app.py'
print(f"Usando la imagen de ECR: {image_uri}")
print(f"Leyendo datos de: {s3_input_train_path}")
print(f"Usando el script: {source_dir}{entry_point}")

Usando la imagen de ECR: 544644514035.dkr.ecr.us-east-1.amazonaws.com/byco2-sklearn-1-5:latest
Leyendo datos de: s3://machine-learning-serviciosnutresa-featurestore-lab/data/tests/titanic/train/
Usando el script: ./src/app.py


In [11]:
estimator = Estimator(
    image_uri=image_uri,          # Tu contenedor personalizado
    role=role,                    # El rol de IAM para que SageMaker tenga permisos
    entry_point=entry_point,      # Tu script de entrenamiento
    source_dir=source_dir,        # La carpeta que contiene tu script
    instance_count=1,             # Número de máquinas para el entrenamiento
    instance_type='local',#'ml.m5.large',  # Tipo de máquina a usar
    hyperparameters={             # Hiperparámetros que se pasan a tu script
        'n-estimators': 250,
        'random-state': 42
    },
    #sagemaker_session=sagemaker_session
)

In [12]:
print("Iniciando el trabajo de entrenamiento...")
estimator.fit({'training': s3_input_train_path})
print("Trabajo de entrenamiento completado.")

Iniciando el trabajo de entrenamiento...


INFO:sagemaker:Creating training-job with name: byco2-sklearn-1-5-2025-07-11-15-31-35-770
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.local.image:'Docker Compose' is not installed. Proceeding to check for 'docker-compose' CLI.
INFO:sagemaker.local.image:'Docker Compose' found using Docker Compose CLI.
INFO:sagemaker.local.local_session:Starting training job
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker.local.image:No AWS credentials found in session but credentials from EC2 Metadata Service are available.
INFO:sagemaker.local.image

Login Succeeded


INFO:sagemaker.local.image:image pulled: 544644514035.dkr.ecr.us-east-1.amazonaws.com/byco2-sklearn-1-5:latest
INFO:sagemaker.local.image:docker command: docker-compose -f /tmp/tmp20o1s8s1/docker-compose.yaml up --build --abort-on-container-exit


 Container 2uufkvci4k-algo-1-jug04  Creating
 Container 2uufkvci4k-algo-1-jug04  Created
Attaching to 2uufkvci4k-algo-1-jug04
2uufkvci4k-algo-1-jug04  |   import pkg_resources
2uufkvci4k-algo-1-jug04  | 2025-07-11 15:32:05,418 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
2uufkvci4k-algo-1-jug04  | 2025-07-11 15:32:05,430 sagemaker-containers INFO     Invoking user script
2uufkvci4k-algo-1-jug04  | 
2uufkvci4k-algo-1-jug04  | Training Env:
2uufkvci4k-algo-1-jug04  | 
2uufkvci4k-algo-1-jug04  | {
2uufkvci4k-algo-1-jug04  |     "additional_framework_parameters": {},
2uufkvci4k-algo-1-jug04  |     "channel_input_dirs": {
2uufkvci4k-algo-1-jug04  |         "training": "/opt/ml/input/data/training"
2uufkvci4k-algo-1-jug04  |     },
2uufkvci4k-algo-1-jug04  |     "current_host": "algo-1-jug04",
2uufkvci4k-algo-1-jug04  |     "framework_module": null,
2uufkvci4k-algo-1-jug04  |     "hosts": [
2uufkvci4k-algo-1-jug04  |         "algo-1-jug04"
2uufkvci4k-algo-1-ju

INFO:sagemaker.local.image:===== Job Complete =====


Trabajo de entrenamiento completado.


In [13]:
estimator

<sagemaker.estimator.Estimator at 0x7fc7f5a43c70>

In [14]:
# Muestra la ubicación del modelo entrenado en S3
print(estimator.model_data)

s3://sagemaker-us-east-1-544644514035/byco2-sklearn-1-5-2025-07-11-15-31-35-770/output/model.tar.gz


# Inference

## Inference desde el Estimator

In [43]:
# --- Paso de Inferencia: Desplegar el modelo ---

#print("Iniciando el despliegue del modelo en un endpoint...")

# Usamos el objeto 'estimator' para desplegar el modelo.
# SageMaker se encarga de crear el Modelo, la Configuración del Endpoint y el Endpoint.
# Usamos una instancia pequeña y económica para esta prueba.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium' 
)

#print("\n¡Despliegue completado!")
#print(f"El nombre de tu endpoint es: {predictor.endpoint_name}")

INFO:sagemaker:Creating model with name: byco2-sklearn-1-5-2025-07-11-18-37-28-615
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating endpoint-config with name byco2-sklearn-1-5-2025-07-11-18-37-28-615
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-

Attaching to 69cpfgikeq-algo-1-z97p4
69cpfgikeq-algo-1-z97p4  | --- Verificando contenido de /opt/ml/code ---
69cpfgikeq-algo-1-z97p4  | /opt/ml/code:
69cpfgikeq-algo-1-z97p4  | total 0
69cpfgikeq-algo-1-z97p4  | --- Fin de la verificación ---
69cpfgikeq-algo-1-z97p4  | --- Verificando contenido de /opt/ml/input ---
69cpfgikeq-algo-1-z97p4  | ls: cannot access '/opt/ml/input': No such file or directory
69cpfgikeq-algo-1-z97p4  | --- Fin de la verificación ---
69cpfgikeq-algo-1-z97p4  | Creando directorio de código en /opt/ml/code
69cpfgikeq-algo-1-z97p4  | Descargando scripts de s3://machine-learning-serviciosnutresa-modelos-lab/titanic/artifacts/code/ a /opt/ml/code
download: s3://machine-learning-serviciosnutresa-modelos-lab/titanic/artifacts/code/app.py to ./app.py
download: s3://machine-learning-serviciosnutresa-modelos-lab/titanic/artifacts/code/wsgi.py to ./wsgi.py
69cpfgikeq-algo-1-z97p4  | Contenido del directorio después de la descarga:
69cpfgikeq-algo-1-z97p4  | /opt/ml/code:

INFO:sagemaker.local.entities:Checking if serving container is up, attempt: 10


!

In [44]:
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

# Configura cómo enviar los datos (en formato CSV)
predictor.serializer = CSVSerializer()
# Configura cómo recibir los datos (en formato JSON)
predictor.deserializer = JSONDeserializer()

# Prepara tus datos de prueba en el formato que tu función `input_fn` espera
# Por ejemplo, una fila en formato CSV para el modelo del Titanic:
# Pclass, Sex, Age, SibSp, Parch, Fare
test_data = "3,male,22.0,1,0,7.25"

# Llama al endpoint para obtener una predicción
try:
    prediction = predictor.predict(test_data)
    print("Predicción recibida:", prediction)

except Exception as e:
    print(f"Error al invocar el endpoint: {e}")

69cpfgikeq-algo-1-z97p4  | Inferencing: Received request with Content-Type: text/csv
69cpfgikeq-algo-1-z97p4  | Inferencing: Performing prediction.
69cpfgikeq-algo-1-z97p4  | Inferencing: Serializing prediction for Accept type: application/json
69cpfgikeq-algo-1-z97p4  | --- DEBUG: Iniciando output_fn ---
69cpfgikeq-algo-1-z97p4  | Valor de la variable 'accept': application/json
69cpfgikeq-algo-1-z97p4  | Tipo de la variable 'accept': <class 'werkzeug.datastructures.MIMEAccept'>
69cpfgikeq-algo-1-z97p4  | Contenido como string: application/json
69cpfgikeq-algo-1-z97p4  | --- FIN DEBUG ---
Predicción recibida: {'predictions': [0]}


In [17]:
# Configura cómo enviar los datos (en formato CSV)
predictor.serializer = CSVSerializer()
# Configura cómo recibir los datos (en formato JSON)
predictor.deserializer = JSONDeserializer()

# Prepara tus datos de prueba en el formato que tu función `input_fn` espera
# Por ejemplo, una fila en formato CSV para el modelo del Titanic:
# Pclass, Sex, Age, SibSp, Parch, Fare
test_data = "1,female,38.0,1,0,71.28"

# Llama al endpoint para obtener una predicción
try:
    prediction = predictor.predict(test_data)
    print("Predicción recibida:", prediction)

except Exception as e:
    print(f"Error al invocar el endpoint: {e}")

dkixnt4ta0-algo-1-4imrs  | Inferencing: Received request with Content-Type: text/csv
dkixnt4ta0-algo-1-4imrs  | Inferencing: Performing prediction.
dkixnt4ta0-algo-1-4imrs  | Inferencing: Serializing prediction for Accept type: application/json
Predicción recibida: {'predictions': [1]}


In [18]:
# Una vez que termines de hacer pruebas, elimina el endpoint
predictor.delete_endpoint()

print("Endpoint eliminado exitosamente.")

INFO:sagemaker:Deleting endpoint configuration with name: byco2-sklearn-1-5-2025-07-11-15-32-09-832
INFO:sagemaker:Deleting endpoint with name: byco2-sklearn-1-5-2025-07-11-15-32-09-832


Endpoint eliminado exitosamente.


## Inference desde Model

In [19]:
model = Model(
    model_data=estimator.model_data,
    image_uri=estimator.image_uri,
    role=role,
    source_dir='./src/',
    entry_point='app.py' # <- APUNTAS AL MISMO SCRIPT
)

In [20]:
print("Iniciando el despliegue del modelo en un endpoint...")

# Usamos el objeto 'estimator' para desplegar el modelo.
# SageMaker se encarga de crear el Modelo, la Configuración del Endpoint y el Endpoint.
# Usamos una instancia pequeña y económica para esta prueba.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium' 
)


Iniciando el despliegue del modelo en un endpoint...


INFO:sagemaker:Repacking model artifact (s3://sagemaker-us-east-1-544644514035/byco2-sklearn-1-5-2025-07-11-15-31-35-770/output/model.tar.gz), script artifact (./src/), and dependencies ([]) into single tar.gz file located at s3://sagemaker-us-east-1-544644514035/byco2-sklearn-1-5-2025-07-11-15-32-45-948/model.tar.gz. This may take some time depending on model size...
INFO:sagemaker:Creating model with name: byco2-sklearn-1-5-2025-07-11-15-32-47-469
INFO:sagemaker:Creating endpoint-config with name byco2-sklearn-1-5-2025-07-11-15-32-48-096
INFO:sagemaker:Creating endpoint with name byco2-sklearn-1-5-2025-07-11-15-32-48-096


----!

In [21]:
print(predictor)

None


In [25]:
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

# 1. Pega aquí el nombre del endpoint que ya está creado
endpoint_name = "byco2-sklearn-1-5-2025-07-11-15-32-48-096"

# 2. Crea el objeto predictor "conectándote" al endpoint existente
print(f"Conectando al endpoint: {endpoint_name}")
predictor = Predictor(
    endpoint_name=endpoint_name
)
print("Conexión exitosa.")

# 3. Ahora puedes configurar el predictor y usarlo normalmente
predictor.serializer = CSVSerializer()
predictor.deserializer = JSONDeserializer()

# 4. Haz tu predicción
test_data = "1,female,38.0,1,0,71.28"
try:
    prediction = predictor.predict(test_data)
    print("Predicción recibida:", prediction)
except Exception as e:
    print(f"Error al invocar el endpoint: {e}")

# 5. No olvides borrar el endpoint cuando termines
# predictor.delete_endpoint()

Conectando al endpoint: byco2-sklearn-1-5-2025-07-11-15-32-48-096
Conexión exitosa.
Predicción recibida: {'predictions': [1]}


In [26]:
# 4. Haz tu predicción
test_data = "3,male,22.0,1,0,7.25"
try:
    prediction = predictor.predict(test_data)
    print("Predicción recibida:", prediction)
except Exception as e:
    print(f"Error al invocar el endpoint: {e}")


Predicción recibida: {'predictions': [0]}


In [27]:
# Una vez que termines de hacer pruebas, elimina el endpoint
predictor.delete_endpoint()

print("Endpoint eliminado exitosamente.")

INFO:sagemaker:Deleting endpoint configuration with name: byco2-sklearn-1-5-2025-07-11-15-32-48-096
INFO:sagemaker:Deleting endpoint with name: byco2-sklearn-1-5-2025-07-11-15-32-48-096


Endpoint eliminado exitosamente.


# Batch Inference

In [28]:
s3_input_path = "s3://machine-learning-serviciosnutresa-modelos-lab/titanic/data/test/test.csv"

In [45]:
# Define dónde quieres que se guarden las predicciones en S3
s3_output_path = f"s3://machine-learning-serviciosnutresa-modelos-lab/titanic/data/output/"
# 1. Crea el objeto transformador desde tu estimador ya entrenado
transformer = estimator.transformer(
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=s3_output_path,
    accept='application/json'  # <-- AÑADE ESTA LÍNEA
)

# 3. Lanza el trabajo de inferencia por Lotes
#    (Esta parte no cambia)
response = transformer.transform(
    data="s3://machine-learning-serviciosnutresa-modelos-lab/titanic/data/test/test.csv",
    content_type='text/csv',
    wait=True # Puedes usar wait=True para que el notebook espere a que termine
)

INFO:sagemaker:Creating model with name: byco2-sklearn-1-5-2025-07-11-18-39-03-715
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating transform job with name: byco2-sklearn-1-5-2025-07-11-18-39-03-806
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-w

Attaching to 6zhdmzfjwh-algo-1-y95j7
69cpfgikeq-algo-1-z97p4  | Inferencing: Received request with Content-Type: text/csv
69cpfgikeq-algo-1-z97p4  | Inferencing: Performing prediction.
69cpfgikeq-algo-1-z97p4  | Inferencing: Serializing prediction for Accept type: application/json
69cpfgikeq-algo-1-z97p4  | --- DEBUG: Iniciando output_fn ---
69cpfgikeq-algo-1-z97p4  | Valor de la variable 'accept': application/json
69cpfgikeq-algo-1-z97p4  | Tipo de la variable 'accept': <class 'werkzeug.datastructures.MIMEAccept'>
69cpfgikeq-algo-1-z97p4  | Contenido como string: application/json
69cpfgikeq-algo-1-z97p4  | --- FIN DEBUG ---
.