# Step 1 - Training del modelo

## Seteos iniciales

Logueamos sólo los mensajes de warning y error

In [2]:
import logging

# Raise the threshold of the chatty SageMaker loggers so that only
# WARNING-and-above records are emitted by them.
for noisy_logger in ("sagemaker.config", "sagemaker.experiments.run"):
    logging.getLogger(noisy_logger).setLevel(logging.WARNING)

Mostramos las versiones de las librerías de Python importantes para el proyecto

In [4]:
import awscli
import boto3
import numpy
import pandas
import sagemaker

# Print the installed version of each key library, one per line.
# The labels carry their own trailing tabs so the version column lines up.
for label, module in (
    ("sagemaker\t", sagemaker),
    ("pandas\t\t", pandas),
    ("numpy\t\t", numpy),
    ("boto3\t\t", boto3),
    ("awscli\t\t", awscli),
):
    print(label, module.__version__)

sagemaker	 2.215.0
pandas		 2.2.2
numpy		 1.26.4
boto3		 1.34.84
awscli		 1.32.84


In [7]:
# IAM execution role and the AWS region of the default boto3 session.
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

# SageMaker session, its default bucket, and the key prefix that the
# S3 paths in the following cells are built from.
sage_session = sagemaker.Session()
bucket_name = sage_session.default_bucket()
prefix = "australia-rain"

# Echo the resolved settings so they are visible in the notebook output.
print("Region:", region)
print("Rol:", role)
print(
    "Info S3:",
    f"- Bucket: {bucket_name}",
    f"- Prefix: {prefix}",
    sep="\n",
)

Region: us-east-1
Rol: arn:aws:iam::335415446667:role/service-role/AmazonSageMakerExecutionRole-sagemaker-stack
Info S3:
- Bucket: sagemaker-us-east-1-335415446667
- Prefix: train_val


In [None]:
%store -r experiment_name
print(experiment_name)

no stored variable or alias experiment_name


NameError: name 'experiment_name' is not defined

## Training

In [None]:
from sagemaker.xgboost import XGBoost

# Hyperparameters forwarded to the training job; every value is passed as a string.
hyperparameters = dict(
    max_depth="3",
    eta="0.1",
    gamma="0",
    min_child_weight="1",
    silent="0",
    objective="binary:logistic",
    num_round="10",
    eval_metric="auc",
)

# Job name prefix and location of the training script.
base_job_name = f"{prefix}-sm-xgb"
entry_point = "training.py"
source_dir = "source_dir/"

# S3 destinations for the job output and for the uploaded source code.
output_path = f"s3://{bucket_name}/{prefix}/output/"
code_location = f"s3://{bucket_name}/{prefix}/code"

# Script-mode XGBoost estimator running on a single ml.m5.xlarge instance.
estimator = XGBoost(
    role=role,
    framework_version="0.90-2",
    py_version="py3",
    instance_type="ml.m5.xlarge",
    instance_count=1,
    entry_point=entry_point,
    source_dir=source_dir,
    hyperparameters=hyperparameters,
    base_job_name=base_job_name,
    output_path=output_path,
    code_location=code_location,
)

In [None]:
from sagemaker.experiments.run import Run
import time

# Local wall-clock time (HH-MM-SS) used as a suffix for the run name.
time_str = time.strftime("%H-%M-%S", time.localtime())

run_name         = f"training-{time_str}"
run_display_name = "xgboost-training"

# Training and validation channels: CSV data under the preprocessed S3 prefixes.
train_config = sagemaker.TrainingInput(
    f"s3://{bucket_name}/{prefix}/data/preprocessed/train/",
    content_type="text/csv",
)
val_config = sagemaker.TrainingInput(
    f"s3://{bucket_name}/{prefix}/data/preprocessed/val/",
    content_type="text/csv",
)

# Launch the training job inside an experiment run.
# The session created in the setup cell is named `sage_session`; the previous
# reference to an undefined `sagemaker_session` raised NameError.
with Run(
    experiment_name=experiment_name,
    run_name=run_name,
    run_display_name=run_display_name,
    sagemaker_session=sage_session,
) as run:
    estimator.fit(
        {
            'train': train_config,
            'validation': val_config,
        }
    )

### Tuneo de hiperparámetros

In [None]:
# TODO

## Experimentos corridos

In [None]:
from sagemaker.analytics import ExperimentAnalytics

# Query the analytics recorded for `experiment_name` and render them as the
# cell output (the trailing bare expression is what the notebook displays).
analytics = ExperimentAnalytics(
    experiment_name=experiment_name,
)
runs_df = analytics.dataframe()
runs_df