# Step 1 - Training del modelo

## Seteos iniciales

Logueamos sólo los mensajes de warning y error

In [2]:
import logging

# Raise the threshold of the chattiest SageMaker loggers so that only
# WARNING-and-above records reach the notebook output.
for noisy_logger in ("sagemaker.config", "sagemaker.experiments.run"):
    logging.getLogger(noisy_logger).setLevel(logging.WARNING)

Mostramos las versiones de las librerías de Python importantes para el proyecto

In [3]:
import awscli
import boto3
import numpy
import pandas
import sagemaker

# Report the version of every library that matters for this project.
# The tab padding is part of each label so the version numbers line up.
for label, module in (
    ("sagemaker\t", sagemaker),
    ("pandas\t\t", pandas),
    ("numpy\t\t", numpy),
    ("boto3\t\t", boto3),
    ("awscli\t\t", awscli),
):
    print(label, module.__version__)

sagemaker	 2.215.0
pandas		 2.2.2
numpy		 1.26.4
boto3		 1.34.84
awscli		 1.32.84


In [10]:
# Build a single SageMaker session and derive everything else from it, so the
# execution role, the region and the default bucket are all resolved against
# the same credentials/configuration instead of three separate sessions.
sage_session = sagemaker.Session()

role = sagemaker.get_execution_role(sagemaker_session=sage_session)
region = sage_session.boto_region_name
bucket_name = sage_session.default_bucket()

# Key prefix under which this project's data, code and model output are stored.
prefix = "australia-rain-processed"

print("Region:", region)
print("Rol:", role)
print("Info S3:")
print(f"- Bucket: {bucket_name}")
print(f"- Prefix: {prefix}")

Region: us-east-1
Rol: arn:aws:iam::335415446667:role/service-role/AmazonSageMakerExecutionRole-sagemaker-stack
Info S3:
- Bucket: sagemaker-us-east-1-335415446667
- Prefix: australia-rain-processed


Generamos un `experiment_name` con timestamp, que puede guardarse y recuperarse desde otros notebooks con la magic `%store` (`%store -r experiment_name`)

In [6]:
import time

# A timestamp suffix (local time, second resolution) makes the experiment name
# unique for each execution of this cell.
experiment_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")
experiment_name = "australia-rain-exp-" + experiment_timestamp
print(f"Experiment name: {experiment_name}")

Experiment name: australia-rain-exp-2024-11-18-18-39-24


In [7]:
# Persist experiment_name with IPython's %store magic so another notebook or
# kernel can restore it later via `%store -r experiment_name`.
%store experiment_name

Stored 'experiment_name' (str)


## Training

In [20]:
from sagemaker.xgboost import XGBoost

# Baseline hyperparameters for the first (non-tuned) training run. Every value
# is given as a string and handed to the estimator unchanged.
# NOTE(review): `silent` is deprecated in newer XGBoost releases in favour of
# `verbosity` — confirm what scripts/training/training.py actually expects.
hyperparameters = dict(
    max_depth="3",
    eta="0.1",
    gamma="0",
    min_child_weight="1",
    silent="0",
    objective="binary:logistic",
    num_round="10",
    eval_metric="auc",
)

# Everything this project writes to S3 hangs from the same bucket/prefix root.
s3_project_root = f"s3://{bucket_name}/{prefix}"

base_job_name = f"{prefix}-sm-xgb"
entry_point = "training.py"
source_dir = "scripts/training/"
output_path = f"{s3_project_root}/output/"
code_location = f"{s3_project_root}/code"

estimator = XGBoost(
    # Training script and where to find it
    entry_point=entry_point,
    source_dir=source_dir,
    # Container / runtime
    framework_version="0.90-2",
    py_version="py3",
    # Compute
    instance_type="ml.m5.xlarge",
    instance_count=1,
    # Job identity, permissions and S3 destinations
    base_job_name=base_job_name,
    role=role,
    output_path=output_path,
    code_location=code_location,
    # Model configuration
    hyperparameters=hyperparameters,
)

In [21]:
from sagemaker.experiments.run import Run
import time

def _csv_channel(folder_name):
    """Build a CSV ``TrainingInput`` for ``folder_name`` under the project's bucket/prefix."""
    return sagemaker.TrainingInput(
        f"s3://{bucket_name}/{prefix}/{folder_name}/",
        content_type="text/csv",
    )


# Wall-clock suffix (HH-MM-SS, local time) used to tell runs apart inside the experiment.
time_str = time.strftime("%H-%M-%S")

run_display_name = "xgboost-training"
run_name = "training-" + time_str

# Folder names (under the project prefix) holding the train and validation CSVs.
databrew_train_fd = "australia-rain-train-dbjob_18Nov2024_1731954734584"
databrew_val_fd = "australia-rain-val-dbjob_18Nov2024_1731954770752"

train_config = _csv_channel(databrew_train_fd)
val_config = _csv_channel(databrew_val_fd)

print("Run name:", run_name)

Run name: training-19-29-11


In [22]:
# Channel name -> input configuration handed to the training job.
training_channels = {
    "train": train_config,
    "validation": val_config,
}

# Launch the baseline training job inside an experiment run, so it is tracked
# under `experiment_name` with the run name/display name set in the cell above.
with Run(
    experiment_name=experiment_name,
    run_name=run_name,
    run_display_name=run_display_name,
    sagemaker_session=sage_session,
) as run:
    estimator.fit(training_channels)

INFO:sagemaker:Creating training-job with name: australia-rain-processed-sm-xgb-2024-11-18-19-29-13-266


2024-11-18 19:29:13 Starting - Starting the training job...
2024-11-18 19:29:30 Starting - Preparing the instances for training...
2024-11-18 19:29:51 Downloading - Downloading input data...
2024-11-18 19:30:16 Downloading - Downloading the training image...
2024-11-18 19:31:08 Training - Training image download completed. Training in progress.
2024-11-18 19:31:08 Uploading - Uploading generated training model[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Invoking user training script.[0m
[34mINFO:sagemaker-containers:Module training does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34mINFO:sagemaker-containers:Generating setup.cfg[0m
[34mINFO:sagemaker-containers:Generating MANIFEST.in[0m
[34mINFO:sagemaker-containers:Installing module with the following command:[0m
[34m/miniconda3/bin/python

### Tuneo de hiperparámetros

In [56]:
from sagemaker.parameter import ContinuousParameter, IntegerParameter
from sagemaker.tuner import HyperparameterTuner

# Search space explored by the tuner. The trailing comment on each entry
# records the value used by the baseline (non-tuned) training run.
hyperparameter_ranges = {
    "max_depth": IntegerParameter(3, 10),        # baseline: "3"
    "eta": ContinuousParameter(0.01, 0.30),      # baseline: "0.1"
    "gamma": ContinuousParameter(0.00, 0.50),    # baseline: "0"
    "min_child_weight": IntegerParameter(1, 6),  # baseline: "1"
}

# Names of the estimator hyperparameters that must stay fixed while Auto-Tune
# is enabled. `HyperparameterTuner` documents this argument as a list of
# *names*, so the previous name -> value dict suggested its values were used.
# The values that apply are the ones already set on the estimator's
# `hyperparameters`.
hyperparameters_to_keep_static = [
    "silent",
    "objective",
    "num_round",
    "eval_metric",
]

In [57]:
# Hyperparameter tuning job built on top of the baseline estimator.
tuner = HyperparameterTuner(
    estimator=estimator,
    # Metric the tuner optimises when comparing training jobs.
    objective_metric_name="validation:auc",
    # Budget: 20 training jobs in total, at most 3 running at the same time.
    max_jobs=20,
    max_parallel_jobs=3,
    # Explicit search space, plus Auto-Tune with the hyperparameters it must not touch.
    hyperparameter_ranges=hyperparameter_ranges,
    autotune=True,
    hyperparameters_to_keep_static=hyperparameters_to_keep_static,
)

In [58]:
from sagemaker.experiments.run import Run
import time

# NOTE: this cell rebinds `time_str`, `run_name` and `run_display_name`, which
# the training cells also set. Re-run the training configuration cell before
# relaunching the baseline training run, or it will be logged under the HPO names.
time_str = time.strftime("%H-%M-%S")  # local wall-clock time, HH-MM-SS

run_display_name = "xgboost-hpo"
run_name = "hpo-" + time_str

print("Run name:", run_name)

Run name: hpo-20-43-32


In [59]:
# Same train/validation channels as the baseline training job.
tuning_channels = {
    "train": train_config,
    "validation": val_config,
}

# Launch the tuning job from inside an experiment run.
# NOTE(review): confirm that the training jobs spawned by the tuner end up
# attached to this run; the analytics table further down shows the HPO run
# rows without parameters or metrics.
with Run(
    experiment_name=experiment_name,
    run_name=run_name,
    run_display_name=run_display_name,
    sagemaker_session=sage_session,
) as run:
    tuner.fit(tuning_channels)

INFO:sagemaker:Creating hyperparameter tuning job with name: sagemaker-xgboost-241118-2043


................................................................................................!


## Experimentos corridos

In [62]:
from sagemaker.analytics import ExperimentAnalytics

# Collect everything tracked under `experiment_name` into a DataFrame.
analytics = ExperimentAnalytics(experiment_name=experiment_name)
df_analytics = analytics.dataframe()

# Bare last expression so the notebook renders the frame as a rich table.
df_analytics

Unnamed: 0,TrialComponentName,DisplayName,Trials,Experiments,SourceArn,SageMaker.ImageUri,SageMaker.InstanceCount,SageMaker.InstanceType,SageMaker.VolumeSizeInGB,eta,...,train - MediaType,train - Value,validation - MediaType,validation - Value,SageMaker.DebugHookOutput - MediaType,SageMaker.DebugHookOutput - Value,SageMaker.ModelArtifact - MediaType,SageMaker.ModelArtifact - Value,val - MediaType,val - Value
0,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,xgboost-hpo,[Default-Run-Group-australia-rain-exp-2024-11-...,[australia-rain-exp-2024-11-18-18-39-24],,,,,,,...,,,,,,,,,,
1,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,xgboost-hpo,[Default-Run-Group-australia-rain-exp-2024-11-...,[australia-rain-exp-2024-11-18-18-39-24],,,,,,,...,,,,,,,,,,
2,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,xgboost-hpo,[Default-Run-Group-australia-rain-exp-2024-11-...,[australia-rain-exp-2024-11-18-18-39-24],,,,,,,...,,,,,,,,,,
3,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,xgboost-hpo,[Default-Run-Group-australia-rain-exp-2024-11-...,[australia-rain-exp-2024-11-18-18-39-24],,,,,,,...,,,,,,,,,,
4,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,xgboost-hpo,[Default-Run-Group-australia-rain-exp-2024-11-...,[australia-rain-exp-2024-11-18-18-39-24],,,,,,,...,,,,,,,,,,
5,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,xgboost-hpo,[Default-Run-Group-australia-rain-exp-2024-11-...,[australia-rain-exp-2024-11-18-18-39-24],,,,,,,...,,,,,,,,,,
6,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,xgboost-hpo,[Default-Run-Group-australia-rain-exp-2024-11-...,[australia-rain-exp-2024-11-18-18-39-24],,,,,,,...,,,,,,,,,,
7,australia-rain-exp-2024-11-18-18-39-24-hpo-19-...,xgboost-hpo,[Default-Run-Group-australia-rain-exp-2024-11-...,[australia-rain-exp-2024-11-18-18-39-24],,,,,,,...,,,,,,,,,,
8,australia-rain-exp-2024-11-18-18-39-24-hpo-19-...,xgboost-hpo,[Default-Run-Group-australia-rain-exp-2024-11-...,[australia-rain-exp-2024-11-18-18-39-24],,,,,,,...,,,,,,,,,,
9,australia-rain-exp-2024-11-18-18-39-24-trainin...,xgboost-training,[Default-Run-Group-australia-rain-exp-2024-11-...,[australia-rain-exp-2024-11-18-18-39-24],,,,,,,...,,,,,,,,,,


In [63]:
# Inspect the dtype of every column in the analytics frame.
df_analytics.dtypes

TrialComponentName                        object
DisplayName                               object
Trials                                    object
Experiments                               object
SourceArn                                 object
SageMaker.ImageUri                        object
SageMaker.InstanceCount                  float64
SageMaker.InstanceType                    object
SageMaker.VolumeSizeInGB                 float64
eta                                       object
eval_metric                               object
gamma                                     object
max_depth                                 object
min_child_weight                          object
num_round                                 object
objective                                 object
sagemaker_container_log_level            float64
sagemaker_job_name                        object
sagemaker_program                         object
sagemaker_region                          object
sagemaker_submit_dir

In [67]:
# Show the first row as a Series, which lists every column/value pair
# vertically instead of the truncated wide table.
df_analytics.iloc[0]

TrialComponentName                       australia-rain-exp-2024-11-18-18-39-24-hpo-20-...
DisplayName                                                                    xgboost-hpo
Trials                                   [Default-Run-Group-australia-rain-exp-2024-11-...
Experiments                                       [australia-rain-exp-2024-11-18-18-39-24]
SourceArn                                                                              NaN
SageMaker.ImageUri                                                                     NaN
SageMaker.InstanceCount                                                                NaN
SageMaker.InstanceType                                                                 NaN
SageMaker.VolumeSizeInGB                                                               NaN
eta                                                                                    NaN
eval_metric                                                                            NaN

In [66]:
# Columns that identify each trial component.
identity_columns = ["TrialComponentName", "Experiments", "DisplayName"]

# Every column whose name starts with "train:auc" or "val". The "val" prefix
# also matches the "validation - ..." and "val - ..." columns.
selected_columns = [
    column for column in df_analytics.columns if column.startswith(("train:auc", "val"))
]

df_analytics[identity_columns + selected_columns]

Unnamed: 0,TrialComponentName,Experiments,DisplayName,train:auc - Min,train:auc - Max,train:auc - Avg,train:auc - StdDev,train:auc - Last,train:auc - Count,validation - MediaType,validation - Value,val - MediaType,val - Value
0,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,[australia-rain-exp-2024-11-18-18-39-24],xgboost-hpo,,,,,,,,,,
1,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,[australia-rain-exp-2024-11-18-18-39-24],xgboost-hpo,,,,,,,,,,
2,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,[australia-rain-exp-2024-11-18-18-39-24],xgboost-hpo,,,,,,,,,,
3,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,[australia-rain-exp-2024-11-18-18-39-24],xgboost-hpo,,,,,,,,,,
4,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,[australia-rain-exp-2024-11-18-18-39-24],xgboost-hpo,,,,,,,,,,
5,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,[australia-rain-exp-2024-11-18-18-39-24],xgboost-hpo,,,,,,,,,,
6,australia-rain-exp-2024-11-18-18-39-24-hpo-20-...,[australia-rain-exp-2024-11-18-18-39-24],xgboost-hpo,,,,,,,,,,
7,australia-rain-exp-2024-11-18-18-39-24-hpo-19-...,[australia-rain-exp-2024-11-18-18-39-24],xgboost-hpo,,,,,,,,,,
8,australia-rain-exp-2024-11-18-18-39-24-hpo-19-...,[australia-rain-exp-2024-11-18-18-39-24],xgboost-hpo,,,,,,,,,,
9,australia-rain-exp-2024-11-18-18-39-24-trainin...,[australia-rain-exp-2024-11-18-18-39-24],xgboost-training,,,,,,,,,,
