# Model Training with SageMaker

Train scikit-learn models as SageMaker training jobs and track each run with SageMaker Experiments.

In [None]:
import sagemaker
from sagemaker.sklearn import SKLearn
from sagemaker.experiments.run import Run
import pandas as pd

# Session created with no arguments; it is passed to every experiment Run
# below via `sagemaker_session=sess`.
sess = sagemaker.Session()
# Region name reported by the session. It is not referenced again in the
# cells shown here — presumably kept for ad-hoc use; confirm before removing.
region = sess.boto_region_name

In [None]:
# Retrieve from previous notebook
%store -r train_s3_path
%store -r test_s3_path
%store -r bucket
%store -r role

print(f'Training data: {train_s3_path}')
print(f'Test data: {test_s3_path}')

In [None]:
# Baseline estimator. The hyperparameters are named up front and then handed
# to the SKLearn estimator together with the job/instance configuration.
baseline_hyperparameters = {
    'n-estimators': 100,
    'max-depth': 5,
    'min-samples-split': 2,
}

sklearn_estimator = SKLearn(
    # What to run
    entry_point='../scripts/train.py',
    framework_version='1.2-1',
    py_version='py3',
    hyperparameters=baseline_hyperparameters,
    # Where and as whom to run it
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    # Job naming and artifact destination
    base_job_name='classification-baseline',
    output_path=f's3://{bucket}/models',
)

print('Estimator configured')

In [None]:
# Train the baseline estimator inside a tracked experiment run.
with Run(
    experiment_name='classification-experiments',
    run_name='baseline-model',
    sagemaker_session=sess
) as run:
    # Record every hyperparameter the estimator was configured with
    # (including min_samples_split, which was previously left out) so the
    # tracked run fully describes the training job.
    run.log_parameters({
        'n_estimators': 100,
        'max_depth': 5,
        'min_samples_split': 2,
        'algorithm': 'RandomForest'
    })

    # NOTE(review): only the 'train' channel is supplied; `test_s3_path` is
    # restored earlier but never used in the visible cells — confirm whether
    # train.py expects a 'test' channel.
    # wait=True blocks until the job has finished, so the artifact location
    # read below refers to the completed job.
    sklearn_estimator.fit({'train': train_s3_path}, wait=True)

    # Read the artifact location once and reuse it for logging and printing.
    model_artifact = sklearn_estimator.model_data

    run.log_artifact(
        name='model',
        value=model_artifact,
        media_type='application/x-tar'
    )

    print(f'Training job: {sklearn_estimator.latest_training_job.name}')
    print(f'Model artifact: {model_artifact}')

In [None]:
# Train one estimator per configuration, each inside its own tracked run.
experiments = [
    {'name': 'shallow', 'n_estimators': 50, 'max_depth': 3},
    {'name': 'deep', 'n_estimators': 200, 'max_depth': 10},
]

# Keep every fitted estimator, keyed by run name. Without this, `estimator`
# is rebound on each iteration and only the last model stays reachable.
trained_estimators = {}

for exp in experiments:
    print(f'Training: {exp["name"]}')

    # NOTE(review): unlike the baseline estimator, 'min-samples-split' is not
    # passed here, so train.py's own default applies — confirm that is intended.
    estimator = SKLearn(
        entry_point='../scripts/train.py',
        role=role,
        instance_type='ml.m5.large',
        instance_count=1,
        framework_version='1.2-1',
        py_version='py3',
        hyperparameters={
            'n-estimators': exp['n_estimators'],
            'max-depth': exp['max_depth']
        },
        output_path=f's3://{bucket}/models',
        # Name the job after the configuration, mirroring the baseline's
        # 'classification-baseline', so jobs are identifiable in the console.
        base_job_name=f'classification-{exp["name"]}'
    )

    with Run(
        experiment_name='classification-experiments',
        run_name=exp['name'],
        sagemaker_session=sess
    ) as run:
        # Log the same parameter set as the baseline run ('algorithm'
        # included) so the runs can be compared side by side.
        run.log_parameters({**exp, 'algorithm': 'RandomForest'})
        estimator.fit({'train': train_s3_path}, wait=True)

        # Attach the model artifact to the run, as the baseline run does.
        run.log_artifact(
            name='model',
            value=estimator.model_data,
            media_type='application/x-tar'
        )

    trained_estimators[exp['name']] = estimator

print('All experiments completed')

In [None]:
# Save best model
# NOTE(review): this stores the artifact of `sklearn_estimator`, i.e. the
# baseline run. No metric comparison between the baseline and the other
# experiment runs is visible in this notebook, so "best" is an assumption —
# confirm the intended selection criterion before relying on this value.
best_model_data = sklearn_estimator.model_data
# Persist the S3 location for later notebooks via IPython's %store.
%store best_model_data

print(f'Best model: {best_model_data}')