In [1]:
!pip install --upgrade --quiet sagemaker>=2.215.0
!pip install mlflow==2.13.2 sagemaker-mlflow==0.1.0



In [2]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.serve import SchemaBuilder
from sagemaker.serve import ModelBuilder
from sagemaker.serve.mode.function_pointers import Mode
import mlflow
from mlflow import MlflowClient
import boto3
import numpy as np
import pandas as pd
import os

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [39]:
# --- SageMaker context -------------------------------------------------------
# Session, region and execution role shared by the SageMaker calls in this notebook.
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
role = get_execution_role()

# --- MLflow ------------------------------------------------------------------
# ARN of the MLflow Tracking Server the training job reports to, plus the
# experiment name and the name under which the trained model is registered.
tracking_server_arn = "arn:aws:sagemaker:eu-west-1:211125740051:mlflow-tracking-server/Sample-server"
mlflow_experiment_name = "01-project-POL"
mlflow_register_name = "01-project-POL"

# --- S3 locations ------------------------------------------------------------
# Training channel data and the inference input/output prefixes.
train_data_uri = "s3://sagemaker-bucket-ds/training-jobs/data/train/"
input_path = "s3://sagemaker-bucket-ds/training-jobs/data/inference_input/"
output_path = "s3://sagemaker-bucket-ds/training-jobs/data/inference_output/"

In [25]:
os.makedirs("01_training_code", exist_ok=True) # Create folder for training code
!aws s3 rm s3://sagemaker-bucket-ds/training-jobs/data/inference_output/ --recursive

delete: s3://sagemaker-bucket-ds/training-jobs/data/inference_output/iris_inference.csv.out


In [11]:
%%writefile 01_training_code/train.py

from __future__ import print_function

import argparse
import joblib
import os
import pandas as pd

from sklearn import tree

import mlflow

def resolve_max_leaf_nodes(value):
    """Translate the CLI sentinel for "no limit" into what scikit-learn expects.

    The ``--max_leaf_nodes`` argument defaults to -1 to mean "unlimited", but
    ``DecisionTreeClassifier`` only accepts ``None`` or an integer >= 2, so the
    sentinel is mapped to ``None``. Any other value is returned untouched and
    left for scikit-learn to validate.
    """
    if value is None or value == -1:
        return None
    return value


def load_training_frame(train_dir, channel="train"):
    """Read every regular file in ``train_dir`` as header-less CSV and concatenate them.

    File names are sorted so the row order of the result does not depend on the
    arbitrary order returned by ``os.listdir``.

    Raises:
        ValueError: if ``train_dir`` contains no regular files.
    """
    candidates = (os.path.join(train_dir, name) for name in sorted(os.listdir(train_dir)))
    input_files = [path for path in candidates if os.path.isfile(path)]
    if not input_files:
        raise ValueError(('There are no files in {}.\n' +
                          'This usually indicates that the channel ({}) was incorrectly specified,\n' +
                          'the data specification in S3 was incorrectly specified or the role specified\n' +
                          'does not have permission to access the data.').format(train_dir, channel))
    frames = [pd.read_csv(path, header=None, engine="python") for path in input_files]
    return pd.concat(frames)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # Hyperparameters. This simple example exposes a single one; -1 means "no limit".
    parser.add_argument('--max_leaf_nodes', type=int, default=-1)

    # SageMaker-specific arguments. Defaults come from the environment variables
    # that SageMaker sets inside the training container.
    parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
    parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])

    args = parser.parse_args()

    # Read all files of the training channel into a single pandas DataFrame.
    train_data = load_training_frame(args.train)

    # Point MLflow at the Tracking Server (by ARN) and select the experiment.
    mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_ARN'])
    mlflow.set_experiment(os.environ['MLFLOW_EXPERIMENT_NAME'])

    # Enable autologging in MLflow
    mlflow.autolog()

    # Labels are in the first column, features in the remaining ones.
    train_y = train_data.iloc[:, 0]
    train_X = train_data.iloc[:, 1:]

    # Only one hyperparameter is supported here; add as many as your training
    # may require to the ArgumentParser above.
    max_leaf_nodes = resolve_max_leaf_nodes(args.max_leaf_nodes)

    # Train a scikit-learn decision tree classifier.
    clf = tree.DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes)
    clf = clf.fit(train_X, train_y)

    # Persist the fitted classifier into the model directory.
    joblib.dump(clf, os.path.join(args.model_dir, "model.joblib"))

    # Register the model with MLflow, using the run recorded during fit().
    last_run = mlflow.last_active_run()
    if last_run is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError('No MLflow run was recorded during training; cannot register the model.')
    model_uri = "runs:/{run_id}/{artifact_path}".format(run_id=last_run.info.run_id, artifact_path="model")
    mlflow.register_model(model_uri=model_uri, name=os.environ['MLFLOW_REGISTER_NAME'])

Writing 01_training_code/train.py


In [12]:
%%writefile 01_training_code/requirements.txt
# Extra packages pip-installed inside the training container from this file.
# The mlflow / sagemaker-mlflow pins match the versions installed in the notebook.
mlflow==2.13.2
sagemaker-mlflow==0.1.0
cloudpickle==2.2.1 # Required for the SageMaker Python SDK

Writing 01_training_code/requirements.txt


In [13]:
# Estimator describing the scikit-learn training job.
sklearn = SKLearn(
    # Who runs the job and through which session
    role=role,
    sagemaker_session=sagemaker_session,
    # What code to run: train.py from the local source directory
    source_dir="01_training_code",
    entry_point="train.py",
    hyperparameters={"max_leaf_nodes": 30},
    # Where to run it
    framework_version="1.2-1",
    instance_type="ml.c4.xlarge",
    keep_alive_period_in_seconds=3600,
    # MLflow settings that train.py reads from os.environ
    environment={
        "MLFLOW_TRACKING_ARN": tracking_server_arn,
        "MLFLOW_EXPERIMENT_NAME": mlflow_experiment_name,
        "MLFLOW_REGISTER_NAME": mlflow_register_name,
    },
)

In [14]:
# Launch the training job; the "train" channel is fed from the S3 training data URI.
sklearn.fit(inputs={"train": train_data_uri})

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-07-03-12-20-49-692


2024-07-03 12:20:50 Starting - Starting the training job...
2024-07-03 12:21:11 Starting - Preparing the instances for training...
2024-07-03 12:21:38 Downloading - Downloading input data...
2024-07-03 12:22:18 Downloading - Downloading the training image......
2024-07-03 12:23:08 Training - Training image download completed. Training in progress..[34m2024-07-03 12:23:16,600 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-07-03 12:23:16,606 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-07-03 12:23:16,609 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2024-07-03 12:23:16,633 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-07-03 12:23:16,861 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/miniconda3/bin/python -m pip install -r requirements.txt[0m
