### Install sagemaker-experiments

In [47]:
# Install the SageMaker Experiments SDK using the interpreter that runs this kernel
# (sys.executable), so the package is installed for the same Python that imports it.
# NOTE(review): the package version is not pinned — consider pinning for reproducibility.
import sys
!{sys.executable} -m pip install sagemaker-experiments



In [48]:
import sagemaker
import boto3

# SageMaker SDK session; used below for the S3 upload and passed to the estimator.
session = sagemaker.Session()

# Execution role returned by the SDK; passed to the estimator as `role`.
role = sagemaker.get_execution_role()

# Low-level SageMaker client handed to the experiment-tracking calls.
sm = boto3.Session().client('sagemaker')

# Push data to S3 bucket

In [49]:
# Local dataset and its S3 destination.
DATA_FILE = './data/iris.csv'
bucket_name = 'snowflake-getting-started'
prefix = 'iris/data'
# Bucket root; passed to the estimator later as its output path.
s3_output_path = f's3://{bucket_name}'

print('------------>uploading data to s3')
# The value returned by upload_data is reused as the training input and as the
# logged dataset artifact.
s3_data_path = session.upload_data(
    path=DATA_FILE,
    bucket=bucket_name,
    key_prefix=prefix,
)
print('------------>data uploaded to -', s3_data_path)
print('------------>model would be uploaded to -', s3_output_path)

------------>uploading data to s3
------------>data uploaded to - s3://snowflake-getting-started/iris/data/iris.csv
------------>model would be uploaded to - s3://snowflake-getting-started


In [50]:
from smexperiments.tracker import Tracker

# Record the preprocessing step as a trial component. The `tracker` name is read
# again later to attach this component to each training trial.
preprocessing_params = {"pre_processing_param": 'pre_processing_param_value'}

with Tracker.create(display_name="Preprocessing", sagemaker_boto_client=sm) as tracker:
    tracker.log_parameters(preprocessing_params)
    # The S3 URI of the dataset uploaded above is logged as both the input and
    # the output artifact of this component.
    dataset_artifact = dict(name="iris-dataset", media_type="s3/uri", value=s3_data_path)
    tracker.log_input(**dataset_artifact)
    tracker.log_output(**dataset_artifact)

### Step 1 - Setup an Experiment

### Create an Experiment

In [51]:
from smexperiments.experiment import Experiment
import time

# The experiment name carries a seconds-resolution timestamp suffix.
iris_experiment_name = f"iris-classification-model-{int(time.time())}"

iris_experiment = Experiment.create(
    experiment_name=iris_experiment_name,
    description="Classification of iris flowers",
    sagemaker_boto_client=sm,
)
print(iris_experiment)

Experiment(sagemaker_boto_client=<botocore.client.SageMaker object at 0x7fc774055550>,experiment_name='iris-classification-model-1595707275',description='Classification of iris flowers',tags=None,experiment_arn='arn:aws:sagemaker:eu-west-1:951135073253:experiment/iris-classification-model-1595707275',response_metadata={'RequestId': '34ed97bc-5fc4-432b-bf4d-c114dd0aeeb7', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '34ed97bc-5fc4-432b-bf4d-c114dd0aeeb7', 'content-type': 'application/x-amz-json-1.1', 'content-length': '108', 'date': 'Sat, 25 Jul 2020 20:01:14 GMT'}, 'RetryAttempts': 0})


### Step 2 - Track Experiment

### Now create a Trial for each training run to track its inputs, parameters, and metrics.

In [52]:
from sagemaker.sklearn import SKLearn
from smexperiments.trial import Trial

# Reuse the trial component created earlier for preprocessing. Preprocessing is the
# same for every training trial, so that one component is attached to each trial
# instead of being re-created.
preprocessing_trial_component = tracker.trial_component
print(preprocessing_trial_component)

# One trial (and one training job) per `max_iter` value.
for num_max_iter in [10, 20]:

    # Create the trial; the name carries the hyperparameter value and a timestamp.
    trial_name = f"iris-training-job-{num_max_iter}-max-iter-{int(time.time())}"

    iris_trial = Trial.create(
        trial_name=trial_name,
        experiment_name=iris_experiment.experiment_name,
        sagemaker_boto_client=sm,
    )

    # Associate the preprocessing trial component with the current trial.
    iris_trial.add_trial_component(preprocessing_trial_component)

    executor = SKLearn(entry_point='train.py',
                      train_instance_type='ml.c4.xlarge',
                      sagemaker_session=session,
                      role=role,
                      hyperparameters={
                          'max_iter': num_max_iter,
                          'class_weight': 'balanced'
                      },
                      # Metrics are extracted from the training log with these regexes.
                      metric_definitions=[
                        {'Name': 'test:f1-score', 'Regex': 'Test F1-Score: (.*)'},
                        {'Name': 'test:accuracy', 'Regex': 'Test Accuracy: (.*)'}
                      ],
                      enable_sagemaker_metrics=True,
                      output_path=s3_output_path)

    # The job name is timestamp-based (whole seconds).
    iris_training_job_name = f"iris-training-job-{int(time.time())}"
    executor.fit(
        inputs={'training': s3_data_path},
        job_name=iris_training_job_name,
        # Attach the job to the trial; it appears there as a "Training" component.
        experiment_config={
            "TrialName": iris_trial.trial_name,
            "TrialComponentDisplayName": "Training",
        },
        wait=True
    )

    # Give it a while before dispatching the next training job.
    time.sleep(2)



TrialComponent(sagemaker_boto_client=<botocore.client.SageMaker object at 0x7fc774055550>,trial_component_name='TrialComponent-2020-07-25-200112-dxml',display_name='Preprocessing',tags=None,trial_component_arn='arn:aws:sagemaker:eu-west-1:951135073253:experiment-trial-component/trialcomponent-2020-07-25-200112-dxml',response_metadata={'RequestId': '37a0ba05-bbfc-4f8c-bbd1-54068758324c', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '37a0ba05-bbfc-4f8c-bbd1-54068758324c', 'content-type': 'application/x-amz-json-1.1', 'content-length': '129', 'date': 'Sat, 25 Jul 2020 20:01:11 GMT'}, 'RetryAttempts': 0},parameters={'pre_processing_param': 'pre_processing_param_value'},input_artifacts={'iris-dataset': TrialComponentArtifact(value='s3://snowflake-getting-started/iris/data/iris.csv',media_type='s3/uri')},output_artifacts={'iris-dataset': TrialComponentArtifact(value='s3://snowflake-getting-started/iris/data/iris.csv',media_type='s3/uri')},start_time=datetime.datetime(2020, 7, 2

INFO:sagemaker:Creating training-job with name: iris-training-job-1595707279


2020-07-25 20:01:19 Starting - Starting the training job...
2020-07-25 20:01:21 Starting - Launching requested ML instances.........
2020-07-25 20:03:04 Starting - Preparing the instances for training......
2020-07-25 20:04:10 Downloading - Downloading input data
2020-07-25 20:04:10 Training - Downloading the training image..[34m2020-07-25 20:04:30,923 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2020-07-25 20:04:30,926 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-07-25 20:04:30,937 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2020-07-25 20:04:31,211 sagemaker-containers INFO     Module train does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m2020-07-25 20:04:31,211 sagemaker-containers INFO     Generating setup.cfg[0m
[34m2020-07-25 20:04:31,212 sagemaker-containers INFO     Generating MANIFEST.in[0m
[34m2020-07-25 20:04:31,212 

INFO:sagemaker:Creating training-job with name: iris-training-job-1595707533


2020-07-25 20:05:34 Starting - Starting the training job...
2020-07-25 20:05:36 Starting - Launching requested ML instances............
2020-07-25 20:07:38 Starting - Preparing the instances for training...
2020-07-25 20:08:32 Downloading - Downloading input data
2020-07-25 20:08:32 Training - Downloading the training image...
2020-07-25 20:08:52 Training - Training image download completed. Training in progress.[34m2020-07-25 20:08:52,956 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2020-07-25 20:08:52,958 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-07-25 20:08:52,969 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2020-07-25 20:09:00,049 sagemaker-containers INFO     Module train does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m2020-07-25 20:09:00,049 sagemaker-containers INFO     Generating setup.cfg[0m
[34m2020-07-25 20:09:00,0

### Compare the model training runs for an experiment

Now we will use the analytics capabilities of the SageMaker Python SDK to query and compare the training runs, in order to identify the best model produced by our experiment. You can retrieve trial components by using a search expression.

### Some Simple Analyses

In [53]:
# Match only the trial components whose display name is "Training".
training_filter = {"Name": "DisplayName", "Operator": "Equals", "Value": "Training"}
search_expression = {"Filters": [training_filter]}

In [76]:
from sagemaker.analytics import ExperimentAnalytics
from sagemaker.session import Session

# Table of this experiment's "Training" components, sorted by maximum
# test F1-score in descending order.
analytics_session = Session(boto3.Session(), sm)

trial_component_analytics = ExperimentAnalytics(
    sagemaker_session=analytics_session,
    experiment_name=iris_experiment.experiment_name,
    search_expression=search_expression,
    metric_names=['test:f1-score'],
    parameter_names=['max_iter', 'class_weight'],
    sort_by="metrics.test:f1-score.max",
    sort_order="Descending",
)
trial_component_analytics.dataframe()

Unnamed: 0,TrialComponentName,DisplayName,SourceArn,class_weight,max_iter,test:f1-score - Min,test:f1-score - Max,test:f1-score - Avg,test:f1-score - StdDev,test:f1-score - Last,test:f1-score - Count
0,iris-training-job-1595707533-aws-training-job,Training,arn:aws:sagemaker:eu-west-1:951135073253:train...,"""balanced""",20.0,0.947368,0.947368,0.947368,0.0,0.947368,1
1,iris-training-job-1595707279-aws-training-job,Training,arn:aws:sagemaker:eu-west-1:951135073253:train...,"""balanced""",10.0,0.921053,0.921053,0.921053,0.0,0.921053,1


In [80]:
# Show the "Training" component of a single trial.
# The trial name is read from `iris_trial` (the last trial created by the training
# loop) rather than hard-coded: trial names embed a timestamp, so a literal name
# copied from an earlier run matches nothing once the notebook is re-executed.
lineage_table = ExperimentAnalytics(
    sagemaker_session=Session(boto3.Session(), sm),
    search_expression={
        "Filters": [
            {
                "Name": "Parents.TrialName",
                "Operator": "Equals",
                "Value": iris_trial.trial_name,
            },
            {
                "Name": "DisplayName",
                "Operator": "Equals",
                "Value": "Training",
            },
        ]
    },
    sort_by="CreationTime",
    sort_order="Ascending",
)

lineage_table.dataframe()

Unnamed: 0,TrialComponentName,DisplayName,SourceArn,SageMaker.ImageUri,SageMaker.InstanceCount,SageMaker.InstanceType,SageMaker.VolumeSizeInGB,class_weight,max_iter,sagemaker_container_log_level,...,test:f1-score - Avg,test:f1-score - StdDev,test:f1-score - Last,test:f1-score - Count,test:accuracy - Min,test:accuracy - Max,test:accuracy - Avg,test:accuracy - StdDev,test:accuracy - Last,test:accuracy - Count
0,iris-training-job-1595707533-aws-training-job,Training,arn:aws:sagemaker:eu-west-1:951135073253:train...,141502667606.dkr.ecr.eu-west-1.amazonaws.com/s...,1.0,ml.c4.xlarge,30.0,"""balanced""",20.0,20.0,...,0.947368,0.0,0.947368,1,0.947368,0.947368,0.947368,0.0,0.947368,1


### Step 3 - Real-Time Predictions

#### Deploy Model to an Endpoint

In [37]:
# Deploy the estimator left in `executor` by the training loop (its final
# iteration) to a single-instance endpoint.
predictor = executor.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)
print('\nModel Deployed!')

# Print the endpoint attribute of the returned predictor.
print(predictor.endpoint)

INFO:sagemaker:Creating model with name: iris-training-job-1595705969
INFO:sagemaker:Creating endpoint with name iris-training-job-1595705969


-------------!
Model Deployed!
iris-training-job-1595705969


#### Realtime Inference

    Look up the predictor via the endpoint name & call predict on it

In [46]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Fixed seed so the held-out rows, and therefore the printed predictions and
# labels, are the same on every run of this cell.
SPLIT_SEED = 42

data = pd.read_csv(DATA_FILE, engine='python')
# Columns 1-4 are used as the features and column 5 as the label.
X = data.iloc[:, 1:5]
y = data.iloc[:, 5]

train_x, test_x, train_y, test_y = train_test_split(X, y, random_state=SPLIT_SEED)
# NOTE(review): the split performed inside train.py is not visible here, so these
# rows may overlap with the data the model was trained on — confirm before treating
# the comparison below as a true hold-out evaluation.
pred_y = predictor.predict(test_x)
print(pred_y)
print(test_y)

[0. 2. 1. 0. 2. 0. 1. 2. 1. 1. 0. 2. 0. 0. 0. 1. 1. 2. 0. 0. 0. 2. 2. 0.
 2. 0. 1. 1. 0. 1. 2. 2. 2. 0. 2. 0. 1. 1.]
36     0.0
137    2.0
89     1.0
48     0.0
106    2.0
34     0.0
95     1.0
136    2.0
75     1.0
50     1.0
9      0.0
149    2.0
39     0.0
7      0.0
18     0.0
80     1.0
60     1.0
113    2.0
17     0.0
49     0.0
29     0.0
117    2.0
131    2.0
22     0.0
120    2.0
37     0.0
90     1.0
96     1.0
3      0.0
69     1.0
114    2.0
70     1.0
84     1.0
44     0.0
123    2.0
16     0.0
86     1.0
64     1.0
Name: class, dtype: float64
