# Examine the Evaluation Metrics

Examine the resulting model evaluation after the pipeline completes. Download the resulting evaluation.json file from S3 and print the report.

View [evaluate_model_metrics.py](evaluate_model_metrics.py)

In [1]:
from botocore.exceptions import ClientError

import os
import sagemaker
import logging
import boto3
import sagemaker
import pandas as pd

# SageMaker SDK session, plus the default bucket, execution role, and region used by this notebook.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

import botocore.config

# Client configuration carrying an extra user-agent marker for requests made with it.
config = botocore.config.Config(
    user_agent_extra='dsoaws/1.0'
)

# Low-level SageMaker API client; the cells below use it to query pipeline executions and jobs.
sm = boto3.Session().client(service_name="sagemaker", 
                            region_name=region,
                            config=config)

In [2]:
# Restore `pipeline_name` from IPython's %store database (it must have been saved there beforehand).
%store -r pipeline_name

In [3]:
# Show which pipeline the rest of the notebook will inspect.
print(pipeline_name)

BERT-pipeline-1625234077


In [4]:
%%time

import time
from pprint import pprint

# Seconds to wait between status polls so the SageMaker API is not called in a tight loop.
POLL_INTERVAL_SECONDS = 30

executions_response = sm.list_pipeline_executions(PipelineName=pipeline_name)["PipelineExecutionSummaries"]
if not executions_response:
    # Fail with a clear message instead of an IndexError on the lookup below.
    raise RuntimeError("No executions found for pipeline '{}'.".format(pipeline_name))

# The first summary in the response is the execution this notebook inspects.
pipeline_execution_status = executions_response[0]["PipelineExecutionStatus"]
print(pipeline_execution_status)

# Poll until the execution is no longer "Executing". The wait happens on every
# iteration (not only after a failed call), so a healthy API is not hammered.
while pipeline_execution_status == "Executing":
    print("Please wait...")
    time.sleep(POLL_INTERVAL_SECONDS)
    try:
        executions_response = sm.list_pipeline_executions(PipelineName=pipeline_name)["PipelineExecutionSummaries"]
        pipeline_execution_status = executions_response[0]["PipelineExecutionStatus"]
    except Exception as e:
        # Best-effort polling: report the failure instead of hiding it, then retry after the next wait.
        print("Could not refresh the pipeline status ({}); retrying...".format(e))

pprint(executions_response)

Succeeded
[{'PipelineExecutionArn': 'arn:aws:sagemaker:us-west-2:706553727873:pipeline/bert-pipeline-1625234077/execution/vaoey0p8e4pb',
  'PipelineExecutionDisplayName': 'execution-1625235109795',
  'PipelineExecutionStatus': 'Succeeded',
  'StartTime': datetime.datetime(2021, 7, 2, 14, 11, 49, 691000, tzinfo=tzlocal())}]
CPU times: user 11 ms, sys: 3.2 ms, total: 14.2 ms
Wall time: 115 ms


# List Pipeline Execution Steps

In [5]:
# Status of the first execution summary returned by the earlier list call.
latest_execution = executions_response[0]
pipeline_execution_status = latest_execution["PipelineExecutionStatus"]
print(pipeline_execution_status)

Succeeded


In [6]:
# ARN of the first execution summary; it identifies the execution whose steps are listed next.
first_summary = executions_response[0]
pipeline_execution_arn = first_summary["PipelineExecutionArn"]
print(pipeline_execution_arn)

arn:aws:sagemaker:us-west-2:706553727873:pipeline/bert-pipeline-1625234077/execution/vaoey0p8e4pb


In [7]:
from pprint import pprint

# Fetch the step records for the selected execution; later cells read the
# "PipelineExecutionSteps" list from this response.
# NOTE(review): only a single response is read here — confirm whether pagination matters for larger pipelines.
steps = sm.list_pipeline_execution_steps(PipelineExecutionArn=pipeline_execution_arn)

pprint(steps)

{'PipelineExecutionSteps': [{'EndTime': datetime.datetime(2021, 7, 2, 14, 51, 22, 911000, tzinfo=tzlocal()),
                             'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-west-2:706553727873:model-package/bert-reviews-1625234723/1'}},
                             'StartTime': datetime.datetime(2021, 7, 2, 14, 51, 21, 260000, tzinfo=tzlocal()),
                             'StepName': 'RegisterModel',
                             'StepStatus': 'Succeeded'},
                            {'EndTime': datetime.datetime(2021, 7, 2, 14, 51, 22, 520000, tzinfo=tzlocal()),
                             'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-west-2:706553727873:model/pipelines-vaoey0p8e4pb-createmodel-uepmd0svcg'}},
                             'StartTime': datetime.datetime(2021, 7, 2, 14, 51, 21, 207000, tzinfo=tzlocal()),
                             'StepName': 'CreateModel',
                             'StepStatus': 'Succeeded'},
                      

# Retrieve Evaluation Metrics

In [8]:
# Locate the "EvaluateModel" step and derive its processing job name from the job ARN.
# Start from None so a missing step cannot silently reuse a value left over from another cell.
processing_job_name = None

for execution_step in steps["PipelineExecutionSteps"]:
    if execution_step["StepName"] == "EvaluateModel":
        # The job name is the last "/"-separated segment of the ARN.
        processing_job_name = execution_step["Metadata"]["ProcessingJob"]["Arn"].split("/")[-1]
        break

if processing_job_name is None:
    raise ValueError('No "EvaluateModel" step was found in the pipeline execution steps.')

describe_evaluation_processing_job_response = sm.describe_processing_job(ProcessingJobName=processing_job_name)

# The metrics location is taken from the job's first configured output.
evaluation_metrics_s3_uri = describe_evaluation_processing_job_response["ProcessingOutputConfig"]["Outputs"][0][
    "S3Output"
]["S3Uri"]

print(evaluation_metrics_s3_uri)

s3://sagemaker-us-west-2-706553727873/sagemaker-scikit-learn-2021-07-02-14-07-56-815/output/metrics


## Show the test accuracy

In [9]:
import json
from pprint import pprint

# Read evaluation.json from the metrics location, then pretty-print the parsed report.
evaluation_json_s3_uri = f"{evaluation_metrics_s3_uri}/evaluation.json"
evaluation_json = sagemaker.s3.S3Downloader.read_file(evaluation_json_s3_uri)

evaluation_report = json.loads(evaluation_json)
pprint(evaluation_report)

{'metrics': {'accuracy': {'value': 0.45}}}


## Show the confusion matrix generated during model evaluation

In [10]:
# Copy the confusion matrix image from the evaluation metrics location into ./model_evaluation/.
!aws s3 cp $evaluation_metrics_s3_uri/confusion_matrix.png ./model_evaluation/

import time

time.sleep(10)  # Slight delay for our notebook to recognize the newly-downloaded file

download: s3://sagemaker-us-west-2-706553727873/sagemaker-scikit-learn-2021-07-02-14-07-56-815/output/metrics/confusion_matrix.png to model_evaluation/confusion_matrix.png


In [11]:
%%html

<!-- Confusion matrix image downloaded into ./model_evaluation/; alt text covers the case where it cannot be shown. -->
<img src='./model_evaluation/confusion_matrix.png' alt='Confusion matrix generated during model evaluation'>

# Download and Analyze the Trained Model from S3

In [12]:
# Find the ARN of the training job recorded on the pipeline's "Train" step.
training_job_arn = None

for execution_step in steps["PipelineExecutionSteps"]:
    if execution_step["StepName"] == "Train":
        training_job_arn = execution_step["Metadata"]["TrainingJob"]["Arn"]
        break

if training_job_arn is None:
    # Fail with a clear message instead of an AttributeError from None.split() below.
    raise ValueError('No "Train" step was found in the pipeline execution steps.')

# The job name is the last "/"-separated segment of the ARN.
training_job_name = training_job_arn.split("/")[-1]
print(training_job_name)

pipelines-vaoey0p8e4pb-train-v9x0q6ud44


In [13]:
# Describe the training job and pull out the S3 location of its model artifacts.
training_job_description = sm.describe_training_job(TrainingJobName=training_job_name)
model_tar_s3_uri = training_job_description["ModelArtifacts"]["S3ModelArtifacts"]

In [14]:
# Copy the trained model artifact from S3 into the current directory.
!aws s3 cp $model_tar_s3_uri ./

download: s3://sagemaker-us-west-2-706553727873/pipelines-vaoey0p8e4pb-Train-V9x0q6ud44/output/model.tar.gz to ./model.tar.gz


In [15]:
# Create ./model if it does not exist, then unpack the downloaded archive into it (verbose file listing).
!mkdir -p ./model
!tar -zxvf model.tar.gz -C ./model

code/
code/inference.py
transformers/
transformers/fine-tuned/
transformers/fine-tuned/config.json
transformers/fine-tuned/tf_model.h5
tensorboard/
tensorflow/
tensorflow/saved_model/
tensorflow/saved_model/0/
tensorflow/saved_model/0/variables/
tensorflow/saved_model/0/variables/variables.index
tensorflow/saved_model/0/variables/variables.data-00000-of-00001
tensorflow/saved_model/0/saved_model.pb
tensorflow/saved_model/0/assets/


# List All Artifacts Generated By The Pipeline

Amazon SageMaker ML Lineage Tracking creates and stores information about the steps of a machine learning (ML) workflow from data preparation to model deployment. 

Amazon SageMaker Lineage enables events that happen within SageMaker to be traced via a graph structure. The data simplifies generating reports, making comparisons, or discovering relationships between events. For example, you can easily trace both how a model was generated and where the model was deployed.

The lineage graph is created automatically by SageMaker, and you can also create or modify your own graphs directly.

## Key Concepts

* **Lineage Graph** - A connected graph tracing your machine learning workflow end to end.

* **Artifacts** - Represents a URI-addressable object or data. Artifacts are typically inputs or outputs to Actions.

* **Actions** - Represents an action taken such as a computation, transformation, or job.

* **Contexts** - Provides a method to logically group other entities.

* **Associations** - A directed edge in the lineage graph that links two entities.

* **Lineage Traversal** - Starting from an arbitrary point, trace the lineage graph to discover and analyze relationships between steps in your workflow.

In [16]:
# Clear both job names; the lineage loop in the next cell assigns them again.
processing_job_name = None
training_job_name = None

In [17]:
import time
from sagemaker.lineage.visualizer import LineageTableVisualizer

viz = LineageTableVisualizer(sagemaker.session.Session())

# Walk the steps in reverse of the order returned by the API and display a lineage table for each one.
for execution_step in reversed(steps["PipelineExecutionSteps"]):
    print(execution_step)
    step_name = execution_step["StepName"]

    # "Processing" and "Train" are looked up by job name because there appears to be a bug
    # in how LineageTableVisualizer handles the Processing step when given the step record.
    if step_name == "Processing":
        processing_job_arn = execution_step["Metadata"]["ProcessingJob"]["Arn"]
        processing_job_name = processing_job_arn.split("/")[-1]
        print(processing_job_name)
        display(viz.show(processing_job_name=processing_job_name))
        continue

    if step_name == "Train":
        training_job_arn_for_lineage = execution_step["Metadata"]["TrainingJob"]["Arn"]
        training_job_name = training_job_arn_for_lineage.split("/")[-1]
        print(training_job_name)
        display(viz.show(training_job_name=training_job_name))
        continue

    # Every other step is passed to the visualizer as-is, followed by a short pause.
    display(viz.show(pipeline_execution_step=execution_step))
    time.sleep(5)

{'StepName': 'Processing', 'StartTime': datetime.datetime(2021, 7, 2, 14, 11, 50, 288000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2021, 7, 2, 14, 24, 39, 480000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:706553727873:processing-job/pipelines-vaoey0p8e4pb-processing-bgfqfj4ktd'}}}
pipelines-vaoey0p8e4pb-processing-bgfqfj4ktd


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...ess-scikit-text-to-bert-feature-store.py,Input,DataSet,ContributedTo,artifact
1,s3://...t-2-706553727873/amazon-reviews-pds/tsv/,Input,DataSet,ContributedTo,artifact
2,24661...om/sagemaker-scikit-learn:0.23-1-cpu-py3,Input,Image,ContributedTo,artifact
3,s3://...2021-07-02-14-08-29-911/output/bert-test,Output,DataSet,Produced,artifact
4,s3://...7-02-14-08-29-911/output/bert-validation,Output,DataSet,Produced,artifact
5,s3://...021-07-02-14-08-29-911/output/bert-train,Output,DataSet,Produced,artifact


{'StepName': 'Train', 'StartTime': datetime.datetime(2021, 7, 2, 14, 24, 40, 20000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2021, 7, 2, 14, 42, 52, 753000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:706553727873:training-job/pipelines-vaoey0p8e4pb-train-v9x0q6ud44'}}}
pipelines-vaoey0p8e4pb-train-v9x0q6ud44


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...2021-07-02-14-08-29-911/output/bert-test,Input,DataSet,ContributedTo,artifact
1,s3://...7-02-14-08-29-911/output/bert-validation,Input,DataSet,ContributedTo,artifact
2,s3://...021-07-02-14-08-29-911/output/bert-train,Input,DataSet,ContributedTo,artifact
3,76310...s.com/tensorflow-training:2.3.1-cpu-py37,Input,Image,ContributedTo,artifact
4,s3://...4pb-Train-V9x0q6ud44/output/model.tar.gz,Output,Model,Produced,artifact


{'StepName': 'EvaluateModel', 'StartTime': datetime.datetime(2021, 7, 2, 14, 42, 53, 288000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2021, 7, 2, 14, 51, 19, 60000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:706553727873:processing-job/pipelines-vaoey0p8e4pb-evaluatemodel-lg6w3yopkw'}}}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...937/input/code/evaluate_model_metrics.py,Input,DataSet,ContributedTo,artifact
1,s3://...t-2-706553727873/amazon-reviews-pds/tsv/,Input,DataSet,ContributedTo,artifact
2,s3://...4pb-Train-V9x0q6ud44/output/model.tar.gz,Input,Model,ContributedTo,artifact
3,24661...om/sagemaker-scikit-learn:0.23-1-cpu-py3,Input,Image,ContributedTo,artifact
4,s3://...n-2021-07-02-14-07-56-815/output/metrics,Output,DataSet,Produced,artifact


{'StepName': 'AccuracyCondition', 'StartTime': datetime.datetime(2021, 7, 2, 14, 51, 19, 825000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2021, 7, 2, 14, 51, 20, 924000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'Condition': {'Outcome': 'True'}}}


None

{'StepName': 'CreateModel', 'StartTime': datetime.datetime(2021, 7, 2, 14, 51, 21, 207000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2021, 7, 2, 14, 51, 22, 520000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-west-2:706553727873:model/pipelines-vaoey0p8e4pb-createmodel-uepmd0svcg'}}}


None

{'StepName': 'RegisterModel', 'StartTime': datetime.datetime(2021, 7, 2, 14, 51, 21, 260000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2021, 7, 2, 14, 51, 22, 911000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-west-2:706553727873:model-package/bert-reviews-1625234723/1'}}}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,bert-reviews-1625234723-1-PendingManualApprova...,Input,Approval,ContributedTo,action


# Release Resources

In [None]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
// Click the first element with class "sm-command-button" — the hidden button
// above, which carries the "kernelmenu:shutdown" command.
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp: any error raised while finding or clicking the button is deliberately ignored.
}    
</script>