In [19]:
import sys

In [20]:
# !{sys.executable} -m pip install sagemaker-experiments
# !{sys.executable} -m pip install "sagemaker-studio-image-build"

## Prerequistes

In [21]:
from sagemaker import get_execution_role

role = get_execution_role()

print(
    f"""{{
    "Version": "2012-10-17",
    "Statement": [
        {{
            "Effect": "Allow",
            "Action": [
                "codebuild:DeleteProject",
                "codebuild:CreateProject",
                "codebuild:BatchGetBuilds",
                "codebuild:StartBuild"
            ],
            "Resource": "arn:aws:codebuild:*:*:project/sagemaker-studio*"
        }},
        {{
            "Effect": "Allow",
            "Action": "logs:CreateLogStream",
            "Resource": "arn:aws:logs:*:*:log-group:/aws/codebuild/sagemaker-studio*"
        }},
        {{
            "Effect": "Allow",
            "Action": [
                "logs:GetLogEvents",
                "logs:PutLogEvents"
            ],
            "Resource": "arn:aws:logs:*:*:log-group:/aws/codebuild/sagemaker-studio*:log-stream:*"
        }},
        {{
            "Effect": "Allow",
            "Action": "logs:CreateLogGroup",
            "Resource": "*"
        }},
        {{
            "Effect": "Allow",
            "Action": [
                "ecr:CreateRepository",
                "ecr:BatchGetImage",
                "ecr:CompleteLayerUpload",
                "ecr:DescribeImages",
                "ecr:DescribeRepositories",
                "ecr:UploadLayerPart",
                "ecr:ListImages",
                "ecr:InitiateLayerUpload",
                "ecr:BatchCheckLayerAvailability",
                "ecr:PutImage"
            ],
            "Resource": "arn:aws:ecr:*:*:repository/sagemaker-studio*"
        }},
        {{
            "Effect": "Allow",
            "Action": "ecr:GetAuthorizationToken",
            "Resource": "*"
        }},
        {{
            "Effect": "Allow",
            "Action": [
              "s3:GetObject",
              "s3:DeleteObject",
              "s3:PutObject"
              ],
            "Resource": "arn:aws:s3:::sagemaker-*/*"
        }},
        {{
            "Effect": "Allow",
            "Action": [
                "s3:CreateBucket"
            ],
            "Resource": "arn:aws:s3:::sagemaker*"
        }},
        {{
            "Effect": "Allow",
            "Action": [
                "iam:GetRole",
                "iam:ListRoles"
            ],
            "Resource": "*"
        }},
        {{
            "Effect": "Allow",
            "Action": "iam:PassRole",
            "Resource": "{role}",
            "Condition": {{
                "StringLikeIfExists": {{
                    "iam:PassedToService": "codebuild.amazonaws.com"
                }}
            }}
        }}
    ]
}}"""
)

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "codebuild:DeleteProject",
                "codebuild:CreateProject",
                "codebuild:BatchGetBuilds",
                "codebuild:StartBuild"
            ],
            "Resource": "arn:aws:codebuild:*:*:project/sagemaker-studio*"
        },
        {
            "Effect": "Allow",
            "Action": "logs:CreateLogStream",
            "Resource": "arn:aws:logs:*:*:log-group:/aws/codebuild/sagemaker-studio*"
        },
        {
            "Effect": "Allow",
            "Action": [
                "logs:GetLogEvents",
                "logs:PutLogEvents"
            ],
            "Resource": "arn:aws:logs:*:*:log-group:/aws/codebuild/sagemaker-studio*:log-stream:*"
        },
        {
            "Effect": "Allow",
            "Action": "logs:CreateLogGroup",
            "Resource": "*"
        },
        {
            "Effect": "Allow",
   

In [22]:
role

'arn:aws:iam::136605741915:role/SageMakerExeutionRole_Custom'

In [23]:
import boto3
import sagemaker

In [24]:
sm_sess = sagemaker.Session()
region = boto3.session.Session().region_name

# Boto clients provide a low-level interface to the AWS services
sageM = boto3.Session().client("sagemaker")

In [25]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from time import sleep, gmtime, strftime
import json
import time

In [26]:
# Import SageMaker Experiments
from sagemaker.analytics import ExperimentAnalytics
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent
from smexperiments.tracker import Tracker

## S3 buckets and prefixes

In [27]:
rawbucket = sm_sess.default_bucket()

prefix = "sagemaker-modelmonitor"  # use this prefix to store all files pertaining to this workshop.

dataprefix = prefix + "/data"
traindataprefix = prefix + "/train_data"
testdataprefix = prefix + "/test_data"
testdatanolabelprefix = prefix + "/test_data_no_label"
trainheaderprefix = prefix + "/train_headers"

## Read raw data

In [28]:
data = pd.read_excel("default of credit card clients.xls", header=1)
data = data.drop(columns=["ID"])
data.rename(columns={"default payment next month": "Label"}, inplace=True)
lbl = data.Label
data = pd.concat([lbl, data.drop(columns=["Label"])], axis=1)

In [29]:
if not os.path.exists('rawdata/rawdata.csv'):
    !mkdir rawdata
    data.to_csv('rawdata/rawdata.csv', index=None)
else:
    pass

In [30]:
# Upload the raw dataset
raw_data_location = sm_sess.upload_data("rawdata", bucket=rawbucket, key_prefix=dataprefix)
print(raw_data_location)

s3://sagemaker-us-east-2-136605741915/sagemaker-modelmonitor/data


## SageMaker Preprocessing

### Runs a processing job using SKLearnProcessor class from the the SageMaker Python SDK

In [31]:
# Copy the preprocessing code over to the s3 bucket
codeprefix = prefix + "/code"
codeupload = sm_sess.upload_data("preprocessing.py", bucket=rawbucket, key_prefix=codeprefix)
print(codeupload)

s3://sagemaker-us-east-2-136605741915/sagemaker-modelmonitor/code/preprocessing.py


In [32]:
train_data_location = rawbucket + "/" + traindataprefix
test_data_location = rawbucket + "/" + testdataprefix
print("Training data location = {}".format(train_data_location))
print("Test data location = {}".format(test_data_location))

Training data location = sagemaker-us-east-2-136605741915/sagemaker-modelmonitor/train_data
Test data location = sagemaker-us-east-2-136605741915/sagemaker-modelmonitor/test_data


In [33]:
## Use SageMaker Processing with Sk Learn. -- combine data into train and test at this stage if possible.
from sagemaker.sklearn.processing import SKLearnProcessor

sklearn_processor = SKLearnProcessor(framework_version="0.20.0", role=role, instance_type="ml.c4.xlarge", instance_count=1)

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Defaulting to only available Python version: py3


In [34]:
from sagemaker.processing import ProcessingInput, ProcessingOutput

sklearn_processor.run(
    code=codeupload,
    inputs=[ProcessingInput(source=raw_data_location, destination="/opt/ml/processing/input")],
    outputs=[
        ProcessingOutput(
            output_name="train_data",
            source="/opt/ml/processing/train",
            destination="s3://" + train_data_location,
        ),
        ProcessingOutput(
            output_name="test_data",
            source="/opt/ml/processing/test",
            destination="s3://" + test_data_location,
        ),
        ProcessingOutput(
            output_name="train_data_headers",
            source="/opt/ml/processing/train_headers",
            destination="s3://" + rawbucket + "/" + prefix + "/train_headers",
        ),
    ],
    arguments=["--train-test-split-ratio", "0.2"],
)

preprocessing_job_description = sklearn_processor.jobs[-1].describe()


output_config = preprocessing_job_description["ProcessingOutputConfig"]
for output in output_config["Outputs"]:
    if output["OutputName"] == "train_data":
        preprocessed_training_data = output["S3Output"]["S3Uri"]
    if output["OutputName"] == "test_data":
        preprocessed_test_data = output["S3Output"]["S3Uri"]

INFO:sagemaker:Creating processing-job with name sagemaker-scikit-learn-2022-02-10-03-34-55-520



Job Name:  sagemaker-scikit-learn-2022-02-10-03-34-55-520
Inputs:  [{'InputName': 'input-1', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-2-136605741915/sagemaker-modelmonitor/data', 'LocalPath': '/opt/ml/processing/input', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'code', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-2-136605741915/sagemaker-modelmonitor/code/preprocessing.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'train_data', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-east-2-136605741915/sagemaker-modelmonitor/train_data', 'LocalPath': '/opt/ml/processing/train', 'S3UploadMode': 'EndOfJob'}}, {'OutputName': 'test_data', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sa

# Part 2: Building the container and training the model

## SageMaker Experiment

In [35]:
# Create a SageMaker Experiment

my_experiment = Experiment.create(experiment_name=f"CreditCardDefault-{int(time.time())}",
                                 description = "Predict credit card default from payments data",
                                 sagemaker_boto_client=sageM)

In [36]:
print(my_experiment)

Experiment(sagemaker_boto_client=<botocore.client.SageMaker object at 0x7f1c75682050>,experiment_name='CreditCardDefault-1644464408',description='Predict credit card default from payments data',tags=None,experiment_arn='arn:aws:sagemaker:us-east-2:136605741915:experiment/creditcarddefault-1644464408',response_metadata={'RequestId': '42e356b3-1d9f-4ed1-922e-b66bf1cde2ea', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '42e356b3-1d9f-4ed1-922e-b66bf1cde2ea', 'content-type': 'application/x-amz-json-1.1', 'content-length': '100', 'date': 'Thu, 10 Feb 2022 03:40:08 GMT'}, 'RetryAttempts': 0})


In [37]:
# Start Tracking parameters used in the Pre-processing pipeline.
with Tracker.create(display_name="Preprocessing", sagemaker_boto_client=sageM) as tracker:
    tracker.log_parameters({"train_test_split_ratio": 0.2, "random_state": 0})
    
    # we can log the s3 uri to the dataset we just uploaded
    tracker.log_input(name="ccdefault-raw-dataset", media_type="s3/uri", value=raw_data_location)
    tracker.log_input(name="ccdefault-train-dataset", media_type="s3/uri", value=train_data_location)
    tracker.log_input(name="ccdefault-test-dataset", media_type="s3/uri", value=test_data_location)

In [40]:
!sm-docker build .

Created ECR repository sagemaker-studio-d-c3dw70cqdizn
.....[Container] 2022/02/10 04:04:08 Waiting for agent ping

[Container] 2022/02/10 04:04:09 Waiting for DOWNLOAD_SOURCE
[Container] 2022/02/10 04:04:12 Phase is DOWNLOAD_SOURCE
[Container] 2022/02/10 04:04:12 CODEBUILD_SRC_DIR=/codebuild/output/src583262899/src
[Container] 2022/02/10 04:04:12 YAML location is /codebuild/output/src583262899/src/buildspec.yml
[Container] 2022/02/10 04:04:12 Setting HTTP client timeout to higher timeout for S3 source
[Container] 2022/02/10 04:04:12 Processing environment variables
[Container] 2022/02/10 04:04:12 No runtime version selected in buildspec.
[Container] 2022/02/10 04:04:12 Moving to directory /codebuild/output/src583262899/src
[Container] 2022/02/10 04:04:12 Configuring ssm agent with target id: codebuild:ba29cdb8-87eb-4e08-816f-e874d7368e65
[Container] 2022/02/10 04:04:12 Successfully updated ssm agent configuration
[Container] 2022/02/10 04:04:12 Registering with agent
[Container] 2022/

In [46]:
account = sm_sess.boto_session.client("sts").get_caller_identity()["Account"]
ecr = boto3.client("ecr")
domain_id = "sagemaker-studio-{}".format(sageM.list_apps()["Apps"][0]["DomainId"])
image_tag = ecr.list_images(repositoryName=domain_id, filter={"tagStatus": "TAGGED"})["imageIds"][0]["imageTag"]

IndexError: list index out of range

In [48]:
ecr.list_images(repositoryName=domain_id, filter={"tagStatus": "TAGGED"})["imageIds"]

[]

In [45]:
"sagemaker-studio-{}".format(sageM.list_apps()["Apps"][0]["DomainId"])

'sagemaker-studio-d-c3dw70cqdizn'

In [None]:
image = "{}.dkr.ecr.{}.amazonaws.com/{}:{}".format(account, region, domain_id, image_tag)
preprocessing_trial_component = tracker.trial_component

trial_name = f"cc-fraud-training-job-{int(time.time())}"
cc_trial = Trial.create(
    trial_name=trial_name, experiment_name=cc_experiment.experiment_name, sagemaker_boto_client=sm
)

cc_trial.add_trial_component(preprocessing_trial_component)
cc_training_job_name = "cc-training-job-{}".format(int(time.time()))