# Imports

In [1]:
import io
import sys
import os

import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd 
%matplotlib inline

In [2]:
from pathlib import Path
import json
import subprocess

In [3]:
import boto3
import sagemaker
import docker
from sagemaker import get_execution_role

In [4]:
from sagemaker.sklearn import SKLearn
from sagemaker.local import LocalSession

In [5]:
from sklearn.model_selection import train_test_split

# Set up session

In [6]:
# SageMaker session plus the IAM role this notebook runs under
sagemaker_session = sagemaker.Session()
role = get_execution_role()

# default S3 bucket of the session; data, code and model artifacts go here
bucket = sagemaker_session.default_bucket()

In [28]:
# Name for the SageMaker endpoint; presumably used when the model is deployed
# later in the notebook (not referenced in the visible part) -- TODO confirm
ENDPOINT_NAME = "credit-explainer"

In [8]:
# Assumes `role` is an IAM role ARN of the form arn:aws:iam::<account-id>:role/<name>,
# in which case the fifth colon-separated field (index 4) is the AWS account id.
AWS_ACCOUNT_ID = role.split(":")[4]
# Region the SageMaker session's boto session is bound to
AWS_REGION = sagemaker_session.boto_region_name

## ECR Repository

Since we will be deploying a custom Docker container, we need to make sure we have an ECR Docker registry set up.

You can go to the console to create one (mine is in `eu-central-1`, so go to https://eu-central-1.console.aws.amazon.com/ecr/repositories?region=eu-central-1).

The name I gave to my repository is `sagemaker-explainer`.

In [9]:
# ECR repository created above, and the (untagged) image URI inside this account's registry
ECR_REPOSITORY_NAME = 'sagemaker-explainer'
CUSTOM_IMAGE_URI = f"{AWS_ACCOUNT_ID}.dkr.ecr.{AWS_REGION}.amazonaws.com/{ECR_REPOSITORY_NAME}"

# Load data

In [10]:
# the data folder is a sibling of the notebook's working directory
data_dir = Path.cwd().parent.joinpath("data")
data_dir

PosixPath('/home/ec2-user/SageMaker/sagemaker-creditscore-explainer/sagemaker/data')

In [11]:
# read both splits from the local data folder (train first, then test)
train_df, test_df = (
    pd.read_csv(data_dir / csv_name) for csv_name in ("train.csv", "test.csv")
)

In [12]:
# Column of train.csv treated as the label (it appears as the second column in the preview below)
TARGET = 'SeriousDlqin2yrs'

In [13]:
# Preview the training frame; the rendered output is abbreviated to the first and last rows
train_df

Unnamed: 0,SeriousDlqin2yrs,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents
0,0,1.000000,29,0,0.167191,2541.0,3,0,0,0,2.0
1,0,0.022649,65,0,0.402510,9241.0,12,0,2,0,1.0
2,0,0.435809,55,1,0.380973,6800.0,25,1,1,1,2.0
3,0,0.056438,56,0,1740.000000,,5,0,1,0,0.0
4,0,0.254972,29,2,0.013997,5000.0,4,0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
17995,0,0.000000,79,0,0.000000,,4,0,0,0,
17996,0,0.008116,64,0,0.117417,10500.0,9,0,1,0,0.0
17997,0,0.141855,76,1,0.206648,2917.0,9,0,0,0,0.0
17998,0,0.831707,64,0,2782.000000,,18,0,0,0,0.0


In [14]:
# S3 key prefixes: one for everything this notebook stores, one for the model artifacts
bucket_prefix = 'sagemaker-explainer'
model_output_prefix = 'model_output'

# push both CSV files under <bucket>/<bucket_prefix>/ and keep what upload_data returns
train_data, test_data = (
    sagemaker_session.upload_data(str(data_dir / csv_name), bucket, key_prefix=bucket_prefix)
    for csv_name in ("train.csv", "test.csv")
)

# Set up custom docker training container

## install `docker-credential-ecr-login`

Before we can deploy a custom Docker image that includes the shap library, we first have to install `docker-credential-ecr-login`.

Run the cells below to install `docker-credential-ecr-login` (skip them if it is already installed):

In [15]:
# Download the amazon-ecr-credential-helper binary (release 0.4.0, linux-amd64) into /usr/bin; needs sudo.
# Comment this line out if docker-credential-ecr-login is already installed.
!sudo wget -P /usr/bin https://amazon-ecr-credential-helper-releases.s3.us-east-2.amazonaws.com/0.4.0/linux-amd64/docker-credential-ecr-login
              

--2020-08-17 18:05:30--  https://amazon-ecr-credential-helper-releases.s3.us-east-2.amazonaws.com/0.4.0/linux-amd64/docker-credential-ecr-login
Resolving amazon-ecr-credential-helper-releases.s3.us-east-2.amazonaws.com (amazon-ecr-credential-helper-releases.s3.us-east-2.amazonaws.com)... 52.219.100.176
Connecting to amazon-ecr-credential-helper-releases.s3.us-east-2.amazonaws.com (amazon-ecr-credential-helper-releases.s3.us-east-2.amazonaws.com)|52.219.100.176|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 8577024 (8.2M) [binary/octet-stream]
Saving to: ‘/usr/bin/docker-credential-ecr-login’


2020-08-17 18:05:32 (5.93 MB/s) - ‘/usr/bin/docker-credential-ecr-login’ saved [8577024/8577024]



In [16]:
# Make the downloaded credential helper executable; needs sudo.
!sudo chmod +x /usr/bin/docker-credential-ecr-login

## Attach `AmazonEC2ContainerRegistryFullAccess` policy to sagemaker notebook

In order to create a custom Docker container containing the shap library for our model, we need to attach the `AmazonEC2ContainerRegistryFullAccess` policy to the IAM role of our notebook instance.

In the SageMaker console:

- click on notebook instances
- click on the notebook instance that you are using
- go to Permissions and encryption
- click on the `IAM role ARN`
- click on 'Attach Policies'
- find `AmazonEC2ContainerRegistryFullAccess`
- attach it to the role used by the notebook.

### Attach additional policies: 


You may have to add some additional permissions to your notebook policies, namely `"ecr:GetDownloadUrlForLayer"`, `"ecr:BatchGetImage"` and `"ecr:BatchCheckLayerAvailability"`. 

You can either edit these manually, or paste the following json:

```json
{
    "Version": "2008-10-17",
    "Statement": [
        {
            "Sid": "allowSageMakerToPull",
            "Effect": "Allow",
            "Action": [
                "ecr:GetDownloadUrlForLayer",
                "ecr:BatchGetImage",
                "ecr:BatchCheckLayerAvailability"
            ],
            "Resource": "*"
        }
    ]
}
```

In [17]:
# boto3 ECR client for the session's region (not referenced in the visible part of this notebook)
ecr_client = boto3.client("ecr", region_name=AWS_REGION)
# docker-py low-level API client; DockerImage.build and DockerImage.push call it
docker_client = docker.APIClient()

class DockerImage:
    """Handle on a Docker image in a registry, built and pushed through docker-py.

    Creating an instance has side effects: it tries to launch the
    ``docker-credential-ecr-login`` helper and registers that helper for
    ``registry`` in the Docker config file.

    ``build`` and ``push`` use the module-level ``docker_client``.
    """

    def __init__(self, registry, repository_name, tag="latest",
                 docker_config_filepath='/home/ec2-user/.docker/config.json'):
        """Store the image coordinates and set up the ECR credential helper.

        Args:
            registry: registry host the image belongs to.
            repository_name: repository inside the registry; also used as the
                local image name when building.
            tag: image tag.
            docker_config_filepath: Docker client config file that gets the
                ``credHelpers`` entry for ``registry``.

        Raises:
            Exception: if ``docker-credential-ecr-login`` cannot be launched.
        """
        self.registry = registry
        self.repository_name = repository_name
        self.docker_config_filepath = docker_config_filepath
        self.tag = tag
        self._check_credential_manager()
        self._configure_credentials()

    def __str__(self):
        """Return the full image reference ``<registry>/<repository>:<tag>``."""
        return "{}/{}:{}".format(self.registry, self.repository_name, self.tag)

    @property
    def repository(self):
        """Image reference without the tag: ``<registry>/<repository>``."""
        return "{}/{}".format(self.registry, self.repository_name)

    @property
    def short_name(self):
        """Repository name without the registry host."""
        return self.repository_name

    @staticmethod
    def _check_credential_manager():
        """Check that the ``docker-credential-ecr-login`` executable can be launched.

        Only the ability to start the process is checked; its exit status is
        not inspected.

        Raises:
            Exception: if the process cannot be started; the original error is
                attached as ``__cause__``.
        """
        try:
            subprocess.run(
                ["docker-credential-ecr-login", "version"],
                stdout=subprocess.DEVNULL,
            )
        except Exception as err:
            # Chain the original error so the real cause (missing binary,
            # missing execute permission, ...) stays visible in the traceback.
            raise Exception(
                "Couldn't run 'docker-credential-ecr-login'. "
                "Make sure it is installed and configured correctly."
            ) from err

    def _configure_credentials(self):
        """Register ``ecr-login`` as the credential helper for ``self.registry``.

        Reads the Docker config file if it exists, adds or overwrites the
        ``credHelpers`` entry for the registry, and writes the file back,
        creating parent directories when needed. Other settings are kept.
        """
        docker_config_filepath = Path(self.docker_config_filepath)
        if docker_config_filepath.exists():
            with open(docker_config_filepath, "r") as openfile:
                docker_config = json.load(openfile)
        else:
            docker_config = {}
        docker_config.setdefault("credHelpers", {})[self.registry] = "ecr-login"
        docker_config_filepath.parent.mkdir(exist_ok=True, parents=True)
        with open(docker_config_filepath, "w") as openfile:
            json.dump(docker_config, openfile, indent=4)

    def build(self, dockerfile, buildargs=None):
        """Build the image locally and tag it with ``repository_name``.

        Args:
            dockerfile: path to the Dockerfile; its parent directory is used
                as the build context.
            buildargs: optional dict of build-time ``ARG`` values.

        Raises:
            Exception: with the daemon's message if the build stream reports an error.
        """
        dockerfile = Path(dockerfile)
        for line in docker_client.build(
            path=str(dockerfile.parent),
            # Pass the file name explicitly; otherwise only a file literally
            # named "Dockerfile" in the context directory would be used.
            dockerfile=dockerfile.name,
            buildargs=buildargs,
            tag=self.repository_name,
            decode=True,
        ):
            if "error" in line:
                raise Exception(line["error"])
            print(line)

    def push(self):
        """Tag the local image as ``<registry>/<repository>:<tag>`` and push it.

        Raises:
            Exception: with the daemon's message if the push stream reports an error.
        """
        docker_client.tag(
            self.repository_name, self.repository, self.tag, force=True
        )
        for line in docker_client.push(
            self.repository, self.tag, stream=True, decode=True
        ):
            # A failed push (e.g. denied credentials) arrives as an "error"
            # entry in the stream; fail loudly instead of just printing it.
            if "error" in line:
                raise Exception(line["error"])
            print(line)

## Get base scikit-learn image

This is the base image for our docker image.

In [18]:
def scikit_learn_image(tag="0.20.0-cpu-py3", region_name=None):
    """Return a DockerImage for the SageMaker scikit-learn base container.

    Args:
        tag: tag of the ``sagemaker-scikit-learn`` image; defaults to the
            version this notebook was written against.
        region_name: AWS region whose framework registry is used; defaults to
            the notebook's ``AWS_REGION``.

    Returns:
        DockerImage pointing at ``<registry>/sagemaker-scikit-learn:<tag>``,
        where the registry is the one ``sagemaker.fw_registry.registry``
        reports for the scikit-learn framework in that region.
    """
    if region_name is None:
        region_name = AWS_REGION
    registry = sagemaker.fw_registry.registry(
        region_name=region_name, framework="scikit-learn"
    )
    repository_name = "sagemaker-scikit-learn"
    return DockerImage(registry, repository_name, tag)

sklearn_image = scikit_learn_image()
str(sklearn_image)

'492215442770.dkr.ecr.eu-central-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3'

## Build custom image based on this scikit-learn image

The `Dockerfile` looks as follows:

```Dockerfile
ARG SCIKIT_LEARN_IMAGE
FROM $SCIKIT_LEARN_IMAGE

COPY requirements.txt /requirements.txt
RUN pip install --no-cache -r /requirements.txt && \
    rm /requirements.txt
```

So we start with a `$SCIKIT_LEARN_IMAGE` as a base (given as a build argument), and then install the additional requirements (in our case `joblib` and `shap`).

In [20]:
def make_custom_image(aws_account_id, aws_region, repository_name, tag="latest"):
    """Return a DockerImage for `repository_name` in the account's own ECR registry.

    Args:
        aws_account_id: AWS account id that owns the ECR registry.
        aws_region: region of the ECR registry.
        repository_name: name of the ECR repository.
        tag: image tag.
    """
    ecr_registry = f"{aws_account_id}.dkr.ecr.{aws_region}.amazonaws.com"
    return DockerImage(ecr_registry, repository_name, tag)

# The factory has its own name so that `custom_image` (the DockerImage used by
# later cells) does not overwrite it; the cell can therefore be re-run.
custom_image = make_custom_image(AWS_ACCOUNT_ID, AWS_REGION, ECR_REPOSITORY_NAME)

dockerfile = Path.cwd().parent / "container" / "Dockerfile"

# Build on top of the scikit-learn base image, then push the result to ECR
custom_image.build(
    dockerfile=dockerfile,
    buildargs={'SCIKIT_LEARN_IMAGE': str(sklearn_image)}
)

custom_image.push()


{'stream': 'Step 1/4 : ARG SCIKIT_LEARN_IMAGE'}
{'stream': '\n'}
{'stream': 'Step 2/4 : FROM $SCIKIT_LEARN_IMAGE'}
{'stream': '\n'}
{'status': 'Pulling from sagemaker-scikit-learn', 'id': '0.20.0-cpu-py3'}
{'status': 'Pulling fs layer', 'progressDetail': {}, 'id': 'dadc679dc38f'}
{'status': 'Pulling fs layer', 'progressDetail': {}, 'id': '9e622765e19e'}
{'status': 'Pulling fs layer', 'progressDetail': {}, 'id': '3742832a87c6'}
{'status': 'Pulling fs layer', 'progressDetail': {}, 'id': '34c6cdf813d1'}
{'status': 'Pulling fs layer', 'progressDetail': {}, 'id': '7582aa0b1ed5'}
{'status': 'Pulling fs layer', 'progressDetail': {}, 'id': 'f42eda49fdd2'}
{'status': 'Pulling fs layer', 'progressDetail': {}, 'id': 'ac8c022aaad2'}
{'status': 'Pulling fs layer', 'progressDetail': {}, 'id': '91cf5329f2d2'}
{'status': 'Pulling fs layer', 'progressDetail': {}, 'id': 'a170f2e57d1e'}
{'status': 'Pulling fs layer', 'progressDetail': {}, 'id': 'e54f9d9f1590'}
{'status': 'Waiting', 'progressDetail': {}, 

{'status': 'Extracting', 'progressDetail': {'current': 3670016, 'total': 43250279}, 'progress': '[====>                                              ]   3.67MB/43.25MB', 'id': 'dadc679dc38f'}
{'status': 'Downloading', 'progressDetail': {'current': 10791423, 'total': 378859596}, 'progress': '[=>                                                 ]  10.79MB/378.9MB', 'id': 'ac8c022aaad2'}
{'status': 'Downloading', 'progressDetail': {'current': 14568959, 'total': 378859596}, 'progress': '[=>                                                 ]  14.57MB/378.9MB', 'id': 'ac8c022aaad2'}
{'status': 'Downloading', 'progressDetail': {'current': 18346495, 'total': 378859596}, 'progress': '[==>                                                ]  18.35MB/378.9MB', 'id': 'ac8c022aaad2'}
{'status': 'Downloading', 'progressDetail': {'current': 20505087, 'total': 378859596}, 'progress': '[==>                                                ]  20.51MB/378.9MB', 'id': 'ac8c022aaad2'}
{'status': 'Downloading', 'p

{'status': 'Verifying Checksum', 'progressDetail': {}, 'id': '7582aa0b1ed5'}
{'status': 'Download complete', 'progressDetail': {}, 'id': '7582aa0b1ed5'}
{'status': 'Downloading', 'progressDetail': {'current': 538113, 'total': 56934490}, 'progress': '[>                                                  ]  538.1kB/56.93MB', 'id': '91cf5329f2d2'}
{'status': 'Downloading', 'progressDetail': {'current': 2696705, 'total': 56934490}, 'progress': '[==>                                                ]  2.697MB/56.93MB', 'id': '91cf5329f2d2'}
{'status': 'Downloading', 'progressDetail': {'current': 5394945, 'total': 56934490}, 'progress': '[====>                                              ]  5.395MB/56.93MB', 'id': '91cf5329f2d2'}


{'status': 'Verifying Checksum', 'progressDetail': {}, 'id': 'f42eda49fdd2'}
{'status': 'Download complete', 'progressDetail': {}, 'id': 'f42eda49fdd2'}
{'status': 'Verifying Checksum', 'progressDetail': {}, 'id': 'a170f2e57d1e'}
{'status': 'Download complete', 'progressDetail': {}, 'id': 'a170f2e57d1e'}
{'status': 'Downloading', 'progressDetail': {'current': 172545, 'total': 15919073}, 'progress': '[>                                                  ]  172.5kB/15.92MB', 'id': 'e54f9d9f1590'}
{'status': 'Verifying Checksum', 'progressDetail': {}, 'id': 'e54f9d9f1590'}
{'status': 'Download complete', 'progressDetail': {}, 'id': 'e54f9d9f1590'}
{'status': 'Verifying Checksum', 'progressDetail': {}, 'id': '91cf5329f2d2'}
{'status': 'Download complete', 'progressDetail': {}, 'id': '91cf5329f2d2'}


{'status': 'Pull complete', 'progressDetail': {}, 'id': 'dadc679dc38f'}
{'status': 'Verifying Checksum', 'progressDetail': {}, 'id': 'ac8c022aaad2'}
{'status': 'Download complete', 'progressDetail': {}, 'id': 'ac8c022aaad2'}
{'status': 'Pull complete', 'progressDetail': {}, 'id': '9e622765e19e'}
{'status': 'Pull complete', 'progressDetail': {}, 'id': '3742832a87c6'}
{'status': 'Pull complete', 'progressDetail': {}, 'id': '34c6cdf813d1'}
{'status': 'Extracting', 'progressDetail': {'current': 557056, 'total': 143601687}, 'progress': '[>                                                  ]  557.1kB/143.6MB', 'id': '7582aa0b1ed5'}
{'status': 'Extracting', 'progressDetail': {'current': 4456448, 'total': 143601687}, 'progress': '[=>                                                 ]  4.456MB/143.6MB', 'id': '7582aa0b1ed5'}
{'status': 'Extracting', 'progressDetail': {'current': 9469952, 'total': 143601687}, 'progress': '[===>                                               ]   9.47MB/143.6MB', 'id

{'status': 'Pull complete', 'progressDetail': {}, 'id': '7582aa0b1ed5'}
{'status': 'Extracting', 'progressDetail': {'current': 557056, 'total': 143422943}, 'progress': '[>                                                  ]  557.1kB/143.4MB', 'id': 'f42eda49fdd2'}
{'status': 'Extracting', 'progressDetail': {'current': 5013504, 'total': 143422943}, 'progress': '[=>                                                 ]  5.014MB/143.4MB', 'id': 'f42eda49fdd2'}
{'status': 'Extracting', 'progressDetail': {'current': 7241728, 'total': 143422943}, 'progress': '[==>                                                ]  7.242MB/143.4MB', 'id': 'f42eda49fdd2'}
{'status': 'Extracting', 'progressDetail': {'current': 11698176, 'total': 143422943}, 'progress': '[====>                                              ]   11.7MB/143.4MB', 'id': 'f42eda49fdd2'}
{'status': 'Extracting', 'progressDetail': {'current': 16154624, 'total': 143422943}, 'progress': '[=====>                                             ]  16

{'status': 'Pull complete', 'progressDetail': {}, 'id': 'f42eda49fdd2'}
{'status': 'Extracting', 'progressDetail': {'current': 557056, 'total': 378859596}, 'progress': '[>                                                  ]  557.1kB/378.9MB', 'id': 'ac8c022aaad2'}
{'status': 'Extracting', 'progressDetail': {'current': 3342336, 'total': 378859596}, 'progress': '[>                                                  ]  3.342MB/378.9MB', 'id': 'ac8c022aaad2'}
{'status': 'Extracting', 'progressDetail': {'current': 5570560, 'total': 378859596}, 'progress': '[>                                                  ]  5.571MB/378.9MB', 'id': 'ac8c022aaad2'}
{'status': 'Extracting', 'progressDetail': {'current': 6684672, 'total': 378859596}, 'progress': '[>                                                  ]  6.685MB/378.9MB', 'id': 'ac8c022aaad2'}
{'status': 'Extracting', 'progressDetail': {'current': 7241728, 'total': 378859596}, 'progress': '[>                                                  ]  7.24



{'status': 'Pull complete', 'progressDetail': {}, 'id': 'ac8c022aaad2'}
{'status': 'Extracting', 'progressDetail': {'current': 557056, 'total': 56934490}, 'progress': '[>                                                  ]  557.1kB/56.93MB', 'id': '91cf5329f2d2'}
{'status': 'Extracting', 'progressDetail': {'current': 3899392, 'total': 56934490}, 'progress': '[===>                                               ]  3.899MB/56.93MB', 'id': '91cf5329f2d2'}


{'status': 'Pull complete', 'progressDetail': {}, 'id': '91cf5329f2d2'}
{'status': 'Pull complete', 'progressDetail': {}, 'id': 'a170f2e57d1e'}
{'status': 'Extracting', 'progressDetail': {'current': 163840, 'total': 15919073}, 'progress': '[>                                                  ]  163.8kB/15.92MB', 'id': 'e54f9d9f1590'}
{'status': 'Extracting', 'progressDetail': {'current': 327680, 'total': 15919073}, 'progress': '[=>                                                 ]  327.7kB/15.92MB', 'id': 'e54f9d9f1590'}
{'status': 'Pull complete', 'progressDetail': {}, 'id': 'e54f9d9f1590'}
{'status': 'Digest: sha256:1476311657fe444be8f95f472a8a91a3da577d558e1046678ee76c611f7132ca'}
{'status': 'Status: Downloaded newer image for 492215442770.dkr.ecr.eu-central-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3'}
{'stream': ' ---> 30adb1aa9af5\n'}
{'stream': 'Step 3/4 : COPY requirements.txt /requirements.txt'}
{'stream': '\n'}
{'stream': ' ---> 3ccc6e1b2a88\n'}
{'stream': 'Step 4/4 

{'stream': '  Found existing installation: numpy 1.17.3\n'}
{'stream': '    Uninstalling numpy-1.17.3:\n'}
{'stream': '      Successfully uninstalled numpy-1.17.3\n'}
{'stream': '  Found existing installation: six 1.12.0\n'}
{'stream': '    Uninstalling six-1.12.0:\n'}
{'stream': '      Successfully uninstalled six-1.12.0\n'}
{'stream': '  Found existing installation: python-dateutil 2.8.0\n'}
{'stream': '    Uninstalling python-dateutil-2.8.0:\n'}
{'stream': '      Successfully uninstalled python-dateutil-2.8.0\n'}
{'stream': '  Found existing installation: pytz 2019.3\n'}
{'stream': '    Uninstalling pytz-2019.3:\n'}
{'stream': '      Successfully uninstalled pytz-2019.3\n'}
{'stream': '  Found existing installation: pandas 0.25.2\n'}
{'stream': '    Uninstalling pandas-0.25.2:\n'}
{'stream': '      Successfully uninstalled pandas-0.25.2\n'}
{'stream': '  Found existing installation: scipy 1.3.1\n'}
{'stream': '    Uninstalling scipy-1.3.1:\n'}
{'stream': '      Successfully uninstal

{'status': 'Pushing', 'progressDetail': {'current': 20532736, 'total': 188045294}, 'progress': '[=====>                                             ]  20.53MB/188MB', 'id': '23b3530769e5'}
{'status': 'Pushing', 'progressDetail': {'current': 10807883, 'total': 222875314}, 'progress': '[==>                                                ]  10.81MB/222.9MB', 'id': '8813ed8e8df0'}
{'status': 'Pushing', 'progressDetail': {'current': 22203904, 'total': 188045294}, 'progress': '[=====>                                             ]   22.2MB/188MB', 'id': '23b3530769e5'}
{'status': 'Pushing', 'progressDetail': {'current': 11858216, 'total': 222875314}, 'progress': '[==>                                                ]  11.86MB/222.9MB', 'id': '8813ed8e8df0'}
{'status': 'Pushed', 'progressDetail': {}, 'id': 'cfe0cbc5dd2c'}
{'status': 'Pushing', 'progressDetail': {'current': 13455308, 'total': 222875314}, 'progress': '[===>                                               ]  13.46MB/222.9MB', 'id': 

{'status': 'Pushing', 'progressDetail': {'current': 1095680, 'total': 278852740}, 'progress': '[>                                                  ]  1.096MB/278.9MB', 'id': 'a5a688002069'}
{'status': 'Pushing', 'progressDetail': {'current': 24778240, 'total': 222875314}, 'progress': '[=====>                                             ]  24.78MB/222.9MB', 'id': '8813ed8e8df0'}
{'status': 'Pushing', 'progressDetail': {'current': 4302848, 'total': 1076667337}, 'progress': '[>                                                  ]  4.303MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 2209792, 'total': 278852740}, 'progress': '[>                                                  ]   2.21MB/278.9MB', 'id': 'a5a688002069'}
{'status': 'Pushing', 'progressDetail': {'current': 25304013, 'total': 222875314}, 'progress': '[=====>                                             ]   25.3MB/222.9MB', 'id': '8813ed8e8df0'}
{'status': 'Pushing', 'progressDetail': {'curre

{'status': 'Pushing', 'progressDetail': {'current': 17032704, 'total': 1076667337}, 'progress': '[>                                                  ]  17.03MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 13880320, 'total': 278852740}, 'progress': '[==>                                                ]  13.88MB/278.9MB', 'id': 'a5a688002069'}
{'status': 'Pushing', 'progressDetail': {'current': 18638336, 'total': 1076667337}, 'progress': '[>                                                  ]  18.64MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 21421568, 'total': 1076667337}, 'progress': '[>                                                  ]  21.42MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 14994432, 'total': 278852740}, 'progress': '[==>                                                ]  14.99MB/278.9MB', 'id': 'a5a688002069'}
{'status': 'Pushing', 'progressDetail': {'

{'status': 'Pushing', 'progressDetail': {'current': 27822174, 'total': 1076667337}, 'progress': '[=>                                                 ]  27.82MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 20419072, 'total': 278852740}, 'progress': '[===>                                               ]  20.42MB/278.9MB', 'id': 'a5a688002069'}
{'status': 'Pushing', 'progressDetail': {'current': 20945282, 'total': 278852740}, 'progress': '[===>                                               ]  20.95MB/278.9MB', 'id': 'a5a688002069'}
{'status': 'Pushing', 'progressDetail': {'current': 28348417, 'total': 1076667337}, 'progress': '[=>                                                 ]  28.35MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 21481984, 'total': 278852740}, 'progress': '[===>                                               ]  21.48MB/278.9MB', 'id': 'a5a688002069'}
{'status': 'Pushing', 'progressDetail': {'c

{'status': 'Pushing', 'progressDetail': {'current': 30268928, 'total': 278852740}, 'progress': '[=====>                                             ]  30.27MB/278.9MB', 'id': 'a5a688002069'}
{'status': 'Pushing', 'progressDetail': {'current': 31383040, 'total': 278852740}, 'progress': '[=====>                                             ]  31.38MB/278.9MB', 'id': 'a5a688002069'}
{'status': 'Pushing', 'progressDetail': {'current': 32497152, 'total': 278852740}, 'progress': '[=====>                                             ]   32.5MB/278.9MB', 'id': 'a5a688002069'}
{'status': 'Pushing', 'progressDetail': {'current': 30476501, 'total': 1076667337}, 'progress': '[=>                                                 ]  30.48MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 31002877, 'total': 1076667337}, 'progress': '[=>                                                 ]     31MB/1.077GB', 'id': '8be6c85c96e4'}


{'status': 'Pushing', 'progressDetail': {'current': 31529112, 'total': 1076667337}, 'progress': '[=>                                                 ]  31.53MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 32066021, 'total': 1076667337}, 'progress': '[=>                                                 ]  32.07MB/1.077GB', 'id': '8be6c85c96e4'}


{'status': 'Pushing', 'progressDetail': {'current': 32597598, 'total': 1076667337}, 'progress': '[=>                                                 ]   32.6MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 33128042, 'total': 1076667337}, 'progress': '[=>                                                 ]  33.13MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 33674752, 'total': 1076667337}, 'progress': '[=>                                                 ]  33.67MB/1.077GB', 'id': '8be6c85c96e4'}


{'status': 'Pushing', 'progressDetail': {'current': 34200428, 'total': 1076667337}, 'progress': '[=>                                                 ]   34.2MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 34747904, 'total': 1076667337}, 'progress': '[=>                                                 ]  34.75MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 35276630, 'total': 1076667337}, 'progress': '[=>                                                 ]  35.28MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 35801600, 'total': 1076667337}, 'progress': '[=>                                                 ]   35.8MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 36326400, 'total': 1076667337}, 'progress': '[=>                                                 ]  36.33MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': 

{'status': 'Pushing', 'progressDetail': {'current': 37401239, 'total': 1076667337}, 'progress': '[=>                                                 ]   37.4MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 37928960, 'total': 1076667337}, 'progress': '[=>                                                 ]  37.93MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 38480896, 'total': 1076667337}, 'progress': '[=>                                                 ]  38.48MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 39005591, 'total': 1076667337}, 'progress': '[=>                                                 ]  39.01MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 39539712, 'total': 1076667337}, 'progress': '[=>                                                 ]  39.54MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': 

{'status': 'Pushing', 'progressDetail': {'current': 41656494, 'total': 1076667337}, 'progress': '[=>                                                 ]  41.66MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 42718957, 'total': 1076667337}, 'progress': '[=>                                                 ]  42.72MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 43806874, 'total': 1076667337}, 'progress': '[==>                                                ]  43.81MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 44342272, 'total': 1076667337}, 'progress': '[==>                                                ]  44.34MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 46533168, 'total': 1076667337}, 'progress': '[==>                                                ]  46.53MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': 

{'status': 'Pushing', 'progressDetail': {'current': 57297690, 'total': 1076667337}, 'progress': '[==>                                                ]   57.3MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 57838080, 'total': 1076667337}, 'progress': '[==>                                                ]  57.84MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 58918628, 'total': 1076667337}, 'progress': '[==>                                                ]  58.92MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 60506868, 'total': 1076667337}, 'progress': '[==>                                                ]  60.51MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 63194624, 'total': 1076667337}, 'progress': '[==>                                                ]  63.19MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': 

{'status': 'Pushing', 'progressDetail': {'current': 83514880, 'total': 1076667337}, 'progress': '[===>                                               ]  83.51MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushed', 'progressDetail': {}, 'id': '8813ed8e8df0'}
{'status': 'Pushing', 'progressDetail': {'current': 86152055, 'total': 1076667337}, 'progress': '[====>                                              ]  86.15MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 88297472, 'total': 1076667337}, 'progress': '[====>                                              ]   88.3MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 89367552, 'total': 1076667337}, 'progress': '[====>                                              ]  89.37MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 90440009, 'total': 1076667337}, 'progress': '[====>                                              ]  90.44MB/1.077GB

{'status': 'Pushing', 'progressDetail': {'current': 7100416, 'total': 344538117}, 'progress': '[=>                                                 ]    7.1MB/344.5MB', 'id': '4b7dff825027'}
{'status': 'Pushing', 'progressDetail': {'current': 8176128, 'total': 344538117}, 'progress': '[=>                                                 ]  8.176MB/344.5MB', 'id': '4b7dff825027'}
{'status': 'Pushing', 'progressDetail': {'current': 9227776, 'total': 344538117}, 'progress': '[=>                                                 ]  9.228MB/344.5MB', 'id': '4b7dff825027'}
{'status': 'Pushing', 'progressDetail': {'current': 94708047, 'total': 1076667337}, 'progress': '[====>                                              ]  94.71MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 10320464, 'total': 344538117}, 'progress': '[=>                                                 ]  10.32MB/344.5MB', 'id': '4b7dff825027'}
{'status': 'Pushing', 'progressDetail': {'curre

{'status': 'Pushing', 'progressDetail': {'current': 31094784, 'total': 344538117}, 'progress': '[====>                                              ]  31.09MB/344.5MB', 'id': '4b7dff825027'}
{'status': 'Pushing', 'progressDetail': {'current': 97346877, 'total': 1076667337}, 'progress': '[====>                                              ]  97.35MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 97876992, 'total': 1076667337}, 'progress': '[====>                                              ]  97.88MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 32726528, 'total': 344538117}, 'progress': '[====>                                              ]  32.73MB/344.5MB', 'id': '4b7dff825027'}
{'status': 'Pushing', 'progressDetail': {'current': 34340864, 'total': 344538117}, 'progress': '[====>                                              ]  34.34MB/344.5MB', 'id': '4b7dff825027'}
{'status': 'Pushing', 'progressDetail': {'c

{'status': 'Pushing', 'progressDetail': {'current': 122142716, 'total': 1076667337}, 'progress': '[=====>                                             ]  122.1MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 7013888, 'total': 122214933}, 'progress': '[==>                                                ]  7.014MB/122.2MB', 'id': '29f36b5893dc'}
{'status': 'Pushed', 'progressDetail': {}, 'id': '7f083f9454c0'}
{'status': 'Pushing', 'progressDetail': {'current': 124809614, 'total': 1076667337}, 'progress': '[=====>                                             ]  124.8MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 126393344, 'total': 1076667337}, 'progress': '[=====>                                             ]  126.4MB/1.077GB', 'id': '8be6c85c96e4'}
{'status': 'Pushing', 'progressDetail': {'current': 8063444, 'total': 122214933}, 'progress': '[===>                                               ]  8.063MB/122.2MB'















{'status': 'Pushed', 'progressDetail': {}, 'id': '29f36b5893dc'}






{'status': 'Pushed', 'progressDetail': {}, 'id': '4b7dff825027'}


{'status': 'Pushed', 'progressDetail': {}, 'id': 'a5a688002069'}














{'status': 'Pushed', 'progressDetail': {}, 'id': '8be6c85c96e4'}
{'status': 'latest: digest: sha256:d76564711561faf35c653075fcb5867d7fa8a769bbde3d729b4d672214191e33 size: 2841'}
{'progressDetail': {}, 'aux': {'Tag': 'latest', 'Digest': 'sha256:d76564711561faf35c653075fcb5867d7fa8a769bbde3d729b4d672214191e33', 'Size': 2841}}


# Train model

In [21]:
# Training/inference code lives in the "src" folder beside the notebook's parent directory.
source_dir = Path.cwd().parent.joinpath("src")
source_dir

PosixPath('/home/ec2-user/SageMaker/sagemaker-creditscore-explainer/sagemaker/src')

In [22]:
# Build the S3 URI with explicit "/" joins rather than pathlib: S3 keys are not
# filesystem paths, and Path() would use the host OS separator and silently
# discard earlier components if a later one started with "/".
output_path = "s3://" + "/".join(
    part
    for part in (
        str(piece).strip("/")
        for piece in (bucket, bucket_prefix, model_output_prefix)
    )
    if part  # skip empty components so no "//" appears in the key
)
output_path

's3://sagemaker-eu-central-1-360983820924/sagemaker-explainer/model_output'

In [25]:
# Hyperparameters are intentionally left empty: tuning them is not the focus of this project.
hyperparameters = {}

# Estimator that runs entry_point.py from source_dir inside the custom image.
estimator = SKLearn(
    entry_point="entry_point.py",
    source_dir=str(source_dir),
    image_name=str(custom_image),
    role=role,
    hyperparameters=hyperparameters,
    train_instance_type="ml.m5.2xlarge",
    train_instance_count=1,
    # output_path and code_location deliberately point at the same S3 location
    output_path=output_path,
    code_location=output_path,
)

Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.
This is not the latest supported version. If you would like to use version 0.23-1, please add framework_version=0.23-1 to your constructor.


In [26]:
# Run training, handing over the two inputs keyed as 'train_data' and 'test_data'.
estimator.fit(dict(train_data=train_data, test_data=test_data))

's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.
's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


2020-08-17 18:14:57 Starting - Starting the training job...
2020-08-17 18:14:59 Starting - Launching requested ML instances......
2020-08-17 18:16:01 Starting - Preparing the instances for training...
2020-08-17 18:16:36 Downloading - Downloading input data
2020-08-17 18:16:36 Training - Downloading the training image.....[34m2020-08-17 18:17:32,824 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2020-08-17 18:17:32,826 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-08-17 18:17:32,835 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2020-08-17 18:17:33,056 sagemaker-containers INFO     Module entry_point does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m2020-08-17 18:17:33,056 sagemaker-containers INFO     Generating setup.cfg[0m
[34m2020-08-17 18:17:33,056 sagemaker-containers INFO     Generating MANIFEST.in[0m
[34m2020-08-17 18:17:33,0

# Deploy estimator

In [29]:
# Display the endpoint name that the deploy call below will use.
ENDPOINT_NAME

'credit-explainer'

In [30]:
# Deploy the trained estimator to the endpoint named ENDPOINT_NAME on one ml.t2.medium instance.
estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.t2.medium",
    endpoint_name=ENDPOINT_NAME,
)

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


-----------------!

<sagemaker.sklearn.model.SKLearnPredictor at 0x7f3bee718cc0>

# Create predictor

In [31]:
from sagemaker.predictor import RealTimePredictor
from sagemaker.predictor import json_serializer, json_deserializer, CONTENT_TYPE_JSON

# Predictor bound to the deployed endpoint; requests and responses are JSON.
# Uses ENDPOINT_NAME (the constant also passed to estimator.deploy) instead of
# the lowercase `endpoint_name`, which is not defined in this notebook and
# would only resolve through leftover kernel state.
predictor = RealTimePredictor(
    endpoint=ENDPOINT_NAME,
    sagemaker_session=sagemaker_session,
    serializer=json_serializer,
    deserializer=json_deserializer,
    content_type="application/json",
)

# Test predictor

In [32]:
# Pick one random row of the test set, encode it as JSON records, and send it to the endpoint.
sample_payload = test_df.sample(n=1).to_json(orient="records")
predictor.predict(sample_payload)

{'prediction': [0.05187674979012408],
 'shap_base': 0.0606733333333333,
 'shap_values': {'MonthlyIncome': [-0.001911406670069957],
  'age': [0.007089596509830959],
  'NumberOfDependents': [-0.0010889384741045176],
  'DebtRatio': [-0.0007444030564928647],
  'RevolvingUtilizationOfUnsecuredLines': [-0.01696979592690931],
  'NumberRealEstateLoansOrLines': [-0.0006599644194419679],
  'NumberOfOpenCreditLinesAndLoans': [-0.0010915706111867057],
  'NumberOfTime30-59DaysPastDueNotWorse': [0.022754292981416072],
  'NumberOfTime60-89DaysPastDueNotWorse': [-0.004928530423758262],
  'NumberOfTimes90DaysLate': [-0.011245863452492741]}}

# Now build a Lambda frontend and API Gateway

- this is a good tutorial: https://medium.com/analytics-vidhya/invoke-an-amazon-sagemaker-endpoint-using-aws-lambda-83ff1a9f5443




## `lambda_handler` code

I used the following code for the Lambda handler. It basically just forwards the event, sets `ContentType` to 'application/json', and decodes the response.

Make sure to define `ENDPOINT_NAME` in the environment variables of your Lambda function.

```python
import os
import io
import boto3
import json
import csv

# Name of the endpoint to invoke, read from the function's environment
# (a missing variable raises KeyError when the module is loaded).
ENDPOINT_NAME = os.environ["ENDPOINT_NAME"]
# Client used by the handler to call the endpoint; created once at module load.
runtime = boto3.client("runtime.sagemaker")

def lambda_handler(event, context):
    """Forward the incoming event to the endpoint and return its decoded JSON reply.

    Args:
        event: The invocation payload. A ``str`` is forwarded unchanged; any
            other JSON-serialisable value (for example a dict or list) is
            serialised with ``json.dumps`` first, because ``invoke_endpoint``
            does not accept a dict or list as ``Body``.
        context: Lambda context object; not used.

    Returns:
        The endpoint's response body, parsed from JSON.
    """
    print("Received event: " + json.dumps(event, indent=2))
    # Strings are assumed to be an already-encoded request body and pass through as-is.
    body = event if isinstance(event, str) else json.dumps(event)
    response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                       ContentType='application/json',
                                       Body=body)
    print("raw response: ", response)
    result = json.loads(response['Body'].read().decode())
    print("decoded result: ", result)
    return result
```