## Review Docker File

In [1]:
!cat ../container/Dockerfile

FROM python:3.6

RUN apt-get -y update && apt-get install -y --no-install-recommends \
         wget \
         python \
         nginx \
         ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install all of the packages
RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py

# install code dependencies
COPY "requirements.txt" .
RUN ["pip", "install", "-r", "requirements.txt"]

RUN pip list
# Env Variables
ENV PYTHONUNBUFFERED=TRUE
ENV PYTHONDONTWRITEBYTECODE=TRUE
ENV PATH="/opt/ml:${PATH}"

# Set up the program in the image
COPY scripts /opt/ml
WORKDIR /opt/ml



# Main Libraries used:

# RUN pip install numpy
# RUN pip install scipy
# RUN pip install scikit-learn
# RUN pip install pandas
# RUN pip install flask
# RUN pip install gevent
# RUN pip install gunicorn
# RUN pip install tensorflow==1.15.2
# RUN pip install keras==2.2.4
# RUN pip install h5py
# RUN pip install hyperopt


## Building and registering the container

In [2]:
%%sh

# The name of our algorithm
algorithm_name=dnn

cd ../container

chmod +x scripts/train
chmod +x scripts/serve

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined)
region=$(aws configure get region)
region=${region:-us-west-2}

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.

aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1

if [ $? -ne 0 ]
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Get the login command from ECR and execute it directly
$(aws ecr get-login --region ${region} --no-include-email)

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

docker build  -t ${algorithm_name} .
docker tag ${algorithm_name} ${fullname}

docker push ${fullname}

Login Succeeded
Sending build context to Docker daemon  56.83kB
Step 1/11 : FROM python:3.6
 ---> 3cfab35f43d8
Step 2/11 : RUN apt-get -y update && apt-get install -y --no-install-recommends          wget          python          nginx          ca-certificates     && rm -rf /var/lib/apt/lists/*
 ---> Using cache
 ---> b75d4cd4a101
Step 3/11 : RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py
 ---> Using cache
 ---> b4aaf23f847d
Step 4/11 : COPY "requirements.txt" .
 ---> Using cache
 ---> 05ac400095c3
Step 5/11 : RUN ["pip", "install", "-r", "requirements.txt"]
 ---> Using cache
 ---> b47f4eb1a88a
Step 6/11 : RUN pip list
 ---> Using cache
 ---> d062397de672
Step 7/11 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> b0d23f10a547
Step 8/11 : ENV PYTHONDONTWRITEBYTECODE=TRUE
 ---> Using cache
 ---> f2a7938ad223
Step 9/11 : ENV PATH="/opt/ml:${PATH}"
 ---> Using cache
 ---> 75555c546dca
Step 10/11 : COPY scripts /opt/ml
 ---> Using cache
 ---> 6af50d7af3ee
Step 11/11

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



# Testing the container from this notebook

In [3]:
# S3 prefix
prefix = 'dnn'

# Define IAM role
import boto3
import re
import io
import os
import numpy as np
import pandas as pd
from sagemaker import get_execution_role
import json
from sklearn.preprocessing import StandardScaler

role = get_execution_role()

## Create the session

In [4]:
import sagemaker as sage
from time import gmtime, strftime

sess = sage.Session()

## Upload the data for training

In [12]:
sess

<sagemaker.session.Session at 0x7f20bcdc1780>

In [5]:
WORK_DIRECTORY = 'data'

data_location = sess.upload_data(WORK_DIRECTORY, key_prefix=prefix)

In [6]:
data_location

's3://sagemaker-us-east-1-597844091762/dnn'

In [19]:
!aws s3 ls s3://sagemaker-us-east-1-597844091762/dnn/output/

                           PRE dnn-2020-07-31-23-40-40-767/
                           PRE dnn-2020-08-01-16-49-56-395/


## Create an estimator and fit the model

In [8]:
# !container/local_test/train_local.sh ann-churn

In [34]:
sage.estimator.Estimator?

In [29]:
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name
image = f'{account}.dkr.ecr.{region}.amazonaws.com/{prefix}:latest'

clf = sage.estimator.Estimator(
    image, 
    role, 
    2, 
    'ml.m4.2xlarge',
    output_path="s3://{}/{}/output".format(sess.default_bucket(), prefix),
    sagemaker_session=sess)

Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.


In [30]:
final_training = True

if final_training: # Final Training
    clf.set_hyperparameters(final_training = True,
                            target = 'Target',# arbitrary string
                            batch_normalization = True,
                            include_dropout = True,
                            dropout_f = .2,
                            early_stopping_patience = 15,# Number of epochs with no significant change in metric before early stopping happens 
                            lr_update_patience = 7,
                            loss_metric = 'mae',
                            monitor_metric = 'val_mean_absolute_error',
                            num_layers_f = 8,
                            nodes = [1024,64,1024,32,32,64,512], # The number of nodes (length of "nodes" list) should be num_layers_f-1 because the last layer has 1 node and is automatically added
                            nb_epochs_f = 300,
                            batch_size_f = 32,
                            optimizer_f = 'adam',
                            last_activation_f = 'tanh'
                           )    
else:  # HPO
    clf.set_hyperparameters(final_training = False,
                            target = 'Target',# arbitrary string
                            batch_normalization = True,
                            include_dropout = False,
                            dropout = [.2,.3,.5],
                            early_stopping_patience = 15,# Number of epochs with no significant change in metric before early stopping happens 
                            lr_update_patience = 7,# Number of epochs with no significant change in metric before learning rate decrease
                            loss_metric = 'mae',
                            monitor_metric = 'val_mean_absolute_error',
                            used_data_percentage = 10,
                            train_validation_split = .15,
                            MAX_EVALS = 3,
                            randstate = 50,
                            num_layers_low = 1,
                            num_layers_high = 9,
                            choice_of_node_numbers = [16,32,64,128,256,512,1024,2048], # Here you can give the possible node size for layers. If you want to only have small number of nodes, remove the high values from this list. 
                            nb_epochs = 3,
                            batch_size = [32,64,128],
                            optimizer = ['adam'],
                            last_activation = ['tanh']  # Activation for the layer with one node. Options for this are 'linear' and 'tanh'
                            )


In [31]:
clf.fit(data_location)

's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


2020-08-28 19:56:13 Starting - Starting the training job...
2020-08-28 19:56:16 Starting - Launching requested ML instances...............
2020-08-28 19:59:08 Starting - Preparing the instances for training......
2020-08-28 20:00:12 Downloading - Downloading input data...
2020-08-28 20:00:40 Training - Downloading the training image......
2020-08-28 20:01:38 Training - Training image download completed. Training in progress..[34mUsing TensorFlow backend.[0m
[34mStarting the training.[0m
[34mStarting the final training...[0m
[34mIndex(['F0', 'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10',
       'F11', 'F12', 'F13', 'F14', 'F15', 'F16', 'F17', 'F18', 'F19', 'F20',
       'F21', 'F22', 'F23', 'F24', 'F25', 'F26', 'F27', 'Target'],
      dtype='object')[0m
[34mdata loaded[0m
[0m
[0m
[0m
[0m
[34mInstructions for updating:[0m
[34mPlease use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.[0m
[0m
[34m_______________________________

[35mUsing TensorFlow backend.[0m
[35mStarting the training.[0m
[35mStarting the final training...[0m
[35mIndex(['F0', 'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10',
       'F11', 'F12', 'F13', 'F14', 'F15', 'F16', 'F17', 'F18', 'F19', 'F20',
       'F21', 'F22', 'F23', 'F24', 'F25', 'F26', 'F27', 'Target'],
      dtype='object')[0m
[35mdata loaded[0m
[0m
[0m
[0m
[0m
[35mInstructions for updating:[0m
[35mPlease use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.[0m
[0m
[35m_________________________________________________________________[0m
[35mLayer (type)                 Output Shape              Param #   [0m
[35mdense_1 (Dense)              (None, 1024)              29696     [0m
[35m_________________________________________________________________[0m
[35mbatch_normalization_1 (Batch (None, 1024)              4096      [0m
[35m_________________________________________________________________[0m
[35mdropout_1 

[35m - 2s - loss: 0.2483 - mean_absolute_error: 0.2483 - val_loss: 0.2435 - val_mean_absolute_error: 0.2435
[0m
[35mEpoch 00011: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.[0m
[35mEpoch 12/300[0m
[34m - 2s - loss: 0.2467 - mean_absolute_error: 0.2467 - val_loss: 0.2525 - val_mean_absolute_error: 0.2525[0m
[34mEpoch 12/300[0m
[35m - 2s - loss: 0.2482 - mean_absolute_error: 0.2482 - val_loss: 0.2434 - val_mean_absolute_error: 0.2434[0m
[35mEpoch 13/300[0m
[34m - 2s - loss: 0.2466 - mean_absolute_error: 0.2466 - val_loss: 0.2525 - val_mean_absolute_error: 0.2525[0m
[34mEpoch 13/300[0m
[35m - 2s - loss: 0.2482 - mean_absolute_error: 0.2482 - val_loss: 0.2434 - val_mean_absolute_error: 0.2434[0m
[35mEpoch 14/300[0m
[34m - 2s - loss: 0.2467 - mean_absolute_error: 0.2467 - val_loss: 0.2525 - val_mean_absolute_error: 0.2525
[0m
[34mEpoch 00013: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.[0m
[34mEpoch 14/300[0m
[35m - 

## Deploy the model

In [33]:
from sagemaker.predictor import csv_serializer

# 'ml.m4.2xlarge
predictor = clf.deploy(1, 'ml.t2.medium', serializer=csv_serializer)

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


-----------------!

## Do Inference using your endpoint

In [48]:
endpointName = 'dnn-2020-07-31-23-40-40-767' # Your endpoint name that was created in "Deploy the model" section

from pickle import load

# Process and prepare the data
def transform_data(test_x):
    test_x = test_x.dropna()
    test_x = test_x.astype('float32')
        
    # Feature Scaling
    scaler = load(open('opt/ml/model/scaler.pkl', 'rb'))
    test_x = scaler.fit_transform(test_x)
    return pd.DataFrame(test_x)

test_X = pd.read_csv('df_test.csv')

test_X = transform_data(test_X)


test_file = io.StringIO()
test_X.to_csv(test_file, header=None, index=None)

# Talk to SageMaker
client = boto3.client('sagemaker-runtime')
response = client.invoke_endpoint(
    EndpointName=endpointName,
    Body=test_file.getvalue(),
    ContentType='text/csv',
    Accept='Accept'
)

print(response['Body'].read().decode('ascii'))

0.4543851
0.4524969
-2.281816
-2.3425076
-1.1159251
0.7206936
0.44174922
-2.0166993
0.3614421
0.40492582
0.86388576
0.45966506
0.88729477
-1.9939917
-2.505851
-1.0132473
-1.5082275
0.5550299
0.8951546
0.48231643
-0.1171655
0.8137144
0.8104975
0.4096722
-0.38626385
0.56806266
0.49516064
0.79760003
-2.5226
-2.169464
0.47764283
1.0997182
0.62332654
0.47557318
0.31022048
1.1103781
-0.29530275
-0.3499143
0.80459535
0.6170493
0.5890228
0.9398215
0.44568592
-0.33819556
0.7365062
0.37853813
0.5786757
1.1121311
0.44373238
0.4494444
0.7082635
-2.5089495
0.9290234
0.6302534
0.58756155
0.87174547
1.0767252
-0.62279314
0.6430147
0.72513914
0.81923306
0.44854695
0.92391276
-0.40077397
0.7643955
-0.50616145
0.29813856
0.48851758
0.53562343
0.41239822
0.9418725
0.6214094
0.46062738
0.7342822
0.9403086
0.5665387
1.1065239
0.69793737
0.57264584
0.40309596
0.91506934
0.4914052
0.658716
0.5392608
0.39463478
0.5623907
0.7149785
0.38935357
0.8153
0.659692
0.9099002
0.7353129
0.7275145
0.48609346
0.45560187


In [None]:
# !tar cvfz container_hpo.tar.gz *

## Optional cleanup

In [17]:
# sess.delete_endpoint(predictor.endpoint)

INFO:sagemaker:Deleting endpoint with name: ann-churn-2018-05-27-18-29-21-010
