# Build the container

In [1]:
%%bash
# Run the build_push.sh helper from the container/ directory, passing
# "breast-cancer-nb" as its single argument (presumably the image name --
# confirm against the script, which is not shown in this notebook).

# Stop immediately if container/ is missing, so nothing gets sourced from
# whatever directory the notebook happens to be running in.
cd container || exit 1

# The explicit ./ matters: without a slash, bash's `source` searches $PATH
# before the current directory and could pick up a different build_push.sh.
source ./build_push.sh breast-cancer-nb

Login Succeeded
Sending build context to Docker daemon   51.2kB
Step 1/9 : FROM ubuntu:16.04
 ---> 00fd29ccc6f1
Step 2/9 : LABEL maintainer "v-peter.wills@thetradedesk.com"
 ---> Using cache
 ---> 5a7c2036de47
Step 3/9 : RUN apt-get -y update && apt-get install -y --no-install-recommends          wget          python          nginx          ca-certificates     && rm -rf /var/lib/apt/lists/*
 ---> Using cache
 ---> fe42fe556e46
Step 4/9 : RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py &&     pip install numpy scipy scikit-learn pandas flask gevent gunicorn &&         (cd /usr/local/lib/python2.7/dist-packages/scipy/.libs; rm *; ln ../../numpy/.libs/* .) &&         rm -rf /root/.cache
 ---> Using cache
 ---> 7e3e0e4f22a8
Step 5/9 : ENV PYTHONUNBUFFERED TRUE
 ---> Using cache
 ---> 9aaf9596b240
Step 6/9 : ENV PYTHONDONTWRITEBYTECODE TRUE
 ---> Using cache
 ---> b2f024e9feeb
Step 7/9 : ENV PATH "/opt/program:${PATH}"
 ---> Using cache
 ---> 261540cd0ed9
Step 8/9 : COP

In [2]:
# Standard library
import os
import re
from time import gmtime, strftime

# Third-party
import boto3
import numpy as np
import pandas as pd
import sagemaker as sage

# Key prefix used when uploading the training data to S3
prefix = 'breastCancerNB'

# SageMaker session, and the IAM role returned by get_execution_role()
sess = sage.Session()
role = sage.get_execution_role()

# Build training data

In [3]:
# Build the training CSV once; later runs reuse the file already on disk.
training_dir = 'training_data'
training_csv = os.path.join(training_dir, 'breast_cancer.csv')

if not os.path.exists(training_csv):
    # Imported here because scikit-learn is only needed when the CSV is missing.
    from sklearn.datasets import load_breast_cancer

    # to_csv does not create missing parent directories, so make sure the
    # target directory exists (isdir/makedirs works on Python 2 and 3).
    if not os.path.isdir(training_dir):
        os.makedirs(training_dir)

    breast_cancer = load_breast_cancer()
    # Target goes in the first column, followed by the feature columns;
    # the file is written with no header row and no index column.
    data = pd.concat([pd.Series(breast_cancer.target),
                      pd.DataFrame(breast_cancer.data)], axis=1)
    data.to_csv(training_csv, header=False, index=False)

# Upload training data into S3

In [4]:
# Local directory handed to the session's upload_data call
WORK_DIRECTORY = 'training_data'

# Keep the returned S3 location; it is the input passed to fit() later on.
data_location = sess.upload_data(WORK_DIRECTORY, key_prefix=prefix)

upload_message = 'Contents of directory {} now in S3 at {}'.format(
    WORK_DIRECTORY, data_location)
print(upload_message)

INFO:sagemaker:Created S3 bucket: sagemaker-us-east-1-470403475418


Contents of directory training_data now in S3 at s3://sagemaker-us-east-1-470403475418/breastCancerNB


In [5]:
# Look up the caller's AWS account ID (via STS) and the session's region.
boto_sess = sess.boto_session
sts_client = boto_sess.client('sts')
account = sts_client.get_caller_identity()['Account']
region = boto_sess.region_name

# ECR location of the breast-cancer-nb image for this account and region
image = '{}.dkr.ecr.{}.amazonaws.com/breast-cancer-nb'.format(account, region)

print('Location of Docker image is {}'.format(image))

Location of Docker image is 470403475418.dkr.ecr.us-east-1.amazonaws.com/breast-cancer-nb


In [6]:
from sagemaker.estimator import Estimator

In [7]:
# S3 location handed to the Estimator as output_path, built from the
# session's default bucket.
default_bucket = sess.default_bucket()
output_path = "s3://{}/output".format(default_bucket)

# Collect the Estimator settings in one place before constructing it.
estimator_args = dict(
    image_name=image,
    role=role,
    train_instance_count=1,
    train_instance_type='ml.c4.2xlarge',
    output_path=output_path,
    sagemaker_session=sess,
)
clf = Estimator(**estimator_args)

# Start training on the data uploaded to S3 earlier in the notebook.
clf.fit(data_location)

INFO:sagemaker:Creating training-job with name: breast-cancer-nb-2017-12-18-22-58-15-403


.....................................................

KeyboardInterrupt: 