# Build the container

In [None]:
%%sh 
#docker rmi -f $(docker images -q) || true
#docker ps -q -a -f status=exited | xargs -n 100 docker rm -v

# Uncomment and run the commands above to clear out stale Docker images and containers; sometimes they bake bad things into their layers.
# Ignore any errors these commands report.

In [None]:
%%bash 
# Fail fast: without this, a failed `cd` would let the following commands
# run in the wrong directory.
set -euo pipefail

# The build script lives in the container/ directory.
cd container

# The script is invoked as ./build_push.sh, so it must carry the execute bit.
chmod 755 build_push.sh

# NOTE(review): the argument is presumably the image/repository name. A later
# cell builds its image URI with 'breast-cancer-nb' (no '-bh3' suffix) --
# confirm the two names are meant to differ.
./build_push.sh breast-cancer-nb-bh3


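# Yes, you can: it appears you just needed to chmod build_push.sh so that it can be executed directly.
# Also, %%sh might be a little more standard than %%bash, although that is not certain, and the script's first line is: #!/usr/bin/env bash.
# `.` is shorthand for `source`, i.e. `. <<script>>` == `source <<script>>`. Note that `./<<script>>` is different: it runs the script as its own process, which is why the execute bit (chmod) is needed.
# -bh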


In [None]:
# Standard library
import os
import re
from time import gmtime, strftime

# Third-party
import boto3
import numpy as np
import pandas as pd
import sagemaker as sage

# Key prefix under which this notebook's data is stored in S3.
prefix = 'breastCancerNB'

# SageMaker session shared by the cells below.
sess = sage.Session()

# IAM role handed to the estimator in the training cell.
role = sage.get_execution_role()

# Build training data

In [None]:
# Generate the training CSV only when it is not already on disk, so re-running
# this cell is a no-op once the file exists.
if not os.path.exists('training_data/breast_cancer.csv'):
    from sklearn.datasets import load_breast_cancer
    import pandas as pd

    # to_csv does not create missing parent directories; make sure the target
    # folder exists so the cell also works on a fresh checkout.
    os.makedirs('training_data', exist_ok=True)

    breast_cancer = load_breast_cancer()
    # Target goes in the first column, followed by the feature columns.
    data = pd.concat([pd.Series(breast_cancer.target),
                      pd.DataFrame(breast_cancer.data)], axis=1)
    # Written without a header row or index column.
    data.to_csv('training_data/breast_cancer.csv', header=False, index=False)

# Upload training data into S3

In [None]:
# Local folder whose contents are uploaded to S3.
WORK_DIRECTORY = 'training_data'

# Upload under the notebook's key prefix; the returned location is what the
# training cell later passes to clf.fit().
data_location = sess.upload_data(path=WORK_DIRECTORY, key_prefix=prefix)

print(
    'Contents of directory {} now in S3 at {}'.format(WORK_DIRECTORY, data_location)
)

In [None]:
# Look up the caller's AWS account id through STS, using the session's
# underlying boto3 session.
sts_client = sess.boto_session.client('sts')
account = sts_client.get_caller_identity()['Account']

# Region the boto3 session is configured for.
region = sess.boto_session.region_name

# ECR image location assembled from the account id and region.
# NOTE(review): the build cell calls build_push.sh with 'breast-cancer-nb-bh3',
# while this URI ends in 'breast-cancer-nb' -- confirm which name is intended.
image = '{}.dkr.ecr.{}.amazonaws.com/breast-cancer-nb'.format(account, region)

print('Location of Docker image is {}'.format(image))

In [None]:
%%time
from sagemaker.estimator import Estimator

# S3 location, inside the session's default bucket, passed to the estimator
# as its output path.
output_path = "s3://{}/output".format(sess.default_bucket())

# Estimator configuration: the custom image, the execution role and a single
# ml.c4.2xlarge training instance.
estimator_settings = dict(
    image_name=image,
    role=role,
    train_instance_count=1,
    train_instance_type='ml.c4.2xlarge',
    output_path=output_path,
    sagemaker_session=sess,
)
clf = Estimator(**estimator_settings)

# Train on the data uploaded to S3 by the earlier cell.
clf.fit(data_location)

In [None]:
%%time
from sagemaker.predictor import csv_serializer

# Deploy the trained estimator on a single ml.m4.xlarge instance, with the CSV
# serializer attached to the returned predictor.
# NOTE(review): no endpoint cleanup is visible in this section -- confirm it
# happens later in the notebook.
predictor = clf.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    serializer=csv_serializer,
)