Build our Docker image "my-custom-sagemaker-image"

In [None]:
%%sh

# See README.md for explanation.
# Docker has to be authenticated against the ECR registry that hosts the base
# image referenced in the Dockerfile (sagemaker-scikit-learn:1.2-1-cpu-py3)
# before the build can pull it.

# Abort on the first failing command: without this the cell always ends with the
# successful `echo` below and a failed login/build goes unnoticed.
set -e

base_image_region=us-east-1
base_image_registry="683313688378.dkr.ecr.${base_image_region}.amazonaws.com"

# `docker login` expects the registry host, not a repository:tag reference.
aws ecr get-login-password --region "${base_image_region}" | docker login --username AWS --password-stdin "${base_image_registry}"

docker-compose build

echo "DOCKER BUILD TERMINATED AT $(date)"

Using the SageMaker Python SDK, we can test our Docker image locally

In [None]:
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator
import os

# Smoke-test the freshly built image through the SageMaker SDK with
# instance_type='local', reading input from and writing output to ./data.
working_dir = os.getcwd()
local_output_uri = 'file://{}/data/output'.format(working_dir)
local_input_uri = 'file://{}/data/input/my-input-csv-file.csv'.format(working_dir)

role = get_execution_role()

# A single epoch is enough to check that the container runs end to end.
hyperparameters = {'epochs': 1}

estimator = Estimator(
    image_uri='my-custom-sagemaker-image',
    role=role,
    instance_count=1,
    instance_type='local',
    hyperparameters=hyperparameters,
    output_path=local_output_uri,
)

print('##### ESTIMATOR FIT STARTED')
estimator.fit(local_input_uri)
print('##### ESTIMATOR FIT COMPLETED')

NB: if you encounter an error like `network sagemaker-local was found but has incorrect label com.docker.compose.network set to ""`, run the following command in a terminal and then re-run the cell above:
`docker network prune --force`

In [None]:
%%sh

# Extract the archives produced by the local training run so the results can
# be inspected. Paths are quoted so that a working directory containing
# whitespace does not split into several tar arguments.
output_dir="$PWD/data/output"

tar -xvf "${output_dir}/model.tar.gz" -C "${output_dir}"
tar -xvf "${output_dir}/output.tar.gz" -C "${output_dir}"

echo "Check the above files in the ${output_dir} directory!!!!"

As our image works as expected we can build it again with the right ECR image URI and push it to ECR

In [None]:
%%sh

# Build the image, make sure an ECR repository exists for it, then tag and push it.
# Abort on the first failing command so that a failed build or login can never
# be followed by pushing a stale image.
set -e

# Specify an image name
image_name=my-custom-sagemaker-image
echo "image_name: ${image_name} ######################"

account=$(aws sts get-caller-identity --query Account --output text)
echo "account: ${account} ######################"

# Get the region defined in the current configuration (default to us-west-2 if none defined).
# `aws configure get` exits non-zero when the value is unset, hence the `|| true` under `set -e`.
region=$(aws configure get region || true)
region=${region:-us-west-2}
echo "region: ${region} ######################"

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${image_name}:latest"
echo "fullname: ${fullname} ######################"

# If the repository doesn't exist in ECR, create it.
# --region is passed explicitly so the fallback region above is honoured too.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${image_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${region}" --repository-name "${image_name}" > /dev/null
fi

# Log Docker into the registry host (not the full image URI)
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${registry}"

# Build the docker image locally with the image name and then push it to ECR
# with the full name.
docker build -t "${image_name}" .
docker tag "${image_name}" "${fullname}"

docker push "${fullname}"

echo "Docker push ended at $(date)"

NB: if the last command ("docker push") remains pending, check the "AWS ECR IAM policies" section of README.md.

Before running a training job on SageMaker, we need to move our input data to AWS S3.
Obviously, we also need an S3 bucket first.

Create an S3 bucket using AWS CLI

In [None]:
# Generate a random AWS S3 bucket name sharing the name between sh/bash and other Python cells.
# NB: need to be executed only the first time you want to create the AWS S3 bucket
import random

bucket_name='a-random-bucket-name-{}'.format(random.randint(0, 1000000))

%set_env AWS_S3_BUCKET_NAME=$bucket_name

In [None]:
%%sh

# NB: need to be executed only the first time you want to create the AWS S3 bucket
region=$(aws configure get region)
if [ -z "${region}" ]; then
    echo "No default AWS region configured; run 'aws configure set region <region>' first." >&2
    exit 1
fi

# Outside us-east-1, CreateBucket is rejected unless the request carries a
# LocationConstraint matching the target region; us-east-1 must not send one.
if [ "${region}" = "us-east-1" ]; then
    aws s3api create-bucket --bucket "${AWS_S3_BUCKET_NAME}" --region "${region}"
else
    aws s3api create-bucket --bucket "${AWS_S3_BUCKET_NAME}" --region "${region}" \
        --create-bucket-configuration "LocationConstraint=${region}"
fi

In [None]:
import pandas as pd
import boto3
import io
import os
from sklearn.model_selection import train_test_split
import sagemaker

# Load the demo CSV from the local input folder.
url = 'file://{}/data/input/my-input-csv-file.csv'.format(os.getcwd())
# `sep` is passed by keyword: since pandas 2.0 every read_csv argument after the
# path is keyword-only, so the positional form raises TypeError.
df_demo = pd.read_csv(url, sep=',')

# S3 key prefix and object names used when uploading the splits.
prefix='demo'
train_file='demo_train.csv'
test_file='demo_test.csv'
validate_file='demo_validate.csv'
whole_file='demo.csv'
s3_resource = boto3.Session().resource('s3')

# First split: 80% train / 20% held out, stratified on the 'quality' column.
train, test_and_validate = train_test_split(df_demo,
                                            test_size=0.2,
                                            random_state=42,
                                            stratify=df_demo['quality'])

# Second split: the held-out 20% is halved into test and validate (10% each),
# again stratified on 'quality'.
test, validate = train_test_split(test_and_validate,
                                  test_size=0.5,
                                  random_state=42,
                                  stratify=test_and_validate['quality'])

print(train.shape)
print(test.shape)
print(validate.shape)

def upload_s3_csv(filename, folder, dataframe):
    """Serialize ``dataframe`` to CSV in memory and upload it to S3.

    The object is stored under the key ``<prefix>/<folder>/<filename>`` in the
    bucket named by the notebook-level ``bucket_name``, using the notebook-level
    ``s3_resource``. The CSV is written without a header row and without the
    index column.

    Args:
        filename: Name of the object (last key component).
        folder: Key component placed between ``prefix`` and ``filename``.
        dataframe: pandas DataFrame to upload.
    """
    csv_buffer = io.StringIO()
    dataframe.to_csv(csv_buffer, header=False, index=False)
    # S3 keys always use '/' as separator; os.path.join would produce
    # backslashes when the notebook runs on Windows.
    object_key = '/'.join((prefix, folder, filename))
    s3_resource.Bucket(bucket_name).Object(object_key).put(Body=csv_buffer.getvalue())

# Upload each split under <prefix>/<folder>/ in the bucket.
upload_s3_csv(filename=train_file, folder='train', dataframe=train)
upload_s3_csv(filename=test_file, folder='test', dataframe=test)
upload_s3_csv(filename=validate_file, folder='validate', dataframe=validate)

# The channels point at the S3 folders rather than at individual objects.
train_s3_uri = "s3://{}/{}/train/".format(bucket_name, prefix)
validate_s3_uri = "s3://{}/{}/validate/".format(bucket_name, prefix)

train_channel = sagemaker.inputs.TrainingInput(train_s3_uri, content_type='text/csv')
validate_channel = sagemaker.inputs.TrainingInput(validate_s3_uri, content_type='text/csv')

# Only the train and validate splits are wired into channels; the test split is
# uploaded above but not passed to the training job.
data_channels = {'train': train_channel, 'validation': validate_channel}

As we have pushed our Docker image to ECR and uploaded our input data to AWS S3 we can use it with a training job on SageMaker

In [None]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator
import pandas as pd
import boto3
import io
import os
from sklearn.model_selection import train_test_split

image_name = 'my-custom-sagemaker-image'

# The account id comes from STS and the region from the default boto3 session;
# together with the image name they give the URI of the image pushed to ECR.
client = boto3.client('sts')
my_session = boto3.session.Session()
account = client.get_caller_identity()['Account']
region = my_session.region_name

ecr_image = '{}.dkr.ecr.{}.amazonaws.com/{}:latest'.format(account, region, image_name)
print('###### ecr_image is: {}'.format(ecr_image))

# Training output is written to the root of the bucket created earlier.
estimator_settings = dict(
    image_uri=ecr_image,
    role=get_execution_role(),
    base_job_name='custom-docker-image-for-training',
    instance_count=1,
    instance_type='ml.p2.xlarge',
    output_path='s3://{}'.format(bucket_name),
)
estimator = Estimator(**estimator_settings)

# start training
estimator.fit(inputs=data_channels)

TODO: deploy our model

In [None]:
# deploy the trained model
# The endpoint instance type must be given explicitly: no `instance_type`
# variable is defined anywhere in this notebook, so referencing one raises NameError.
# NOTE(review): 'ml.m5.large' is a placeholder CPU instance type - adjust to your needs.
# NOTE(review): hosting presumably requires the custom image to implement
# SageMaker's serving interface - TODO confirm before running.
endpoint_instance_type = 'ml.m5.large'
predictor = estimator.deploy(initial_instance_count=1, instance_type=endpoint_instance_type)