In [6]:
%%time
%%sh

# Build the Docker image from ./container and publish it to Amazon ECR as
# <account>.dkr.ecr.<region>.amazonaws.com/<algorithm_name>:latest
# Makoto.Sano@Mack-the-Psych.com

# Stop at the first failing command so that a failed `cd` or `docker build`
# can never be followed by tagging/pushing a stale or unrelated image.
set -e

# The name of our algorithm
algorithm_name=sagemaker-vdok3-bert

cd container

# Make the train/serve scripts executable before they go into the image.
chmod +x vdok3_sage/train vdok3_sage/serve

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined).
# `aws configure get` exits non-zero when the key is unset; `|| true` keeps
# `set -e` from aborting so the default below can still apply.
region=$(aws configure get region || true)
region=${region:-us-west-2}

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.
# The region is passed explicitly so the repository is looked up/created in
# the same region that ${fullname} points at.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${region}" --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to the registry.
# `aws ecr get-login` no longer exists in AWS CLI v2 and puts the password on
# the docker command line; `get-login-password` (AWS CLI v2, or v1 >= 1.17.10)
# streams it over stdin instead.
aws ecr get-login-password --region "${region}" \
    | docker login --username AWS --password-stdin "${registry}"

# Build the docker image locally with the image name and then push it to ECR
# with the full name.
docker build -t "${algorithm_name}" .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

Login Succeeded
Sending build context to Docker daemon  39.42kB
Step 1/24 : FROM continuumio/anaconda3:5.0.0
 ---> c9b256f6fa83
Step 2/24 : RUN apt-get update && apt-get -y upgrade &&     apt-get install -y curl git unzip bzip2
 ---> Using cache
 ---> 102a829e377d
Step 3/24 : RUN git clone https://github.com/pyenv/pyenv.git .pyenv
 ---> Using cache
 ---> 0b00938fae18
Step 4/24 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> 139d63a136d6
Step 5/24 : ENV PYTHONDONTWRITEBYTECODE=TRUE
 ---> Using cache
 ---> a9a580a1c211
Step 6/24 : ENV PATH="/opt/program:${PATH}"
 ---> Using cache
 ---> 7247323eab51
Step 7/24 : WORKDIR /opt/program
 ---> Using cache
 ---> cf8f04050e6f
Step 8/24 : RUN git clone https://github.com/mack-the-psych/plimac3.git
 ---> Using cache
 ---> a04ab477da3c
Step 9/24 : RUN echo "/opt/program/plimac3/Lib" > /opt/conda/lib/python3.6/site-packages/plimac-custom.pth
 ---> Using cache
 ---> 1c1525ecb265
Step 10/24 : RUN echo "/opt/program/plimac3/Tools" >> /opt/conda/lib/

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



CPU times: user 7.81 ms, sys: 4.27 ms, total: 12.1 ms
Wall time: 5.29 s


In [7]:
# Standard library
import os
import re

# Third-party
import boto3
import numpy as np
import pandas as pd
from sagemaker import get_execution_role

# S3 key prefix used when this notebook uploads its data
# Makoto.Sano@Mack-the-Psych.com
prefix = 'vdok3_bert_trial'

# IAM role as returned by get_execution_role(); later handed to the estimator
role = get_execution_role()

In [8]:
from time import gmtime, strftime

import sagemaker as sage

# One SageMaker session object, reused by the upload, training and transform cells
sess = sage.Session()

In [9]:
# Local directory that gets uploaded to S3
WORK_DIRECTORY = 'data'

# upload_data() returns the S3 location of the upload; it is passed to fit()
# and used as the base URI for the batch-transform input.
data_location = sess.upload_data(path=WORK_DIRECTORY, key_prefix=prefix)

In [10]:
# Makoto.Sano@Mack-the-Psych.com
# Resolve the account id and region from the session to build the ECR image URI.
sts_client = sess.boto_session.client('sts')
account = sts_client.get_caller_identity()['Account']
region = sess.boto_session.region_name

image = f'{account}.dkr.ecr.{region}.amazonaws.com/sagemaker-vdok3-bert:latest'
training_output = f"s3://{sess.default_bucket()}/output"

# Single ml.c4.2xlarge training instance running the custom image.
vdok3bert = sage.estimator.Estimator(
    image,
    role,
    instance_count=1,
    instance_type='ml.c4.2xlarge',
    output_path=training_output,
    sagemaker_session=sess,
)

# Launch the training job on the uploaded data.
vdok3bert.fit(data_location)

2020-12-20 08:00:25 Starting - Starting the training job...
2020-12-20 08:00:49 Starting - Launching requested ML instancesProfilerReport-1608451225: InProgress
......
2020-12-20 08:01:55 Starting - Preparing the instances for training......
2020-12-20 08:02:53 Downloading - Downloading input data...
2020-12-20 08:03:10 Training - Downloading the training image...............
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])[0m
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])[0m
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])[0m
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])[0m
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])[0m
  np_resource = np.dtype([("resource", np.ubyte, 1)])[0m
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])[0m
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])[0m
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])[0m
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])[0m
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])[0m
  np_resource

In [11]:
# `sagemaker.predictor.csv_serializer` is a deprecated alias kept for SDK v1
# code; CSVSerializer is the SDK v2 class to use.
from sagemaker.serializers import CSVSerializer

# NOTE: deploy() creates a model from the training job's output/model.tar.gz.
# If that archive is missing, CreateModel fails with "Could not find model
# data at s3://...". Presumably the container's train script has to write its
# artifacts to /opt/ml/model for the archive to be produced -- verify against
# the container code.
predictor = vdok3bert.deploy(
    initial_instance_count=1,
    instance_type='ml.p2.xlarge',
    serializer=CSVSerializer(),
)

ClientError: An error occurred (ValidationException) when calling the CreateModel operation: Could not find model data at s3://sagemaker-us-west-2-822408253028/output/sagemaker-vdok3-bert-2020-12-20-08-00-25-757/output/model.tar.gz.

In [None]:
# Send the sample CSV to the endpoint and print the decoded reply.
df_in = pd.read_csv('data/Head4-Serialized-Def-ELVA.PILOT.POST-TEST.csv')

# read_csv took the first line as column labels; stack those labels back on
# top of the values so the payload starts with the header row.
header_row = np.array(df_in.columns)
np_in = np.vstack((header_row, df_in.to_numpy()))

response = predictor.predict(np_in)
print(response.decode('utf-8'))

In [None]:
# Tear down the endpoint once predictions are done.
# `Predictor.endpoint` is a deprecated alias in SageMaker SDK v2; the
# attribute is `endpoint_name`.
sess.delete_endpoint(predictor.endpoint_name)

In [None]:
# Batch-transform results are written under this folder of the default bucket.
transform_output_folder = "batch-transform-output"
output_path = f"s3://{sess.default_bucket()}/{transform_output_folder}"

# Transformer built from the trained estimator: one ml.p2.xlarge instance,
# CSV responses assembled line by line.
transformer = vdok3bert.transformer(
    instance_count=1,
    instance_type='ml.p2.xlarge',
    output_path=output_path,
    assemble_with='Line',
    accept='text/csv',
)

In [None]:
# Run the batch transform on the uploaded sample file and block until it ends.
transform_input_name = 'Head4-Serialized-Def-ELVA.PILOT.POST-TEST.csv'
transformer.transform(
    data_location + '/' + transform_input_name,
    content_type='text/csv',
    split_type='Line',
)
transformer.wait()

# The result object is the input file name with ".out" appended, stored under
# the transform output folder; fetch it to /tmp and print it.
transform_result_name = transform_input_name + '.out'
transform_result_key = f"{transform_output_folder}/{transform_result_name}"
local_result_path = '/tmp/' + transform_result_name

s3_client = sess.boto_session.client('s3')
s3_client.download_file(sess.default_bucket(), transform_result_key, local_result_path)

with open(local_result_path) as f:
    results = f.readlines()
print("Transform results: \n{}".format(''.join(results)))