In [7]:
import os
import pandas as pd
import yaml
import io
import boto3
from time import gmtime, strftime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
%matplotlib inline

import sagemaker
import sagemaker.amazon.common as smac
from sagemaker import image_uris


# Let pandas display up to 50 columns before it starts eliding wide frames.
pd.options.display.max_columns = 50

In [20]:
import yaml
import sagemaker
import boto3

# Locations of the YAML settings file and of the local dataset folder.
SETTING_FILE_PATH = "../config/settings.yaml"
DATA_FOLDER_PATH = "avazu-ctr-prediction"

# Parse the settings once; every value below comes from its aws -> sagemaker section.
with open(SETTING_FILE_PATH) as settings_stream:
    aws_info = yaml.safe_load(settings_stream)

_sagemaker_settings = aws_info['aws']['sagemaker']
role = _sagemaker_settings['role']
bucket = _sagemaker_settings['s3bucket']
region = _sagemaker_settings['region']
account = _sagemaker_settings['account']

# SageMaker SDK session plus the low-level boto3 clients.
sess = sagemaker.Session()
sm = boto3.client('sagemaker')
s3 = boto3.client('s3')

In [2]:
from sklearn.model_selection import train_test_split
import pandas as pd
import os 

# Read every column as a plain object (no dtype inference), then make a
# reproducible shuffled 70/30 train/test split (fixed random_state).
train_partial_path = os.path.join(DATA_FOLDER_PATH, "train_partial")
df_train, df_test = train_test_split(
    pd.read_csv(train_partial_path, dtype="object"),
    train_size=0.7,
    random_state=0,
    shuffle=True,
)


In [27]:
# Local file names; the same names are reused as the last component of the S3 object keys.
train_file = "train.csv"
test_file = "test.csv"

# Write both splits to CSV in the working directory (index column omitted).
df_train.to_csv(train_file, index=False)
df_test.to_csv(test_file, index=False)

# Common key prefix for everything this notebook stores in the bucket.
prefix = 'custom-container-training'

s3_resource_bucket = boto3.Session().resource("s3").Bucket(bucket)

# S3 keys always use "/" as the separator, so build them explicitly instead of with
# os.path.join: on Windows os.path.join would insert "\" and the uploaded keys would
# no longer match "/"-separated s3:// URIs that point at these objects.
train_key = f"{prefix}/train/{train_file}"
test_key = f"{prefix}/test/{test_file}"

s3_resource_bucket.Object(train_key).upload_file(train_file)
s3_resource_bucket.Object(test_key).upload_file(test_file)


In [28]:
# Everything this notebook reads or writes lives under s3://<bucket>/<prefix>/.
_s3_base_uri = "s3://{}/{}".format(bucket, prefix)

# Destination for training output, and the two input data objects.
output_location = "{}/output".format(_s3_base_uri)
s3_train_data = "{}/train/{}".format(_s3_base_uri, train_file)
s3_test_data = "{}/test/{}".format(_s3_base_uri, test_file)

In [5]:
from sagemaker.sklearn.estimator import SKLearn

# This estimator is not fitted in this cell; it is only asked which prebuilt
# scikit-learn training image it would use, and that URI is printed.
sklearn_estimator_kwargs = dict(
    entry_point="",
    framework_version="0.23-1",
    py_version="py3",
    role=role,
    instance_type='local',
)
estimator = SKLearn(**sklearn_estimator_kwargs)

print(estimator.training_image_uri())

354813040037.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3


In [17]:
%%sh

# Build the "extend SageMaker container" training image and push it to ECR.
# Stop at the first failing command so that a failed build can never be
# followed by tagging and pushing a stale local image.
set -eu

# Mark the training script as executable before the image is built.
chmod +x extend_sagemaker_container/trainer.py

# Specify an algorithm name (used as the local image name and the ECR repository name)
algorithm_name=ctr-prediction-extend-sagemaker-container

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current AWS CLI configuration.
# There is intentionally no fallback region: fail loudly rather than build a
# malformed image URI from an empty value.
region=$(aws configure get region || true)
if [ -z "${region}" ]
then
    echo "No AWS region is configured; run 'aws configure set region <region>' first." >&2
    exit 1
fi

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"
echo "${fullname}"

# If the repository doesn't exist in ECR, create it.
if ! aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to the ECR registry (the registry host, not the image name).
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${registry}"

# NOTE(review): the build log shows the Dockerfile's base image being resolved from a
# different ECR registry than the one logged in to above; on a machine without that
# image cached, a separate docker login to that registry may be required - confirm.

# Build the docker image locally with the image name and then push it to ECR
# with the full name.
docker build -t "${algorithm_name}" -f extend_sagemaker_container/Dockerfile .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

547760918250.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-extend-sagemaker-container:latest
Login Succeeded
The push refers to repository [547760918250.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-extend-sagemaker-container]
b1d8e512c889: Preparing
6b01f020c189: Preparing
b2441e7b42d4: Preparing
bdeb2cf114ee: Preparing
c5d05b4b53cb: Preparing
d49da1cd03ad: Preparing
a7a48ec604ba: Preparing
4387bb22a2ae: Preparing
0e58ecc8bdbd: Preparing
220cc31ad6da: Preparing
611239e85648: Preparing
17058ddb7377: Preparing
70cefb165e49: Preparing
8215a68c2a4b: Preparing
f3cb95302c96: Preparing
5e97ab70ebe9: Preparing
213551f26e74: Preparing
c8e3d2f95e4e: Preparing
1dc52a6b4de8: Preparing
d49da1cd03ad: Waiting
a7a48ec604ba: Waiting
4387bb22a2ae: Waiting
0e58ecc8bdbd: Waiting
220cc31ad6da: Waiting
611239e85648: Waiting
17058ddb7377: Waiting
70cefb165e49: Waiting
8215a68c2a4b: Waiting
f3cb95302c96: Waiting
5e97ab70ebe9: Waiting
213551f26e74: Waiting
c8e3d2f95e4e: Waiting
1dc52a6b4de8: Waiti

#1 [internal] load build definition from Dockerfile
#1 sha256:bb28e49c5a3c37c749ac80325365a4669e2fe987bea2a54dbc0ab0ac2ba89459
#1 transferring dockerfile: 370B done
#1 DONE 0.0s

#2 [internal] load .dockerignore
#2 sha256:742bf9d4a9d6fc92879f5f7f68c9eb63efb71b6b50df61ee5459f22d7fb6ff64
#2 transferring context: 2B done
#2 DONE 0.0s

#3 [internal] load metadata for 354813040037.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3
#3 sha256:1b0afbc7fe22dc2105381bb4e920f88d3384efc8fa11e6ecac6191d29a93d67a
#3 DONE 0.0s

#5 [internal] load build context
#5 sha256:c52cda3bab5c08a2d4154e9ab0a7524d7440a364e78c756e4b0dbf48af97c803
#5 transferring context: 3.24kB done
#5 DONE 0.0s

#4 [1/2] FROM 354813040037.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3
#4 sha256:b6566a50f304fe0d0ca504a18353a8e31f884384e1a3d1f775e36f9ac8c62009
#4 CACHED

#6 [2/2] ADD extend_sagemaker_container/trainer.py /opt/ml/code/trainer.py
#6 sha256:439f81c069a8f416f140b0a2f

In [22]:
from sagemaker.estimator import Estimator

# Launch a training job on the image built from extend_sagemaker_container/.
# The timestamp keeps the base job name distinct between runs.
job_name = "extend-sagemaker-container-ctr-prediction-" + strftime("%Y%m%d-%H-%M-%S", gmtime())

# Handed to the estimator, which passes them on to the training container.
hyperparameters = {"alpha": 0.00001, "eta0": 2.0}

# Set to True to use instance_type "local" with the local CSV files instead of
# a managed ml.m5.large instance that reads its input from S3.
enable_local_mode_training = False
if enable_local_mode_training:
    train_instance_type = "local"
    inputs = {"train": f"file://{train_file}", "test": f"file://{test_file}"}
else:
    train_instance_type = "ml.m5.large"
    inputs = {"train": s3_train_data, "test": s3_test_data}

# Build the image URI from the configured account AND region instead of hard-coding
# "ap-northeast-1", so the notebook keeps working when the settings file points at
# another region.
image_uri = f'{account}.dkr.ecr.{region}.amazonaws.com/ctr-prediction-extend-sagemaker-container:latest'

# NOTE(review): no output_path is passed here, so output_location is not used by this
# job - confirm that relying on the SDK's default output location is intended.
estimator = Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type=train_instance_type,
    hyperparameters=hyperparameters,
    base_job_name=job_name,
)

estimator.fit(inputs)

2022-05-26 14:22:17 Starting - Starting the training job...
2022-05-26 14:22:34 Starting - Preparing the instances for trainingProfilerReport-1653574937: InProgress
......
2022-05-26 14:23:50 Downloading - Downloading input data......
2022-05-26 14:24:50 Training - Downloading the training image......
2022-05-26 14:25:50 Training - Training image download completed. Training in progress.[34m2022-05-26 14:25:43,806 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2022-05-26 14:25:43,811 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-05-26 14:25:43,828 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2022-05-26 14:25:43,833 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-05-26 14:25:43,859 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-05-26 14:25:43,879 sage

## sagemaker-training-toolkitを利用

In [29]:
%%sh

# Build the "custom container" training image and push it to ECR.
# Stop at the first failing command so that a failed build can never be
# followed by tagging and pushing a stale local image.
set -eu

# Mark the training script as executable before the image is built.
chmod +x my_custom_container/trainer.py

# Specify an algorithm name (used as the local image name and the ECR repository name)
algorithm_name=ctr-prediction-custom-container

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current AWS CLI configuration.
# There is intentionally no fallback region: fail loudly rather than build a
# malformed image URI from an empty value.
region=$(aws configure get region || true)
if [ -z "${region}" ]
then
    echo "No AWS region is configured; run 'aws configure set region <region>' first." >&2
    exit 1
fi

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"
echo "${fullname}"

# If the repository doesn't exist in ECR, create it.
if ! aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to the ECR registry (the registry host, not the image name).
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${registry}"

# Build the docker image locally with the image name and then push it to ECR
# with the full name.
docker build -t "${algorithm_name}" -f my_custom_container/Dockerfile .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

547760918250.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-custom-container:latest
Login Succeeded
The push refers to repository [547760918250.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-custom-container]
5bedce2511b8: Preparing
74579fcdcd80: Preparing
9e01cdbddbe3: Preparing
ee3114554ad3: Preparing
9e0a57d01dbf: Preparing
e30df7d241f6: Preparing
64978906fbf1: Preparing
82baccdbb070: Preparing
c42229255bc9: Preparing
ad6b69b54919: Preparing
e30df7d241f6: Waiting
64978906fbf1: Waiting
82baccdbb070: Waiting
c42229255bc9: Waiting
ad6b69b54919: Waiting
9e01cdbddbe3: Layer already exists
ee3114554ad3: Layer already exists
9e0a57d01dbf: Layer already exists
74579fcdcd80: Layer already exists
64978906fbf1: Layer already exists
c42229255bc9: Layer already exists
e30df7d241f6: Layer already exists
82baccdbb070: Layer already exists
ad6b69b54919: Layer already exists
5bedce2511b8: Pushed
latest: digest: sha256:8611cb1bd5a99577a87c42530ed28b47a666919b98ee765c6012f269aa74b50a size: 2

#1 [internal] load build definition from Dockerfile
#1 sha256:008b6b812d9699b3623e1e6ca4de4683381874ac0995ea90887080bd658faef7
#1 transferring dockerfile: 37B done
#1 DONE 0.0s

#2 [internal] load .dockerignore
#2 sha256:dd1ecf6a02e9f4dd9214f06709618831aabf1f3f13640de89bbb031cc48dd9c4
#2 transferring context: 2B done
#2 DONE 0.0s

#3 [internal] load metadata for docker.io/library/python:3.6-slim-buster
#3 sha256:4479281d18059b3f797d359bb841ea019ba8f5017826dc09a157e5902a39b8d5
#3 ...

#4 [auth] library/python:pull token for registry-1.docker.io
#4 sha256:8653ac043978eba9dd25570b17912d124d8fd4ad17eb393bea64082d212002b9
#4 DONE 0.0s

#3 [internal] load metadata for docker.io/library/python:3.6-slim-buster
#3 sha256:4479281d18059b3f797d359bb841ea019ba8f5017826dc09a157e5902a39b8d5
#3 DONE 5.0s

#5 [1/6] FROM docker.io/library/python:3.6-slim-buster@sha256:e10aa83604948c6d8d9f72a9a20193d84bb2dbe550b725eb5208387117fde065
#5 sha256:2d122478d432b1e01288efe6d28570f7ef38e6ff79fffa843887f52cf207bc

In [31]:
from sagemaker.estimator import Estimator

# Launch a training job on the image built from my_custom_container/.
# The timestamp keeps the base job name distinct between runs.
job_name = "custom-container-ctr-prediction-" + strftime("%Y%m%d-%H-%M-%S", gmtime())

# Handed to the estimator, which passes them on to the training container.
hyperparameters = {"rank": 7, "n_iter": 12}

# Set to True to use instance_type "local" with the local CSV files instead of
# a managed ml.m5.large instance that reads its input from S3.
enable_local_mode_training = False
if enable_local_mode_training:
    train_instance_type = "local"
    inputs = {"train": f"file://{train_file}", "test": f"file://{test_file}"}
else:
    train_instance_type = "ml.m5.large"
    inputs = {"train": s3_train_data, "test": s3_test_data}

# Build the image URI from the configured account AND region instead of hard-coding
# "ap-northeast-1", so the notebook keeps working when the settings file points at
# another region.
image_uri = f'{account}.dkr.ecr.{region}.amazonaws.com/ctr-prediction-custom-container:latest'

estimator = Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type=train_instance_type,
    output_path=output_location,
    hyperparameters=hyperparameters,
    base_job_name=job_name,
)

estimator.fit(inputs)

2022-05-26 15:12:45 Starting - Starting the training job...
2022-05-26 15:13:09 Starting - Preparing the instances for trainingProfilerReport-1653577965: InProgress
......
2022-05-26 15:14:19 Downloading - Downloading input data...
  from cryptography.hazmat.backends import default_backend[0m
[34m2022-05-26 15:14:57,482 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-05-26 15:14:57,519 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-05-26 15:14:57,539 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-05-26 15:14:57,560 sagemaker-training-toolkit INFO     Invoking user script[0m
[34mTraining Env:[0m
[34m{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "test": "/opt/ml/input/data/test",
        "train": "/opt/ml/input/data/train"
    },
    "current_host": "algo-1",
    "framework_module": null,
    "hosts":

## スクラッチのコンテナイメージ

In [32]:
%%sh

# Build the "scratch container" training image and push it to ECR.
# Stop at the first failing command so that a failed build can never be
# followed by tagging and pushing a stale local image.
set -eu

# Mark the training script as executable before the image is built.
chmod +x my_scratch_container/trainer.py

# Specify an algorithm name (used as the local image name and the ECR repository name)
algorithm_name=ctr-prediction-scratch-container

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current AWS CLI configuration.
# There is intentionally no fallback region: fail loudly rather than build a
# malformed image URI from an empty value.
region=$(aws configure get region || true)
if [ -z "${region}" ]
then
    echo "No AWS region is configured; run 'aws configure set region <region>' first." >&2
    exit 1
fi

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"
echo "${fullname}"

# If the repository doesn't exist in ECR, create it.
if ! aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to the ECR registry (the registry host, not the image name).
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${registry}"

# Build the docker image locally with the image name and then push it to ECR
# with the full name.
docker build -t "${algorithm_name}" -f my_scratch_container/Dockerfile .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

547760918250.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-scratch-container:latest
Login Succeeded
The push refers to repository [547760918250.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-scratch-container]
2dc3863ae9a7: Preparing
9e01cdbddbe3: Preparing
ee3114554ad3: Preparing
9e0a57d01dbf: Preparing
e30df7d241f6: Preparing
64978906fbf1: Preparing
82baccdbb070: Preparing
c42229255bc9: Preparing
ad6b69b54919: Preparing
82baccdbb070: Waiting
c42229255bc9: Waiting
64978906fbf1: Waiting
ad6b69b54919: Waiting
9e0a57d01dbf: Layer already exists
ee3114554ad3: Layer already exists
9e01cdbddbe3: Layer already exists
e30df7d241f6: Layer already exists
64978906fbf1: Layer already exists
c42229255bc9: Layer already exists
82baccdbb070: Layer already exists
ad6b69b54919: Layer already exists
2dc3863ae9a7: Pushed
latest: digest: sha256:425f4602ed575f83683855ef6d78b4ea39e18bbb79302c7f16e2bbea0744a4ea size: 2212


#1 [internal] load build definition from Dockerfile
#1 sha256:e7ec40ace398f9631c60b8da0d7af0e56d64c2a2dfdd9cf01aeb176696b3199a
#1 transferring dockerfile: 331B done
#1 DONE 0.0s

#2 [internal] load .dockerignore
#2 sha256:dd4929f84a139923fb857ac24bc0cf44f39d938854e514629d62557ee192419b
#2 transferring context: 2B done
#2 DONE 0.0s

#3 [internal] load metadata for docker.io/library/python:3.6-slim-buster
#3 sha256:4479281d18059b3f797d359bb841ea019ba8f5017826dc09a157e5902a39b8d5
#3 ...

#4 [auth] library/python:pull token for registry-1.docker.io
#4 sha256:d12ea473c4a1ead649dea9b36f10b28c74a2da728d6500309a2e4f823b129d1c
#4 DONE 0.0s

#3 [internal] load metadata for docker.io/library/python:3.6-slim-buster
#3 sha256:4479281d18059b3f797d359bb841ea019ba8f5017826dc09a157e5902a39b8d5
#3 DONE 2.0s

#5 [1/6] FROM docker.io/library/python:3.6-slim-buster@sha256:e10aa83604948c6d8d9f72a9a20193d84bb2dbe550b725eb5208387117fde065
#5 sha256:2d122478d432b1e01288efe6d28570f7ef38e6ff79fffa843887f52cf207b

In [36]:
from sagemaker.estimator import Estimator

# Launch a training job on the image built from my_scratch_container/.
# The timestamp keeps the base job name distinct between runs.
job_name = "scratch-container-ctr-prediction-" + strftime("%Y%m%d-%H-%M-%S", gmtime())

# Handed to the estimator, which passes them on to the training container.
hyperparameters = {"rank": 7, "n_iter": 12}

# Set to True to use instance_type "local" with the local CSV files instead of
# a managed ml.m5.large instance that reads its input from S3.
enable_local_mode_training = False
if enable_local_mode_training:
    train_instance_type = "local"
    inputs = {"train": f"file://{train_file}", "test": f"file://{test_file}"}
else:
    train_instance_type = "ml.m5.large"
    inputs = {"train": s3_train_data, "test": s3_test_data}

# Build the image URI from the configured account AND region instead of hard-coding
# "ap-northeast-1", so the notebook keeps working when the settings file points at
# another region.
image_uri = f'{account}.dkr.ecr.{region}.amazonaws.com/ctr-prediction-scratch-container:latest'

estimator = Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type=train_instance_type,
    output_path=output_location,
    hyperparameters=hyperparameters,
    base_job_name=job_name,
)

estimator.fit(inputs)


2022-05-27 00:32:56 Starting - Starting the training job...
2022-05-27 00:33:12 Starting - Preparing the instances for trainingProfilerReport-1653611575: InProgress
.........
2022-05-27 00:34:48 Downloading - Downloading input data...
2022-05-27 00:35:08 Training - Training image download completed. Training in progress.....[34mRunning training...[0m
[34m{'n_iter': '12', 'rank': '7'}[0m
[34mHyperparameters configuration:{'n_iter': '12', 'rank': '7'}[0m
[34mInput data configuration:{'test': {'TrainingInputMode': 'File', 'S3DistributionType': 'FullyReplicated', 'RecordWrapperType': 'None'}, 'train': {'TrainingInputMode': 'File', 'S3DistributionType': 'FullyReplicated', 'RecordWrapperType': 'None'}}[0m
[34mList of files in test channel: [0m
[34m/opt/ml/input/data/test/test.csv[0m
[34mList of files in train channel: [0m
[34m/opt/ml/input/data/train/train.csv[0m
[34mResource configuration:{'current_host': 'algo-1', 'current_instance_type': 'ml.m5.large', 'current_group_name