In [1]:
import os
import pandas as pd
import yaml
import io
import boto3
from time import gmtime, strftime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
%matplotlib inline

import sagemaker
import sagemaker.amazon.common as smac
from sagemaker import image_uris


# Render up to 50 columns when a DataFrame is displayed.
pd.options.display.max_columns = 50



In [5]:
import yaml
import sagemaker
import boto3

# Paths (relative to the notebook) of the settings file and the dataset folder.
SETTING_FILE_PATH = "../../config/settings.yaml"
DATA_FOLDER_PATH = "../avazu-ctr-prediction"

# Parse the YAML settings file into a nested dict.
with open(SETTING_FILE_PATH) as settings_file:
    aws_info = yaml.safe_load(settings_file)

sess = sagemaker.Session()

# Everything this notebook needs is stored under aws -> sagemaker.
sagemaker_settings = aws_info['aws']['sagemaker']
role, bucket, region, account = (
    sagemaker_settings[key] for key in ('role', 's3bucket', 'region', 'account')
)

# Low-level boto3 clients for SageMaker and S3.
sm = boto3.client('sagemaker')
s3 = boto3.client('s3')

In [6]:
from sklearn.model_selection import train_test_split
import pandas as pd
import os 

# Prepare the train, validation and test splits.
raw_csv_path = os.path.join(DATA_FOLDER_PATH, "train_partial")
df_train = pd.read_csv(raw_csv_path, dtype="object")

# First split: 80% of the rows stay in df_train, the remainder becomes df_test.
df_train, df_test = train_test_split(
    df_train, train_size=0.8, shuffle=True, random_state=0
)
# Second split: 70% of what is left stays in df_train, the remainder becomes df_validation.
df_train, df_validation = train_test_split(
    df_train, train_size=0.7, shuffle=True, random_state=0
)


In [8]:
# Save each split to the working directory so local mode can read it.
for split_frame, csv_name in (
    (df_train, 'train.csv'),
    (df_validation, 'validation.csv'),
    (df_test, 'test.csv'),
):
    split_frame.to_csv(csv_name, index=False)

In [7]:
# Upload the train / validation / test CSVs to S3.
prefix = 'custom-script-training'

train_file = "train.csv"
validation_file = "validation.csv"
test_file = "test.csv"

# Write the CSVs here as well so this cell works when run on its own.
df_train.to_csv(train_file, index=False)
df_validation.to_csv(validation_file, index=False)
df_test.to_csv(test_file, index=False)

s3_resource_bucket = boto3.Session().resource("s3").Bucket(bucket)

# S3 object keys always use "/" as the separator. Build them with string
# formatting instead of os.path.join, which would insert "\" on Windows and
# produce keys that no longer match the s3:// URIs built from the same parts.
for channel, file_name in (
    ("train", train_file),
    ("validation", validation_file),
    ("test", test_file),
):
    s3_resource_bucket.Object(f"{prefix}/{channel}/{file_name}").upload_file(file_name)


In [9]:
# Common s3://<bucket>/<prefix> root shared by every URI below.
s3_root = f"s3://{bucket}/{prefix}"

# Location passed to the estimators as their output path.
output_location = f"{s3_root}/output"

# One URI per uploaded split.
s3_train_data = f"{s3_root}/train/{train_file}"
s3_validation_data = f"{s3_root}/validation/{validation_file}"
s3_test_data = f"{s3_root}/test/{test_file}"

In [5]:
from sagemaker.sklearn.estimator import SKLearn

# Build an SKLearn estimator only to print the URI of its training image;
# no training is started in this cell, hence the empty entry point.
sklearn_estimator_kwargs = dict(
    entry_point="",
    framework_version="0.23-1",
    py_version="py3",
    role=role,
    instance_type='local',
)
estimator = SKLearn(**sklearn_estimator_kwargs)

sklearn_training_image = estimator.training_image_uri()
print(sklearn_training_image)

354813040037.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3


In [15]:
%%sh

chmod +x extend_sagemaker_container/extend_sagemaker_container_trainig_script.py

# Name used for both the local image tag and the ECR repository.
algorithm_name=ctr-prediction-extend-sagemaker-container

account=$(aws sts get-caller-identity --query Account --output text)

# The region comes from the active AWS CLI configuration. No fallback is
# applied, so stop early when none is configured instead of building a
# malformed image URI.
region=$(aws configure get region)
if [ -z "${region}" ]
then
    echo "No AWS region is configured; run 'aws configure set region <region>' first." >&2
    exit 1
fi

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"
echo "${fullname}"

# If the repository doesn't exist in ECR, create it.
if ! aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to ECR using a password obtained from the AWS CLI.
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${fullname}"

# Build the image locally, tag it with the full ECR name, and push it.
# The steps are chained with && so that a failed build does not tag and push
# a stale image left over from an earlier run.
docker build -t "${algorithm_name}" -f extend_sagemaker_container/Dockerfile . \
    && docker tag "${algorithm_name}" "${fullname}" \
    && docker push "${fullname}"

547760918250.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-extend-sagemaker-container:latest
Login Succeeded
The push refers to repository [547760918250.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-extend-sagemaker-container]
8e009502e429: Preparing
b8ccc62f606d: Preparing
6a5ca360b4a2: Preparing
6b01f020c189: Preparing
b2441e7b42d4: Preparing
bdeb2cf114ee: Preparing
c5d05b4b53cb: Preparing
d49da1cd03ad: Preparing
a7a48ec604ba: Preparing
bdeb2cf114ee: Waiting
c5d05b4b53cb: Waiting
d49da1cd03ad: Waiting
4387bb22a2ae: Preparing
0e58ecc8bdbd: Preparing
220cc31ad6da: Preparing
611239e85648: Preparing
17058ddb7377: Preparing
70cefb165e49: Preparing
8215a68c2a4b: Preparing
f3cb95302c96: Preparing
5e97ab70ebe9: Preparing
213551f26e74: Preparing
c8e3d2f95e4e: Preparing
1dc52a6b4de8: Preparing
a7a48ec604ba: Waiting
8215a68c2a4b: Waiting
f3cb95302c96: Waiting
4387bb22a2ae: Waiting
5e97ab70ebe9: Waiting
0e58ecc8bdbd: Waiting
220cc31ad6da: Waiting
213551f26e74: Waiting
17058ddb7377: W

#1 [internal] load build definition from Dockerfile
#1 sha256:23e1b7f49ab3ba3dfc99430e43a11e6b08f464a1717724659166f060d981f033
#1 transferring dockerfile: 535B 0.0s done
#1 DONE 0.0s

#2 [internal] load .dockerignore
#2 sha256:a096432f43237d48e3f8e295b829a6270273fa0ccb17daeb1db28ad92648084b
#2 transferring context:
#2 transferring context: 2B done
#2 DONE 0.0s

#3 [internal] load metadata for 354813040037.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3
#3 sha256:1b0afbc7fe22dc2105381bb4e920f88d3384efc8fa11e6ecac6191d29a93d67a
#3 DONE 0.0s

#4 [1/4] FROM 354813040037.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3
#4 sha256:b6566a50f304fe0d0ca504a18353a8e31f884384e1a3d1f775e36f9ac8c62009
#4 CACHED

#7 [internal] load build context
#7 sha256:4c36ef6243beab35261dc896ca4054058152afe1d0bd6d12605f3489118a4396
#7 transferring context: 139B done
#7 DONE 0.0s

#5 [2/4] RUN pip3 install --upgrade pip
#5 sha256:cd40a66ede25963ca5e59e025e6f5dcd

In [18]:
from sagemaker.estimator import Estimator

# Base name for the training job, suffixed with the current UTC time.
job_name = "extend-sagemaker-container-ctr-prediction-" + strftime("%Y%m%d-%H-%M-%S", gmtime())

hyperparameters = {"max_alpha": 0.2}

# Set to True to train in local mode from the CSV files in the working
# directory; False trains on a managed instance from the S3 copies.
enable_local_mode_training = False
if enable_local_mode_training:
    train_instance_type = "local"
    # The validation channel must reference `validation_file`; the previous
    # spelling `valiation_file` was an undefined name and raised NameError
    # as soon as local mode was enabled.
    inputs = {
        "train": f"file://{train_file}",
        "validation": f"file://{validation_file}",
        "test": f"file://{test_file}",
    }
else:
    train_instance_type = "ml.m5.large"
    inputs = {"train": s3_train_data, "validation": s3_validation_data, "test": s3_test_data}


# NOTE(review): the region in the image URI is hard-coded, while the build cell
# pushes to the region configured for the AWS CLI -- confirm the two match.
estimator = Estimator(image_uri=f'{account}.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-extend-sagemaker-container:latest',
                      role=role,
                      instance_count=1,
                      instance_type=train_instance_type,
                      hyperparameters=hyperparameters,
                      base_job_name=job_name)

estimator.fit(inputs)

2022-06-11 03:52:26 Starting - Starting the training job...
2022-06-11 03:52:42 Starting - Preparing the instances for trainingProfilerReport-1654919545: InProgress
.........
2022-06-11 03:54:18 Downloading - Downloading input data...
2022-06-11 03:54:58 Training - Downloading the training image......
2022-06-11 03:55:58 Training - Training image download completed. Training in progress.[34m2022-06-11 03:55:45,746 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2022-06-11 03:55:45,749 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-06-11 03:55:45,766 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2022-06-11 03:55:45,771 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-06-11 03:55:45,799 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-06-11 03:55:45,819 sage

[34mx: 0.154, score: 0.830[0m
[34m#033[32m[I 2022-06-11 03:56:26,216]#033[0m Trial 17 finished with value: 0.8296609296093992 and parameters: {'alpha': 0.15394086586785358}. Best is trial 12 with value: 0.8313305163351541.#033[0m[0m
[34mx: 0.028, score: 0.830[0m
[34m#033[32m[I 2022-06-11 03:56:26,765]#033[0m Trial 18 finished with value: 0.8296609296093992 and parameters: {'alpha': 0.0275195070628864}. Best is trial 12 with value: 0.8313305163351541.#033[0m[0m
[34mx: 0.066, score: 0.830[0m
[34m#033[32m[I 2022-06-11 03:56:27,365]#033[0m Trial 19 finished with value: 0.8296609296093992 and parameters: {'alpha': 0.06640415855476177}. Best is trial 12 with value: 0.8313305163351541.#033[0m[0m
[34mx: 0.020, score: 0.830[0m
[34m#033[32m[I 2022-06-11 03:56:27,959]#033[0m Trial 20 finished with value: 0.8297021539729981 and parameters: {'alpha': 0.019552564995504368}. Best is trial 12 with value: 0.8313305163351541.#033[0m[0m
[34mx: 0.001, score: 0.831[0m
[34m#033[32m[I 2022

[34mx: 0.084, score: 0.830[0m
[34m#033[32m[I 2022-06-11 03:56:55,966]#033[0m Trial 68 finished with value: 0.8296609296093992 and parameters: {'alpha': 0.08405206799386178}. Best is trial 58 with value: 0.8314232711532515.#033[0m[0m
[34mx: 0.018, score: 0.830[0m
[34m#033[32m[I 2022-06-11 03:56:56,576]#033[0m Trial 69 finished with value: 0.8297124600638978 and parameters: {'alpha': 0.01809241445387761}. Best is trial 58 with value: 0.8314232711532515.#033[0m[0m
[34mx: 0.009, score: 0.830[0m
[34m#033[32m[I 2022-06-11 03:56:57,178]#033[0m Trial 70 finished with value: 0.8297330722456973 and parameters: {'alpha': 0.00851185290200379}. Best is trial 58 with value: 0.8314232711532515.#033[0m[0m
[34mx: 0.006, score: 0.830[0m
[34m#033[32m[I 2022-06-11 03:56:57,716]#033[0m Trial 71 finished with value: 0.829928887972792 and parameters: {'alpha': 0.006192426209864707}. Best is trial 58 with value: 0.8314232711532515.#033[0m[0m
[34mx: 0.001, score: 0.831[0m
[34m#033[32m[I 2022

## sagemaker-training-toolkitを利用

In [21]:
%%sh

chmod +x custom_toolkit_container/custom_toolkit_container_training_script.py

# Name used for both the local image tag and the ECR repository.
algorithm_name=ctr-prediction-custom-toolkit-container

account=$(aws sts get-caller-identity --query Account --output text)

# The region comes from the active AWS CLI configuration. No fallback is
# applied, so stop early when none is configured instead of building a
# malformed image URI.
region=$(aws configure get region)
if [ -z "${region}" ]
then
    echo "No AWS region is configured; run 'aws configure set region <region>' first." >&2
    exit 1
fi

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"
echo "${fullname}"

# If the repository doesn't exist in ECR, create it.
if ! aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to ECR using a password obtained from the AWS CLI.
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${fullname}"

# Build the image locally, tag it with the full ECR name, and push it.
# The steps are chained with && so that a failed build does not tag and push
# a stale image left over from an earlier run.
docker build -t "${algorithm_name}" -f custom_toolkit_container/Dockerfile . \
    && docker tag "${algorithm_name}" "${fullname}" \
    && docker push "${fullname}"

547760918250.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-custom-toolkit-container:latest
Login Succeeded
The push refers to repository [547760918250.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-custom-toolkit-container]
836d75d98d69: Preparing
74579fcdcd80: Preparing
9e01cdbddbe3: Preparing
ee3114554ad3: Preparing
9e0a57d01dbf: Preparing
e30df7d241f6: Preparing
64978906fbf1: Preparing
82baccdbb070: Preparing
c42229255bc9: Preparing
ad6b69b54919: Preparing
82baccdbb070: Waiting
e30df7d241f6: Waiting
64978906fbf1: Waiting
c42229255bc9: Waiting
ad6b69b54919: Waiting
836d75d98d69: Pushed
9e01cdbddbe3: Pushed
64978906fbf1: Pushed
e30df7d241f6: Pushed
c42229255bc9: Pushed
82baccdbb070: Pushed
74579fcdcd80: Pushed
ad6b69b54919: Pushed
9e0a57d01dbf: Pushed
ee3114554ad3: Pushed
latest: digest: sha256:19991a9d9b2d38da02f5eb641ceabbf3dc6fc000f795603f11478b95e022b0f7 size: 2424


#1 [internal] load build definition from Dockerfile
#1 sha256:d6636378397ba7f1a03b1e3ce5d9edd560efb9749d605c7c816b55b4eda4f9d7
#1 transferring dockerfile: 526B done
#1 DONE 0.0s

#2 [internal] load .dockerignore
#2 sha256:550acb91a722f506fd2fc6c695d87a36d3a2d7dae9edca5f6e4853d815334ed3
#2 transferring context: 2B done
#2 DONE 0.0s

#3 [internal] load metadata for docker.io/library/python:3.6-slim-buster
#3 sha256:4479281d18059b3f797d359bb841ea019ba8f5017826dc09a157e5902a39b8d5
#3 DONE 0.8s

#4 [1/6] FROM docker.io/library/python:3.6-slim-buster@sha256:e10aa83604948c6d8d9f72a9a20193d84bb2dbe550b725eb5208387117fde065
#4 sha256:2d122478d432b1e01288efe6d28570f7ef38e6ff79fffa843887f52cf207bc79
#4 DONE 0.0s

#6 [3/6] RUN pip3 install pandas numpy joblib scikit-learn cython
#6 sha256:48d2ec5bb0cbe21ac51059280b3e50e12b30b80d70c843cc7e49496d30ac84bc
#6 CACHED

#5 [2/6] RUN apt-get update     && apt-get install -y --no-install-recommends gcc g++
#5 sha256:ba51ef5faa0f818a86e6f1d771cc790d819a786c

In [23]:
from sagemaker.estimator import Estimator

# Base name for the training job, suffixed with the current UTC time.
# NOTE(review): "containe" looks like a typo for "container"; it is left
# unchanged here because it is part of the job name sent to SageMaker.
job_name = "custom-toolkit-containe-ctr-prediction-" + strftime("%Y%m%d-%H-%M-%S", gmtime())

hyperparameters = dict(rank=7, n_iter=12)

# False -> managed ml.m5.large instance reading from S3;
# True  -> local mode reading the CSV files from the working directory.
enable_local_mode_training = False
if not enable_local_mode_training:
    train_instance_type = "ml.m5.large"
    inputs = {"train": s3_train_data, "test": s3_test_data}
else:
    train_instance_type = "local"
    inputs = {"train": f"file://{train_file}", "test": f"file://{test_file}"}

# Image pushed by the build cell for the training-toolkit container.
toolkit_image_uri = f'{account}.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-custom-toolkit-container:latest'

estimator = Estimator(
    image_uri=toolkit_image_uri,
    role=role,
    instance_count=1,
    instance_type=train_instance_type,
    output_path=output_location,
    hyperparameters=hyperparameters,
    base_job_name=job_name,
)

estimator.fit(inputs)

2022-06-11 04:33:25 Starting - Starting the training job...
2022-06-11 04:33:48 Starting - Preparing the instances for trainingProfilerReport-1654922004: InProgress
.........
2022-06-11 04:35:23 Downloading - Downloading input data
  from cryptography.hazmat.backends import default_backend[0m
[34m2022-06-11 04:35:25,304 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-06-11 04:35:25,340 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-06-11 04:35:25,360 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-06-11 04:35:25,380 sagemaker-training-toolkit INFO     Invoking user script[0m
[34mTraining Env:[0m
[34m{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "test": "/opt/ml/input/data/test",
        "train": "/opt/ml/input/data/train"
    },
    "current_host": "algo-1",
    "framework_module": null,
    "hosts":

## スクラッチのコンテナイメージ

In [24]:
%%sh

# NOTE(review): the recorded output of this cell shows
# "chmod: scratch_container/trainer.py: No such file or directory" --
# confirm the path of the training script.
chmod +x scratch_container/trainer.py

# Name used for both the local image tag and the ECR repository.
algorithm_name=ctr-prediction-scratch-container

account=$(aws sts get-caller-identity --query Account --output text)

# The region comes from the active AWS CLI configuration. No fallback is
# applied, so stop early when none is configured instead of building a
# malformed image URI.
region=$(aws configure get region)
if [ -z "${region}" ]
then
    echo "No AWS region is configured; run 'aws configure set region <region>' first." >&2
    exit 1
fi

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"
echo "${fullname}"

# If the repository doesn't exist in ECR, create it.
if ! aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to ECR using a password obtained from the AWS CLI.
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${fullname}"

# Build the image locally, tag it with the full ECR name, and push it.
# The steps are chained with && so that a failed build does not tag and push
# a stale image left over from an earlier run.
docker build -t "${algorithm_name}" -f scratch_container/Dockerfile . \
    && docker tag "${algorithm_name}" "${fullname}" \
    && docker push "${fullname}"

547760918250.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-scratch-container:latest
Login Succeeded
The push refers to repository [547760918250.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-scratch-container]
8b2526158275: Preparing
9e01cdbddbe3: Preparing
ee3114554ad3: Preparing
9e0a57d01dbf: Preparing
e30df7d241f6: Preparing
64978906fbf1: Preparing
82baccdbb070: Preparing
c42229255bc9: Preparing
ad6b69b54919: Preparing
64978906fbf1: Waiting
82baccdbb070: Waiting
c42229255bc9: Waiting
ad6b69b54919: Waiting
9e0a57d01dbf: Layer already exists
e30df7d241f6: Layer already exists
9e01cdbddbe3: Layer already exists
ee3114554ad3: Layer already exists
64978906fbf1: Layer already exists
82baccdbb070: Layer already exists
c42229255bc9: Layer already exists
ad6b69b54919: Layer already exists
8b2526158275: Pushed
latest: digest: sha256:9bac1d0fda95bdcc170e0da02a8b909cb66df295229b84b008a014e1688c6d98 size: 2212


chmod: scratch_container/trainer.py: No such file or directory
#1 [internal] load build definition from Dockerfile
#1 sha256:346b60104028867cf9768e5e05951ab211c8fa1570cae7d21956870d1f220e3b
#1 transferring dockerfile: 406B done
#1 DONE 0.0s

#2 [internal] load .dockerignore
#2 sha256:e26b8a3192e5ba24e691ad1eee24106cfcf4fb05535160e88ad122c9a7e9f5f4
#2 transferring context: 2B done
#2 DONE 0.0s

#3 [internal] load metadata for docker.io/library/python:3.6-slim-buster
#3 sha256:4479281d18059b3f797d359bb841ea019ba8f5017826dc09a157e5902a39b8d5
#3 ...

#4 [auth] library/python:pull token for registry-1.docker.io
#4 sha256:2079e98a4765968c52b046515c94b702e3fd3074122c126c3b94681650c6a6c7
#4 DONE 0.0s

#3 [internal] load metadata for docker.io/library/python:3.6-slim-buster
#3 sha256:4479281d18059b3f797d359bb841ea019ba8f5017826dc09a157e5902a39b8d5
#3 DONE 1.7s

#10 [1/6] FROM docker.io/library/python:3.6-slim-buster@sha256:e10aa83604948c6d8d9f72a9a20193d84bb2dbe550b725eb5208387117fde065
#10 sha

In [25]:
from sagemaker.estimator import Estimator

# Base name for the training job, suffixed with the current UTC time.
job_name = "scratch-container-ctr-prediction-" + strftime("%Y%m%d-%H-%M-%S", gmtime())

hyperparameters = dict(rank=7, n_iter=12)

# Toggle between SageMaker local mode and a managed training instance.
enable_local_mode_training = False
train_instance_type = "local" if enable_local_mode_training else "ml.m5.large"

if enable_local_mode_training:
    # Local mode reads the CSV files from the working directory.
    inputs = {"train": f"file://{train_file}", "test": f"file://{test_file}"}
else:
    # Managed training reads the copies uploaded to S3.
    inputs = {"train": s3_train_data, "test": s3_test_data}

# Image pushed by the build cell for the from-scratch container.
scratch_image_uri = f'{account}.dkr.ecr.ap-northeast-1.amazonaws.com/ctr-prediction-scratch-container:latest'

estimator = Estimator(
    image_uri=scratch_image_uri,
    role=role,
    instance_count=1,
    instance_type=train_instance_type,
    output_path=output_location,
    hyperparameters=hyperparameters,
    base_job_name=job_name,
)

estimator.fit(inputs)


2022-06-11 04:37:25 Starting - Starting the training job...
2022-06-11 04:37:49 Starting - Preparing the instances for trainingProfilerReport-1654922244: InProgress
.........
2022-06-11 04:39:24 Downloading - Downloading input data
2022-06-11 04:39:24 Training - Training image download completed. Training in progress......[34mRunning training...[0m
[34m{'n_iter': '12', 'rank': '7'}[0m
[34mHyperparameters configuration:{'n_iter': '12', 'rank': '7'}[0m
[34mInput data configuration:{'test': {'TrainingInputMode': 'File', 'S3DistributionType': 'FullyReplicated', 'RecordWrapperType': 'None'}, 'train': {'TrainingInputMode': 'File', 'S3DistributionType': 'FullyReplicated', 'RecordWrapperType': 'None'}}[0m
[34mList of files in test channel: [0m
[34m/opt/ml/input/data/test/test.csv[0m
[34mList of files in train channel: [0m
[34m/opt/ml/input/data/train/train.csv[0m
[34mResource configuration:{'current_host': 'algo-1', 'current_instance_type': 'ml.m5.large', 'current_group_name':