In [34]:
from sagemaker import get_execution_role
from sagemaker.session import Session
from sagemaker.tensorflow import TensorFlow

In [35]:
import os

# Select the click-model configuration through the environment.
# NOTE(review): this runs before the cell that does `import config`; presumably
# that module reads CONFIG_FILE when it is imported, so keep this cell ahead of
# it — TODO confirm against config.py.
os.environ.update({'CONFIG_FILE': 'config_click.json'})

In [36]:
import boto3
import random
import json
import importlib
import bidding_data
import config

In [37]:
# Fetch the stats file through the project's bidding_data helper.
# NOTE(review): judging by its name this downloads from S3; which object is
# fetched and where it is stored locally is decided inside bidding_data and is
# not visible in this notebook — confirm there.
bidding_data.download_stats_file_from_s3()

In [38]:
# S3 bucket for saving code and model artifacts.
# Feel free to specify a different bucket here if you wish: every S3 location
# below (outputs, checkpoints and input channels) is derived from this value.
bucket = 'wsbidder'

# Common S3 prefix shared by this project's data, checkpoints and outputs.
s3_prefix = 's3://{}/trainer_predict_imp'.format(bucket)

# Location where results of model training are saved.
output_path = '{}/model_output_click'.format(s3_prefix)
model_dir = '{}/model_click'.format(s3_prefix)

# Training hardware. 'ml.p2.xlarge' is a GPU instance; switch to
# 'ml.c4.xlarge' for a CPU-only run.
train_instance_type = 'ml.p2.xlarge'
train_instance_count = 1

# Static hyperparameters handed to the training script as-is.
hyperparameters = {'epochs': 10000, 'batch_size': 512, 'config_file': 'config_click.json'}

# Input channels for the training job. The 'test' channel intentionally points
# at the same data as 'eval'.
# (The earlier local-path variant used /opt/ml/input/data/train/ and
# /opt/ml/input/data/eval/ for these channels instead of S3 URIs.)
inputs = {'train': '{}/data/train_click/'.format(s3_prefix),
          'eval': '{}/data/eval_click/'.format(s3_prefix),
          'test': '{}/data/eval_click/'.format(s3_prefix)}

# Prefix SageMaker uses when naming the training jobs.
base_job_name = 'tf-click-prediction'

In [39]:
# Script-mode TensorFlow estimator: runs train_predict_imp.py (uploaded together
# with the rest of the current directory) using the settings from the
# configuration cell.
# NOTE(review): model_dir is a fixed S3 path and this same estimator is handed
# to the HyperparameterTuner, so every tuning job receives the same model_dir.
# Confirm that concurrent jobs cannot overwrite or resume from each other's
# checkpoints.
execution_role = get_execution_role()

estimator = TensorFlow(
    # What to run
    entry_point='train_predict_imp.py',
    source_dir='.',
    hyperparameters=hyperparameters,
    # Runtime / framework
    framework_version='1.14.0',
    py_version='py3',
    script_mode=True,
    # Infrastructure
    role=execution_role,
    train_instance_type=train_instance_type,
    train_instance_count=train_instance_count,
    # Naming and artifact locations
    base_job_name=base_job_name,
    output_path=output_path,
    model_dir=model_dir,
    # Disabled options kept for reference:
    # image_name='520713654638.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-tensorflow-scriptmode:1.12.0-gpu-py3',
    # distributions={'parameter_server': {'enabled': True}},
)

In [40]:
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

# Objective: minimise the loss value scraped from the training job logs.
objective_metric_name = 'loss'
objective_type = 'Minimize'

# The capture group also accepts exponent notation (e.g. "loss = 1.2e-05").
# With the narrower class [0-9\.] such a line would be captured as "1.2",
# reporting a wildly wrong objective value to the tuner.
metric_definitions = [{'Name': 'loss',
                       'Regex': 'loss = ([0-9\\.eE+-]+)'}]

# Search space explored by the tuner.
# (An earlier variant used learning_rate in [1e-7, 1e-4] and additionally tuned
# 'batch_size': IntegerParameter(512, 4096).)
hyperparameter_ranges = {
    'learning_rate': ContinuousParameter(1e-6, 1e-4),
    'dropout_rate': ContinuousParameter(0.0, 0.9),
}

# Initialise SageMaker's HyperparameterTuner: 16 training jobs in total,
# at most 4 running at the same time.
tuner = HyperparameterTuner(estimator=estimator,
                            objective_metric_name=objective_metric_name,
                            hyperparameter_ranges=hyperparameter_ranges,
                            metric_definitions=metric_definitions,
                            max_jobs=16,
                            max_parallel_jobs=4,
                            objective_type=objective_type)

In [41]:
# Remove artifacts left under the model directory by previous runs so the new
# tuning job starts from an empty location.
# The prefix must stay in sync with `model_dir` from the configuration cell.
s3 = boto3.resource('s3')

# The collection's batch delete() removes all matching objects using bulk
# requests instead of issuing one DELETE call per object.
s3.Bucket(bucket).objects.filter(Prefix='trainer_predict_imp/model_click/').delete()

In [42]:
# Start the hyperparameter tuning job, feeding it the input channels defined in
# the configuration cell.
tuner.fit(inputs=inputs)

In [43]:
# tuner.deploy(
#     initial_instance_count=1,
#     instance_type='ml.c5.large',
#     endpoint_name='ClickPredictionProductionEndpoint',
#     update_endpoint=True
# )

In [44]:
# %%time

# estimator.fit(inputs)

In [45]:
# estimator.deploy(
#     initial_instance_count=1,
#     instance_type='ml.c5.large',
#     endpoint_name='ClickPredictionProductionEndpoint',
#     update_endpoint=True
# )

In [46]:
# estimator.delete_endpoint()