## TF Script Mode HPO

This notebook outlines how to create a model using TF Script Mode, train locally, train in hosted mode, and deploy to an endpoint.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import sagemaker

## Data Preparation

In [None]:
# Read the raw churn CSV (path is relative to this notebook) into a DataFrame.
dataset_path = '../data/churn_modelling.csv'
dataset = pd.read_csv(dataset_path)

In [None]:
# Preview the first five rows (five is the default for DataFrame.head).
dataset.head()

In [None]:
# Select the model inputs and the target by column position:
# columns 3..12 become the feature matrix, column 13 the label vector.
feature_columns = slice(3, 13)
target_column = 13

X = dataset.iloc[:, feature_columns].values
y = dataset.iloc[:, target_column].values

In [None]:
# Encode categorical features.
#
# Columns 1 and 2 of X are label-encoded to integer codes, then column 1 is
# one-hot encoded. The one-hot columns are placed first in the result, followed
# by the untouched columns in their original order.

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

labelencoder_1 = LabelEncoder()
X[:, 1] = labelencoder_1.fit_transform(X[:, 1])

labelencoder_2 = LabelEncoder()
X[:, 2] = labelencoder_2.fit_transform(X[:, 2])

# `OneHotEncoder(categorical_features=[1])` was deprecated in scikit-learn 0.20
# and removed in 0.22. ColumnTransformer is the documented replacement; with
# `remainder='passthrough'` it keeps the same column layout (encoded columns
# first, remaining columns after). `sparse_threshold=0` forces a dense result.
onehotencoder = ColumnTransformer(
    transformers=[('onehot', OneHotEncoder(), [1])],
    remainder='passthrough',
    sparse_threshold=0,
)
# The passthrough columns come from an object array, so cast explicitly to the
# float64 matrix the old encoder used to return.
X = np.asarray(onehotencoder.fit_transform(X), dtype=np.float64)

# Drop the first one-hot column: it is implied by the remaining ones, so keeping
# it would make the dummy columns linearly dependent.
X = X[:, 1:]

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible across runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Report the number of samples (rows) in each split.
# `ndarray.size` is the total element count (rows x columns), which overstates
# the dataset size by a factor of the feature count, so use `shape[0]` instead.
print("Training dataset size: {}".format(X_train.shape[0]))
print("Test dataset size: {}".format(X_test.shape[0]))

In [None]:
# Standardise the features. The scaler is fitted on the training split only and
# then applied to both splits, so no test-set statistics leak into training.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Create the local data/, data/train/ and data/test/ folders under the current
# working directory (existing folders are left untouched).
working_dir = os.getcwd()

data_dir = os.path.join(working_dir, 'data')
train_dir = os.path.join(working_dir, 'data/train')
test_dir = os.path.join(working_dir, 'data/test')

for folder in (data_dir, train_dir, test_dir):
    os.makedirs(folder, exist_ok=True)

In [None]:
# Persist the train and test arrays as .npy files in their respective folders.
arrays_to_save = [
    (train_dir, 'x_train.npy', X_train),
    (train_dir, 'y_train.npy', y_train),
    (test_dir, 'x_test.npy', X_test),
    (test_dir, 'y_test.npy', y_test),
]

for target_dir, file_name, array in arrays_to_save:
    np.save(os.path.join(target_dir, file_name), array)

In [None]:
# Upload the local train/test folders to S3 and collect the returned locations
# as the named input channels for training.
bucket_name = 'first-stage-input'
s3_prefix = 'tf-keras-churn-hpo'

traindata_s3_prefix = '{}/data/train'.format(s3_prefix)
testdata_s3_prefix = '{}/data/test'.format(s3_prefix)

upload_session = sagemaker.Session()
train_s3 = upload_session.upload_data(
    path='./data/train/',
    bucket=bucket_name,
    key_prefix=traindata_s3_prefix,
)
test_s3 = upload_session.upload_data(
    path='./data/test/',
    bucket=bucket_name,
    key_prefix=testdata_s3_prefix,
)

inputs = dict(train=train_s3, test=test_s3)
print(inputs)

## Hyperparameter Optimization

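This section configures a TensorFlow estimator in script mode and launches a SageMaker hyperparameter tuning job that searches over `epochs` and `batch_size` to maximize the `acc` metric. If the use case requires individual predictions in near real-time, SageMaker hosted endpoints can be created.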

In [None]:
# Configure the script-mode TensorFlow estimator that each tuning job will run.
from sagemaker.tensorflow import TensorFlow

# NOTE(review): model_dir is not passed to the estimator in this cell; confirm
# whether it is used elsewhere.
model_dir = '/opt/ml/model'
train_instance_type = 'ml.m4.xlarge'

# Baseline values; the tuner cell defines search ranges for these same keys.
hyperparameters = dict(epochs=1, batch_size=64)

estimator = TensorFlow(
    entry_point='tf-script-mode-hpo.py',
    role=sagemaker.get_execution_role(),
    base_job_name='tf-keras-churn-hpo',
    train_instance_count=1,
    train_instance_type=train_instance_type,
    hyperparameters=hyperparameters,
    framework_version='1.12.0',
    py_version='py3',
    script_mode=True,
)

In [None]:
# Search space for the tuning job: both hyperparameters are integer ranges.
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

hyperparameter_ranges = dict(
    epochs=IntegerParameter(10, 100),
    batch_size=IntegerParameter(64, 256),
)

In [None]:
# Name of the metric the tuner optimises, plus the regex that captures its
# numeric value.
# NOTE(review): the pattern is unanchored, so it would also match a
# `val_acc: ...` token if the training script logs one - confirm which value
# is intended.
objective_metric_name = 'acc'
metric_definitions = [
    dict(Name='acc', Regex='acc: ([0-9\\.]+)'),
]

In [None]:
# Build the tuner: up to 9 training jobs in total, at most 3 running at once,
# maximising the objective metric defined earlier.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    objective_type='Maximize',
    max_jobs=9,
    max_parallel_jobs=3,
)

In [None]:
# Launch the hyperparameter tuning job on the uploaded train/test channels.
tuner.fit(inputs=inputs)

In [None]:
# Look up the current status of the most recent tuning job via the low-level
# SageMaker client; the status string is displayed as the cell output.
import boto3

sagemaker_client = boto3.client('sagemaker')
tuning_job_description = sagemaker_client.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuner.latest_tuning_job.job_name
)
tuning_job_description['HyperParameterTuningJobStatus']