In [18]:
# set up SageMaker environment
import sagemaker, boto3

# a single boto3 session backs both the low-level client and the SDK session below
sess = boto3.Session()
# low-level SageMaker API client (not referenced again in the visible cells)
sm = sess.client('sagemaker')
# IAM role ARN handed to every estimator created later in the notebook
role = sagemaker.get_execution_role()
# SDK-level session; used further down to upload the preprocessed data to S3
sagemaker_session = sagemaker.Session(boto_session=sess)

In [16]:
# NOTE(review): the printed ARN embeds the AWS account ID — clear this cell's output before sharing the notebook
print(role)

arn:aws:iam::844357513200:role/AmazonSageMaker-FullAccessRole


In [2]:
# load packages
import time, os, sys
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

from sagemaker.tensorflow import TensorFlow

Using TensorFlow backend.





In [3]:
# load data from S3 bucket
# `bucket` is reused later as the upload destination for the preprocessed splits
bucket = "sagemaker-michaelwu-ma5852"
subfolder = 'src'
input_file_name = 'diabetic_data.csv'
input_file_path = f's3://{bucket}/{subfolder}/{input_file_name}'

# load data
# pandas is handed the s3:// URI directly; presumably this relies on an S3
# filesystem backend (e.g. s3fs) being installed in the kernel — confirm
df_raw = pd.read_csv(input_file_path)
# bare last expression: displays (rows, columns) of the raw frame
df_raw.shape

(101766, 50)

In [4]:
# subset data with relevant predictors
# proposed predictors (features)
# NOTE(review): judging by the names these are lab-result / medication columns — confirm against the dataset's data dictionary
features = ['max_glu_serum', 'A1Cresult', 'change', 'diabetesMed',
       'metformin', 'repaglinide', 'nateglinide', 'chlorpropamide',
       'glimepiride', 'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide',
       'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone',
       'tolazamide', 'examide', 'citoglipton', 'insulin',
       'glyburide-metformin', 'glipizide-metformin',
       'glimepiride-pioglitazone', 'metformin-rosiglitazone',
       'metformin-pioglitazone']

X = df_raw[features]
# double brackets keep the target as a single-column DataFrame rather than a Series
y = df_raw[['readmitted']]
# number of distinct target values; read as a global by prepare_target() below
num_class = len(df_raw['readmitted'].unique())

In [5]:
'''
    one hot encode categorical data
    this is because all the predictors and target variable are categorical data type
'''
# prepare input data
def prepare_inputs(X):
    """One-hot encode every column of ``X``.

    A fresh ``OneHotEncoder`` is fitted on ``X`` itself and then applied to it.

    Args:
        X: table of categorical predictors.

    Returns:
        The dense array obtained by calling ``.toarray()`` on the encoder output.
    """
    encoder = OneHotEncoder()
    # fit and apply in one step, then densify the encoder's output
    return encoder.fit_transform(X).toarray()

# prepare target
def prepare_target(y):
    """Label-encode the target and expand it to a one-hot matrix.

    Args:
        y: target values. The notebook passes a single-column DataFrame;
            any input that flattens to one dimension is accepted.

    Returns:
        Array with one row per sample and ``num_class`` columns, as produced by
        ``np_utils.to_categorical``.

    Note:
        Reads the module-level ``num_class`` defined in an earlier cell.
    """
    le = LabelEncoder()
    # LabelEncoder expects a 1-D array. Passing the (n, 1) column straight in
    # makes scikit-learn flatten it itself and emit a DataConversionWarning,
    # so flatten explicitly instead; the encoded values are unchanged.
    y_flat = np.ravel(y)
    y_enc = le.fit_transform(y_flat)
    y_enc = np_utils.to_categorical(y_enc, num_class)
    return y_enc

# NOTE: both encoders are fitted on the full dataset here, before the train/test split in the next cell
X_enc = prepare_inputs(X)
y_enc = prepare_target(y)

  return f(*args, **kwargs)


In [6]:
'''
    train test split
'''
# hold out 20% of the encoded rows for testing; random_state pins the split so re-runs match
# NOTE(review): no `stratify` argument is passed, so class proportions across the two splits are not enforced — confirm that is intended
X_train, X_test, y_train, y_test = train_test_split(X_enc, y_enc, test_size=0.2, random_state=1234) # 80/20 split

In [7]:
# make sure the local working folders exist (no error if they already do),
# then persist each split as an .npz archive holding `feature` and `target` arrays
for local_dir in ("./data", "./output"):
    os.makedirs(local_dir, exist_ok=True)

np.savez('./data/training', feature=X_train, target=y_train)
np.savez('./data/test', feature=X_test, target=y_test)

In [8]:
'''
    train model locally
'''
# set environment variables
# file:// URIs point the local run at the .npz archives written to ./data in the previous cell
local_training_input_path = 'file://data/training.npz'
local_test_input_path = 'file://data/test.npz'
output = 'file://output'

tf_estimator = TensorFlow(entry_point='train_nn_baseline.py',
                         role=role,
                         source_dir='.',
                         instance_count=1, # the number of EC2 instances to use
                         instance_type='local', # 'local' instead of an EC2 instance type: train in a container on this machine
                         framework_version='2.1.0', # tensorflow version
                         py_version='py3',
                         script_mode=True, # enable training from a user-supplied python script
                         hyperparameters={'epochs':1}, # a single epoch is enough to smoke-test the training script
                         output_path=output)

In [9]:
# train model locally to see whether the code is working or not
# the dict keys name the input channels; presumably the entry script locates its
# data by these channel names — confirm against train_nn_baseline.py
tf_estimator.fit({'training': local_training_input_path, 'test': local_test_input_path})

Creating avxno56gkp-algo-1-nc3a7 ... 
Creating avxno56gkp-algo-1-nc3a7 ... done
Attaching to avxno56gkp-algo-1-nc3a7
[36mavxno56gkp-algo-1-nc3a7 |[0m 2022-02-05 06:32:26,284 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training
[36mavxno56gkp-algo-1-nc3a7 |[0m 2022-02-05 06:32:26,291 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mavxno56gkp-algo-1-nc3a7 |[0m 2022-02-05 06:32:26,733 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mavxno56gkp-algo-1-nc3a7 |[0m 2022-02-05 06:32:26,751 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mavxno56gkp-algo-1-nc3a7 |[0m 2022-02-05 06:32:26,768 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mavxno56gkp-algo-1-nc3a7 |[0m 2022-02-05 06:32:26,780 sagemaker-containers INFO     Invoking user script
[36mavxno56gkp-algo-1-nc3a7 |[0m 
[36mavxno56gkp-algo-1-nc3a7 |[0m Training E

In [11]:
'''
    train model on AWS
'''
# upload data to s3 bucket
# key prefix under which both splits are stored in `bucket`
prefix = 'A2_preprocessed_data'

# upload_data returns the S3 URI of the uploaded object (see the printed values below)
training_input_path = sagemaker_session.upload_data(path='data/training.npz', bucket=bucket, key_prefix=prefix+'/training')

test_input_path = sagemaker_session.upload_data(path='data/test.npz', bucket=bucket, key_prefix=prefix+'/test')

# show the resulting URIs; they are passed to fit() as the training/test channels
print(training_input_path)
print(test_input_path)

s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/training/training.npz
s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/test/test.npz


In [14]:
# Same entry script as the local smoke test, now targeting a managed training
# instance with a full 30-epoch run. `tf_estimator` is rebound here, replacing
# the local-mode estimator. No output_path is given in this configuration.
tf_estimator = TensorFlow(entry_point='train_nn_baseline.py',
                          base_job_name='A2-MLP-NN-baseline',
                          role=role,
                          source_dir='.',
                          instance_count=1, # the number of EC2 instances to use
                          instance_type='ml.m5.4xlarge',
                          framework_version='2.1.0', # tensorflow version
                          py_version='py3',
                          script_mode=True, # enable training from a user-supplied python script
                          hyperparameters={'epochs':30}
                         )

In [15]:
# launch the training job on AWS, using the S3 URIs returned by upload_data
# under the same channel names as the local run
tf_estimator.fit({'training': training_input_path, 'test': test_input_path})

2022-02-05 06:40:47 Starting - Starting the training job...
2022-02-05 06:41:10 Starting - Launching requested ML instancesProfilerReport-1644043241: InProgress
...
2022-02-05 06:41:45 Starting - Preparing the instances for training.........
2022-02-05 06:43:15 Downloading - Downloading input data...
2022-02-05 06:43:35 Training - Downloading the training image..[34m2022-02-05 06:43:53,599 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2022-02-05 06:43:53,606 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-05 06:43:54,239 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-05 06:43:54,256 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-05 06:43:54,271 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-05 06:43:54,281 sagemaker-containers INFO     Invoking user 

In [23]:
'''
    deploy baseline nn model
'''
# a UTC timestamp suffix gives each deployment a distinct endpoint name
tf_baseline_endpoint_name = 'A2-keras-tf-baseline-nn-'+time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

# host the model trained by tf_estimator on one instance; the returned predictor
# is the handle used to delete the endpoint in a later cell
predictor = tf_estimator.deploy(initial_instance_count=1,
                                instance_type='ml.m5.4xlarge',
                                endpoint_name=tf_baseline_endpoint_name)

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


----!

In [37]:
# delete the endpoint
# (tears down the baseline endpoint created by tf_estimator.deploy above)
predictor.delete_endpoint()

In [25]:
'''
    hyperparameter tuning on the baseline nn model
'''
from sagemaker.tuner import IntegerParameter

# search space handed to HyperparameterTuner in a later cell; the keys are the
# hyperparameter names sent to the training script (presumably parsed there as
# --epochs / --batch-size — confirm against train_nn_baseline.py)
hyperparameter_ranges = {
    'epochs': IntegerParameter(5, 300),
    'batch-size': IntegerParameter(32, 1024)
}

# NOTE(review): printing the dict only shows the IntegerParameter object reprs, not the range bounds
print(hyperparameter_ranges)

{'epochs': <sagemaker.parameter.IntegerParameter object at 0x7f17ef1ae588>, 'batch-size': <sagemaker.parameter.IntegerParameter object at 0x7f17ef1ae400>}


In [27]:
objective_metric_name = 'validation_accuracy' # name of the metric for evaluating training jobs

objective_type = 'Maximize'

# Regexes used to scrape metrics from the training log.
# The training-metric patterns are anchored with \b so that, for example,
# "loss: " cannot match inside "val_loss: ": "_" is a word character, so there
# is no word boundary between "val_" and "loss". Without the anchor the
# training_* patterns also match the corresponding val_* values on the same line.
metric_definitions = [
    {'Name': 'training_loss', 'Regex': r'\bloss: ([0-9\.]+)'},
    {'Name': 'training_accuracy', 'Regex': r'\baccuracy: ([0-9\.]+)'},
    {'Name': 'validation_loss', 'Regex': r'val_loss: ([0-9\.]+)'},
    {'Name': 'validation_accuracy', 'Regex': r'val_accuracy: ([0-9\.]+)'},
    {'Name': 'training_precision', 'Regex': r'\bprecision: ([0-9\.]+)'},
    {'Name': 'training_recall', 'Regex': r'\brecall: ([0-9\.]+)'}
]

In [28]:
# configure a training job using the Tensorflow estimator
# (no fixed hyperparameters are set here; the tuner receives the search ranges below)
from sagemaker.tuner import HyperparameterTuner

tf_estimator = TensorFlow(
    entry_point='train_nn_baseline.py',
    role=role,
    instance_count=1,
    instance_type='ml.m5.4xlarge',
    framework_version='2.1.0',
    py_version='py3',
    script_mode=True,
)

# wrap the estimator in a tuning job: at most 6 training jobs, 2 at a time,
# ranked by the objective metric scraped from the logs via metric_definitions
tuner = HyperparameterTuner(
    estimator=tf_estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    objective_type=objective_type,
    max_jobs=6,
    max_parallel_jobs=2,
    base_tuning_job_name='HPO-A2-MLP-NN-baseline',
)

In [29]:
# fit is used to train the model
# here it launches the tuning job, with the same S3 channels as the single baseline training run
tuner.fit({'training': training_input_path, 'test': test_input_path})

....................................................................................................................................................................................................................!


In [33]:
# inspect jobs with AWS
# NOTE(review): the tuning-job name is hardcoded to one specific past run and
# `tuner` is rebound to it — update the name when re-running the tuning cell
tuner = HyperparameterTuner.attach("HPO-A2-MLP-NN-baseli-220205-0951")
analytics = tuner.analytics()
# one row per training job: sampled hyperparameters, status, objective value and timings
df_hpo_res = analytics.dataframe()
df_hpo_res

Unnamed: 0,batch-size,epochs,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,893.0,70.0,HPO-A2-MLP-NN-baseli-220205-0951-006-7d266fbf,Completed,0.5341,2022-02-05 10:07:12+00:00,2022-02-05 10:08:37+00:00,85.0
1,906.0,60.0,HPO-A2-MLP-NN-baseli-220205-0951-005-0c0622c1,Completed,0.5341,2022-02-05 10:03:47+00:00,2022-02-05 10:05:37+00:00,110.0
2,979.0,39.0,HPO-A2-MLP-NN-baseli-220205-0951-004-330088de,Completed,0.5349,2022-02-05 09:59:30+00:00,2022-02-05 10:00:39+00:00,69.0
3,529.0,274.0,HPO-A2-MLP-NN-baseli-220205-0951-003-75030598,Completed,0.533,2022-02-05 09:58:04+00:00,2022-02-05 10:04:35+00:00,391.0
4,882.0,55.0,HPO-A2-MLP-NN-baseli-220205-0951-002-4ac75de7,Completed,0.5371,2022-02-05 09:53:55+00:00,2022-02-05 09:56:29+00:00,154.0
5,772.0,127.0,HPO-A2-MLP-NN-baseli-220205-0951-001-bbebefa7,Completed,0.5346,2022-02-05 09:53:46+00:00,2022-02-05 09:56:47+00:00,181.0


In [35]:
# select the best job with the highest accuracy
# rank all training jobs by objective value, best first ...
ranked_jobs = df_hpo_res.sort_values(by='FinalObjectiveValue', ascending=False)
# ... and keep only the top row, still as a one-row DataFrame
best_job = ranked_jobs.head(1)

best_job

Unnamed: 0,batch-size,epochs,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
4,882.0,55.0,HPO-A2-MLP-NN-baseli-220205-0951-002-4ac75de7,Completed,0.5371,2022-02-05 09:53:55+00:00,2022-02-05 09:56:29+00:00,154.0


In [36]:
# Read the job name directly from the single-row frame. The previous
# to_string(index=False).strip() went through pandas' display formatting, so the
# result depended on how the value was rendered rather than on the value itself.
best_job_name = best_job['TrainingJobName'].iloc[0]
best_job_name

'HPO-A2-MLP-NN-baseli-220205-0951-002-4ac75de7'

In [38]:
# endpoint is named after the best job picked from the analytics dataframe
best_job_endpoint_name = best_job_name + '-ep'

# NOTE(review): tuner.deploy() selects the training job itself (presumably the
# tuner's own best job — confirm); the name above assumes it is the same job
best_model_predictor = tuner.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.4xlarge',
    endpoint_name=best_job_endpoint_name
)


2022-02-05 09:56:29 Starting - Preparing the instances for training
2022-02-05 09:56:29 Downloading - Downloading input data
2022-02-05 09:56:29 Training - Training image download completed. Training in progress.
2022-02-05 09:56:29 Uploading - Uploading generated training model
2022-02-05 09:56:29 Completed - Training job completed

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.



----!

In [39]:
# delete endpoint once done
# (tears down the endpoint created by tuner.deploy above)
best_model_predictor.delete_endpoint()