In [1]:
# set up SageMaker environment
import sagemaker, boto3

# One boto3 session is shared by the SageMaker SDK session and the low-level client.
sess = boto3.Session()
sagemaker_session = sagemaker.Session(boto_session=sess)
sm = sess.client('sagemaker')

# IAM role used by the estimators created further down.
role = sagemaker.get_execution_role()

In [2]:
# Show the execution role ARN returned by sagemaker.get_execution_role().
# NOTE(review): the printed ARN embeds the AWS account ID - clear this output before sharing the notebook.
print(role)

arn:aws:iam::844357513200:role/AmazonSageMaker-FullAccessRole


In [3]:
# load packages
import time, os, sys
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

from sagemaker.tensorflow import TensorFlow

Using TensorFlow backend.





In [4]:
# Location of the raw dataset in the S3 bucket
bucket = "sagemaker-michaelwu-ma5852"
subfolder = 'src'
input_file_name = 'diabetic_data.csv'
input_file_path = 's3://{}/{}/{}'.format(bucket, subfolder, input_file_name)

# Read the CSV from the s3:// path and display (rows, columns) as the cell output
df_raw = pd.read_csv(filepath_or_buffer=input_file_path)
df_raw.shape

(101766, 50)

In [5]:
# Collapse the target to a binary label: the assignment only asks whether a
# patient was readmitted, not how long it took. Every value other than 'NO' becomes 'YES'.
df_raw['readmitted_mod'] = np.where(df_raw['readmitted'].eq('NO'), 'NO', 'YES')

In [6]:
# Proposed predictors (features). Column order is kept as-is because it fixes
# the column order of the one-hot encoded matrix built from X.
features = [
    'max_glu_serum', 'A1Cresult', 'change', 'diabetesMed',
    'metformin', 'repaglinide', 'nateglinide', 'chlorpropamide',
    'glimepiride', 'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide',
    'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone',
    'tolazamide', 'examide', 'citoglipton', 'insulin',
    'glyburide-metformin', 'glipizide-metformin',
    'glimepiride-pioglitazone', 'metformin-rosiglitazone',
    'metformin-pioglitazone',
]

# Predictor frame, single-column target frame, and the number of distinct target labels
X = df_raw.loc[:, features]
y = df_raw.loc[:, ['readmitted_mod']]
num_class = len(pd.unique(df_raw['readmitted_mod']))

In [7]:
'''
    One-hot encode the categorical data.
    This is needed because all the predictors and the target variable are categorical.
'''
# prepare input data
def prepare_inputs(X):
    """One-hot encode every column of ``X``.

    A fresh ``OneHotEncoder`` is fitted on ``X`` and applied to the same data.

    Args:
        X: table of categorical predictors accepted by ``OneHotEncoder``.

    Returns:
        The encoded data converted to a dense array via ``toarray()``.
    """
    encoded = OneHotEncoder().fit_transform(X)
    return encoded.toarray()

# prepare target
def prepare_target(y):
    """Label-encode the target and expand it to a one-hot matrix.

    Args:
        y: target labels, either 1-D or a single-column 2-D table
            (the notebook passes a one-column DataFrame).

    Returns:
        Array produced by ``np_utils.to_categorical`` with ``num_class``
        columns (``num_class`` is the module-level class count).
    """
    # LabelEncoder expects a 1-D array; passing an (n, 1) column makes sklearn
    # emit a DataConversionWarning on fit and again on transform. Flatten the
    # single-column case up front; any other shape is left untouched so the
    # encoder still rejects it exactly as before.
    labels = np.asarray(y)
    if labels.ndim == 2 and labels.shape[1] == 1:
        labels = labels.ravel()

    le = LabelEncoder()
    y_enc = le.fit_transform(labels)
    y_enc = np_utils.to_categorical(y_enc, num_class)
    return y_enc

# Encode predictors first, then the target (same order as the separate calls).
X_enc, y_enc = prepare_inputs(X), prepare_target(y)

  return f(*args, **kwargs)


In [8]:
# Train/test split of the encoded predictors and target
X_train, X_test, y_train, y_test = train_test_split(
    X_enc,
    y_enc,
    test_size=0.2,      # 80/20 split
    random_state=1234,  # fixed seed so the split is repeatable
)

In [9]:
# Write the training and test arrays to ./data as .npz archives; each archive
# stores the predictors under 'feature' and the labels under 'target'.
os.makedirs("./data", exist_ok=True)
np.savez(
    './data/training',
    feature=X_train,
    target=y_train,
)
np.savez(
    './data/test',
    feature=X_test,
    target=y_test,
)

# Local folder referenced by the local-mode estimator's output path
os.makedirs("./output", exist_ok=True)

In [10]:
# Train the baseline model locally.

# Local-mode input channels and output location
local_training_input_path = 'file://data/training.npz'
local_test_input_path = 'file://data/test.npz'
output = 'file://output'

tf_estimator_local = TensorFlow(
    entry_point='train_nn_baseline.py',  # training script
    source_dir='.',
    role=role,
    instance_type='local',               # local mode instead of an EC2 instance type
    instance_count=1,                    # number of instances to use
    framework_version='2.1.0',           # TensorFlow version
    py_version='py3',
    script_mode=True,                    # train from a Python script
    hyperparameters={'epochs': 1},       # one epoch is enough to check that the code runs
    output_path=output,
)

In [11]:
# Local training run: checks that the training script works end to end.
tf_estimator_local.fit(
    inputs={
        'training': local_training_input_path,
        'test': local_test_input_path,
    }
)

Creating 0whe9uwhac-algo-1-7aiwt ... 
Creating 0whe9uwhac-algo-1-7aiwt ... done
Attaching to 0whe9uwhac-algo-1-7aiwt
[36m0whe9uwhac-algo-1-7aiwt |[0m 2022-02-10 11:27:15,006 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training
[36m0whe9uwhac-algo-1-7aiwt |[0m 2022-02-10 11:27:15,015 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36m0whe9uwhac-algo-1-7aiwt |[0m 2022-02-10 11:27:15,629 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36m0whe9uwhac-algo-1-7aiwt |[0m 2022-02-10 11:27:15,651 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36m0whe9uwhac-algo-1-7aiwt |[0m 2022-02-10 11:27:15,672 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36m0whe9uwhac-algo-1-7aiwt |[0m 2022-02-10 11:27:15,685 sagemaker-containers INFO     Invoking user script
[36m0whe9uwhac-algo-1-7aiwt |[0m 
[36m0whe9uwhac-algo-1-7aiwt |[0m Training E

In [12]:
# Train the model on AWS: first upload the prepared data to the S3 bucket.
prefix = 'A2_preprocessed_data'

training_input_path = sagemaker_session.upload_data(
    path='data/training.npz',
    bucket=bucket,
    key_prefix=f'{prefix}/training',
)

test_input_path = sagemaker_session.upload_data(
    path='data/test.npz',
    bucket=bucket,
    key_prefix=f'{prefix}/test',
)

# Print the S3 URIs returned by the uploads
print(training_input_path)
print(test_input_path)

s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/training/training.npz
s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/test/test.npz


In [13]:
# Baseline model trained on a SageMaker-managed instance.
tf_estimator_baseline_awb = TensorFlow(
    base_job_name='A2-MLP-NN-baseline',
    entry_point='train_nn_baseline.py',  # training script
    source_dir='.',
    role=role,
    instance_type='ml.m5.4xlarge',
    instance_count=1,                    # number of instances to use
    framework_version='2.1.0',           # TensorFlow version
    py_version='py3',
    script_mode=True,                    # train from a Python script
    hyperparameters={'epochs': 30},
)

In [14]:
# Launch the baseline training job with the uploaded S3 channels.
tf_estimator_baseline_awb.fit(
    inputs={
        'training': training_input_path,
        'test': test_input_path,
    }
)

2022-02-10 11:27:38 Starting - Starting the training job...
2022-02-10 11:28:02 Starting - Launching requested ML instancesProfilerReport-1644492451: InProgress
.........
2022-02-10 11:29:22 Starting - Preparing the instances for training......
2022-02-10 11:30:33 Downloading - Downloading input data
2022-02-10 11:30:33 Training - Downloading the training image..
2022-02-10 11:31:03 Training - Training image download completed. Training in progress.[34m2022-02-10 11:30:53,483 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2022-02-10 11:30:53,490 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-10 11:31:04,352 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-10 11:31:04,369 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-10 11:31:04,385 sagemaker-containers INFO     No GPUs detected (normal if no gpus 

In [12]:
# Hyperparameter tuning on the baseline NN model: define the search space.
from sagemaker.tuner import IntegerParameter

# Integer (min, max) bounds for each tuned hyperparameter
hyperparameter_ranges = {
    name: IntegerParameter(low, high)
    for name, low, high in (
        ('epochs', 5, 300),
        ('batch-size', 32, 1024),
    )
}

print(hyperparameter_ranges)

{'epochs': <sagemaker.parameter.IntegerParameter object at 0x7feb21680390>, 'batch-size': <sagemaker.parameter.IntegerParameter object at 0x7feb18748c50>}


In [13]:
# Metric used to evaluate the training jobs, and the direction to optimise it in
objective_metric_name = 'validation_accuracy'
objective_type = 'Maximize'

# Metric definitions: every pattern has the form "<label>: <number>".
# NOTE(review): 'loss: ', 'accuracy: ', 'precision: ' and 'recall: ' are also
# substrings of their 'val_'-prefixed counterparts, so the training_* patterns
# can match validation fields as well - confirm against the training script's log format.
metric_definitions = [
    {'Name': metric, 'Regex': f'{label}: ([0-9\\.]+)'}
    for metric, label in (
        ('training_loss', 'loss'),
        ('training_accuracy', 'accuracy'),
        ('validation_loss', 'val_loss'),
        ('validation_accuracy', 'val_accuracy'),
        ('training_precision', 'precision'),
        ('training_recall', 'recall'),
    )
]

In [14]:
# Estimator used as the template for every baseline tuning job.
# No 'epochs' / 'batch-size' are set here; those are the hyperparameters the tuner varies.
tf_estimator_baseline_hpo_awb = TensorFlow(
    entry_point='train_nn_baseline.py',
    role=role,
    instance_type='ml.m5.4xlarge',
    instance_count=1,
    framework_version='2.1.0',
    py_version='py3',
    script_mode=True,
)

from sagemaker.tuner import HyperparameterTuner

# At most 6 training jobs in total, 2 at a time
tuner = HyperparameterTuner(
    estimator=tf_estimator_baseline_hpo_awb,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    objective_type=objective_type,
    max_jobs=6,
    max_parallel_jobs=2,
    base_tuning_job_name='HPO-A2-MLP-NN-baseline',
)

In [18]:
# Start the baseline tuning job on the uploaded S3 channels.
tuner.fit(
    inputs={
        'training': training_input_path,
        'test': test_input_path,
    }
)

.................................................................................................................................................................................................................!


In [15]:
# Re-attach to the finished baseline tuning job and pull its results into a DataFrame.
# NOTE(review): the job name is hard-coded, so re-running the tuning cell will not
# change which job is inspected here - update the name after a fresh tuning run.
tuner = HyperparameterTuner.attach(
    tuning_job_name="HPO-A2-MLP-NN-baseli-220210-1139"
)
analytics = tuner.analytics()
df_hpo_res = analytics.dataframe()

# Display one row per training job
df_hpo_res

Unnamed: 0,batch-size,epochs,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,286.0,161.0,HPO-A2-MLP-NN-baseli-220210-1139-006-deaa9c9b,Completed,0.5569,2022-02-10 11:51:48+00:00,2022-02-10 11:55:23+00:00,215.0
1,300.0,179.0,HPO-A2-MLP-NN-baseli-220210-1139-005-c9c7c985,Completed,0.5534,2022-02-10 11:51:58+00:00,2022-02-10 11:55:41+00:00,223.0
2,984.0,269.0,HPO-A2-MLP-NN-baseli-220210-1139-004-e5dc6ea7,Completed,0.5523,2022-02-10 11:46:27+00:00,2022-02-10 11:48:54+00:00,147.0
3,544.0,89.0,HPO-A2-MLP-NN-baseli-220210-1139-003-f4a3c527,Completed,0.5522,2022-02-10 11:46:13+00:00,2022-02-10 11:49:01+00:00,168.0
4,226.0,17.0,HPO-A2-MLP-NN-baseli-220210-1139-002-bde00239,Completed,0.5542,2022-02-10 11:41:57+00:00,2022-02-10 11:43:19+00:00,82.0
5,908.0,179.0,HPO-A2-MLP-NN-baseli-220210-1139-001-2cdcbae6,Completed,0.5544,2022-02-10 11:41:49+00:00,2022-02-10 11:43:51+00:00,122.0


In [16]:
# Keep only the job with the highest objective value (validation accuracy) as a one-row frame.
best_job = df_hpo_res.sort_values(by='FinalObjectiveValue', ascending=False).head(1)

best_job

Unnamed: 0,batch-size,epochs,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,286.0,161.0,HPO-A2-MLP-NN-baseli-220210-1139-006-deaa9c9b,Completed,0.5569,2022-02-10 11:51:48+00:00,2022-02-10 11:55:23+00:00,215.0


In [17]:
# Train the baseline model on AWS using the optimal hyperparameters.
# NOTE(review): this repeats the upload done earlier in the notebook with the same
# local files, bucket and key prefixes.
prefix = 'A2_preprocessed_data'

training_input_path = sagemaker_session.upload_data(
    path='data/training.npz',
    bucket=bucket,
    key_prefix=f'{prefix}/training',
)

test_input_path = sagemaker_session.upload_data(
    path='data/test.npz',
    bucket=bucket,
    key_prefix=f'{prefix}/test',
)

# Print the S3 URIs returned by the uploads
print(training_input_path)
print(test_input_path)

s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/training/training.npz
s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/test/test.npz


In [18]:
# Hyperparameters of the best tuning job; the results frame stores them as floats,
# so cast them back to int before passing them to the estimator.
baseline_optimal_epochs = int(best_job['epochs'].iloc[0])
baseline_optimal_batch_size = int(best_job['batch-size'].iloc[0])

tf_estimator_baseline_hpo_tuned_awb = TensorFlow(
    base_job_name='A2-MLP-NN-baseline-tuned',
    entry_point='train_nn_baseline.py',  # training script
    source_dir='.',
    role=role,
    instance_type='ml.m5.4xlarge',
    instance_count=1,                    # number of instances to use
    framework_version='2.1.0',           # TensorFlow version
    py_version='py3',
    script_mode=True,                    # train from a Python script
    hyperparameters={
        'epochs': baseline_optimal_epochs,
        'batch-size': baseline_optimal_batch_size,
    },
)

In [19]:
# Train the baseline model with the tuned hyperparameters.
tf_estimator_baseline_hpo_tuned_awb.fit(
    inputs={
        'training': training_input_path,
        'test': test_input_path,
    }
)

2022-02-10 12:05:28 Starting - Starting the training job...
2022-02-10 12:05:51 Starting - Launching requested ML instancesProfilerReport-1644494721: InProgress
.........
2022-02-10 12:07:11 Starting - Preparing the instances for training......
2022-02-10 12:08:20 Downloading - Downloading input data
2022-02-10 12:08:20 Training - Downloading the training image..[34m2022-02-10 12:08:42,879 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2022-02-10 12:08:42,886 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m

2022-02-10 12:08:52 Training - Training image download completed. Training in progress.[34m2022-02-10 12:08:57,606 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-10 12:08:57,622 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-10 12:08:57,637 sagemaker-containers INFO     No GPUs detected (normal if no gpus 

In [34]:
# Deploy the hyperparameter-tuned baseline model.
# The endpoint name carries a UTC timestamp suffix.
tf_tuned_baseline_endpoint_name = (
    f"A2-keras-tf-tuned-baseline-nn-{time.strftime('%Y-%m-%d-%H-%M-%S', time.gmtime())}"
)

# NOTE(review): no cell in the visible notebook deletes this endpoint - confirm it is cleaned up elsewhere.
predictor_tuned_baseline_awb = tf_estimator_baseline_hpo_tuned_awb.deploy(
    endpoint_name=tf_tuned_baseline_endpoint_name,
    instance_type='ml.m5.4xlarge',
    initial_instance_count=1,
)

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


----!

In [20]:
# Apply dropout and early stopping to the baseline NN model above.
# Train locally first.

# Local-mode input channels and output location
local_training_input_path = 'file://data/training.npz'
local_test_input_path = 'file://data/test.npz'
output = 'file://output'

tf_estimator_mod_local = TensorFlow(
    entry_point='train_nn_modified.py',  # training script
    source_dir='.',
    role=role,
    instance_type='local',               # local mode instead of an EC2 instance type
    instance_count=1,                    # number of instances to use
    framework_version='2.1.0',           # TensorFlow version
    py_version='py3',
    script_mode=True,                    # train from a Python script
    hyperparameters={'epochs': 1},       # one epoch is enough to check that the code runs
    output_path=output,
)

In [21]:
# Local training run of the modified model on the local channels.
tf_estimator_mod_local.fit(
    inputs={
        'training': local_training_input_path,
        'test': local_test_input_path,
    }
)

Creating fvjp14r30t-algo-1-ion3h ... 
Creating fvjp14r30t-algo-1-ion3h ... done
Attaching to fvjp14r30t-algo-1-ion3h
[36mfvjp14r30t-algo-1-ion3h |[0m 2022-02-10 12:16:18,546 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training
[36mfvjp14r30t-algo-1-ion3h |[0m 2022-02-10 12:16:18,554 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mfvjp14r30t-algo-1-ion3h |[0m 2022-02-10 12:16:19,140 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mfvjp14r30t-algo-1-ion3h |[0m 2022-02-10 12:16:19,160 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mfvjp14r30t-algo-1-ion3h |[0m 2022-02-10 12:16:19,180 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mfvjp14r30t-algo-1-ion3h |[0m 2022-02-10 12:16:19,192 sagemaker-containers INFO     Invoking user script
[36mfvjp14r30t-algo-1-ion3h |[0m 
[36mfvjp14r30t-algo-1-ion3h |[0m Training E

In [22]:
# Apply dropout and early stopping to the baseline NN model above and train on AWS.
# NOTE(review): this repeats the upload done earlier in the notebook with the same
# local files, bucket and key prefixes.
prefix = 'A2_preprocessed_data'

training_input_path = sagemaker_session.upload_data(
    path='data/training.npz',
    bucket=bucket,
    key_prefix=f'{prefix}/training',
)

test_input_path = sagemaker_session.upload_data(
    path='data/test.npz',
    bucket=bucket,
    key_prefix=f'{prefix}/test',
)

# Print the S3 URIs returned by the uploads
print(training_input_path)
print(test_input_path)

s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/training/training.npz
s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/test/test.npz


In [23]:
# Modified model trained on a SageMaker-managed instance.
tf_estimator_mod_awb = TensorFlow(
    base_job_name='A2-MLP-NN-modified',
    entry_point='train_nn_modified.py',  # training script
    source_dir='.',
    role=role,
    instance_type='ml.m5.4xlarge',
    instance_count=1,                    # number of instances to use
    framework_version='2.1.0',           # TensorFlow version
    py_version='py3',
    script_mode=True,                    # train from a Python script
    hyperparameters={'epochs': 30},
)

In [24]:
# Launch the modified-model training job with the uploaded S3 channels.
tf_estimator_mod_awb.fit(
    inputs={
        'training': training_input_path,
        'test': test_input_path,
    }
)

2022-02-10 12:16:55 Starting - Starting the training job...
2022-02-10 12:17:19 Starting - Launching requested ML instancesProfilerReport-1644495409: InProgress
.........
2022-02-10 12:18:39 Starting - Preparing the instances for training......
2022-02-10 12:19:52 Downloading - Downloading input data
2022-02-10 12:19:52 Training - Downloading the training image...
2022-02-10 12:20:20 Training - Training image download completed. Training in progress.[34m2022-02-10 12:20:11,690 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2022-02-10 12:20:11,696 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-10 12:20:15,425 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-10 12:20:15,440 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-10 12:20:15,454 sagemaker-containers INFO     No GPUs detected (normal if no gpus

In [25]:
# Hyperparameter tuning on the modified NN model: define the search space.
from sagemaker.tuner import IntegerParameter

# Integer bounds for each tuned hyperparameter
hyperparameter_ranges = dict([
    ('epochs', IntegerParameter(min_value=5, max_value=300)),
    ('batch-size', IntegerParameter(min_value=32, max_value=1024)),
])

print(hyperparameter_ranges)

{'epochs': <sagemaker.parameter.IntegerParameter object at 0x7feb21709240>, 'batch-size': <sagemaker.parameter.IntegerParameter object at 0x7feb0865abe0>}


In [26]:
# Metric used to evaluate the training jobs, and the direction to optimise it in
objective_type = 'Maximize'
objective_metric_name = 'validation_accuracy'

# Metric definitions: every pattern has the form "<label>: <number>".
# NOTE(review): the unprefixed labels ('loss', 'accuracy', 'precision', 'recall')
# are substrings of the 'val_'-prefixed ones, so the training_* patterns can also
# match validation fields - confirm against the training script's log format.
metric_definitions = [
    {'Name': metric, 'Regex': '{}: ([0-9\\.]+)'.format(label)}
    for metric, label in zip(
        ('training_loss', 'training_accuracy', 'validation_loss',
         'validation_accuracy', 'training_precision', 'training_recall'),
        ('loss', 'accuracy', 'val_loss',
         'val_accuracy', 'precision', 'recall'),
    )
]

In [27]:
# Estimator used as the template for every modified-model tuning job.
# No 'epochs' / 'batch-size' are set here; those are the hyperparameters the tuner varies.
tf_estimator_modified_hpo_awb = TensorFlow(
    entry_point='train_nn_modified.py',
    role=role,
    instance_type='ml.m5.4xlarge',
    instance_count=1,
    framework_version='2.1.0',
    py_version='py3',
    script_mode=True,
)

from sagemaker.tuner import HyperparameterTuner

# At most 6 training jobs in total, 2 at a time
tuner_mod = HyperparameterTuner(
    estimator=tf_estimator_modified_hpo_awb,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    objective_type=objective_type,
    max_jobs=6,
    max_parallel_jobs=2,
    base_tuning_job_name='HPO-A2-MLP-NN-modified',
)

In [28]:
# Start the modified-model tuning job on the uploaded S3 channels.
tuner_mod.fit(
    inputs={
        'training': training_input_path,
        'test': test_input_path,
    }
)

.........................................................................................................................................................................................................................!


In [29]:
# Re-attach to the finished modified-model tuning job and pull its results into a DataFrame.
# NOTE(review): the job name is hard-coded, so re-running the tuning cell will not
# change which job is inspected here - update the name after a fresh tuning run.
tuner_mod = HyperparameterTuner.attach(
    tuning_job_name="HPO-A2-MLP-NN-modifi-220210-2151"
)
analytics_mod = tuner_mod.analytics()
df_hpo_mod_res = analytics_mod.dataframe()

# Display one row per training job
df_hpo_mod_res

Unnamed: 0,batch-size,epochs,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,734.0,288.0,HPO-A2-MLP-NN-modifi-220210-2151-006-beb9a842,Completed,0.5518,2022-02-10 22:03:45+00:00,2022-02-10 22:07:26+00:00,221.0
1,123.0,286.0,HPO-A2-MLP-NN-modifi-220210-2151-005-fd7efea0,Completed,0.5526,2022-02-10 22:04:32+00:00,2022-02-10 22:09:12+00:00,280.0
2,770.0,166.0,HPO-A2-MLP-NN-modifi-220210-2151-004-abbe6fb3,Completed,0.5506,2022-02-10 21:59:28+00:00,2022-02-10 22:01:29+00:00,121.0
3,754.0,294.0,HPO-A2-MLP-NN-modifi-220210-2151-003-f7bc1fdb,Completed,0.5578,2022-02-10 21:58:27+00:00,2022-02-10 22:01:25+00:00,178.0
4,848.0,139.0,HPO-A2-MLP-NN-modifi-220210-2151-002-d3d0ed53,Completed,0.5522,2022-02-10 21:54:07+00:00,2022-02-10 21:56:17+00:00,130.0
5,921.0,119.0,HPO-A2-MLP-NN-modifi-220210-2151-001-07e52db1,Completed,0.5518,2022-02-10 21:54:06+00:00,2022-02-10 21:56:09+00:00,123.0


In [30]:
# Keep only the job with the highest objective value (validation accuracy) as a one-row frame.
best_job_mod = df_hpo_mod_res.sort_values(by='FinalObjectiveValue', ascending=False).head(1)

best_job_mod

Unnamed: 0,batch-size,epochs,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
3,754.0,294.0,HPO-A2-MLP-NN-modifi-220210-2151-003-f7bc1fdb,Completed,0.5578,2022-02-10 21:58:27+00:00,2022-02-10 22:01:25+00:00,178.0


In [31]:
# Train the modified model on AWS using the optimal hyperparameters.
# NOTE(review): this repeats the upload done earlier in the notebook with the same
# local files, bucket and key prefixes.
prefix = 'A2_preprocessed_data'

training_input_path = sagemaker_session.upload_data(
    path='data/training.npz',
    bucket=bucket,
    key_prefix=f'{prefix}/training',
)

test_input_path = sagemaker_session.upload_data(
    path='data/test.npz',
    bucket=bucket,
    key_prefix=f'{prefix}/test',
)

# Print the S3 URIs returned by the uploads
print(training_input_path)
print(test_input_path)

s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/training/training.npz
s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/test/test.npz


In [32]:
# Hyperparameters of the best tuning job; the results frame stores them as floats,
# so cast them back to int before passing them to the estimator.
modified_optimal_epochs = int(best_job_mod['epochs'].iloc[0])
modified_optimal_batch_size = int(best_job_mod['batch-size'].iloc[0])

tf_estimator_modified_hpo_tuned_awb = TensorFlow(
    base_job_name='A2-MLP-NN-modified-tuned',
    entry_point='train_nn_modified.py',  # training script
    source_dir='.',
    role=role,
    instance_type='ml.m5.4xlarge',
    instance_count=1,                    # number of instances to use
    framework_version='2.1.0',           # TensorFlow version
    py_version='py3',
    script_mode=True,                    # train from a Python script
    hyperparameters={
        'epochs': modified_optimal_epochs,
        'batch-size': modified_optimal_batch_size,
    },
)

In [33]:
# Train the modified model with the tuned hyperparameters.
tf_estimator_modified_hpo_tuned_awb.fit(
    inputs={
        'training': training_input_path,
        'test': test_input_path,
    }
)

2022-02-10 22:10:59 Starting - Starting the training job...
2022-02-10 22:11:27 Starting - Launching requested ML instancesProfilerReport-1644531053: InProgress
.....................
2022-02-10 22:14:49 Starting - Preparing the instances for training...
2022-02-10 22:15:28 Downloading - Downloading input data...
2022-02-10 22:16:00 Training - Training image download completed. Training in progress..[34m2022-02-10 22:16:04,946 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2022-02-10 22:16:04,953 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-10 22:16:21,137 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-10 22:16:21,152 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-10 22:16:21,166 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-10 22:16:21,176 sagemak

In [35]:
# Deploy the hyperparameter-tuned modified NN model.
# The endpoint name carries a UTC timestamp suffix.
tf_tuned_modified_endpoint_name = (
    f"A2-keras-tf-tuned-modified-nn-{time.strftime('%Y-%m-%d-%H-%M-%S', time.gmtime())}"
)

# NOTE(review): no cell in the visible notebook deletes this endpoint - confirm it is cleaned up elsewhere.
predictor_tuned_modified_awb = tf_estimator_modified_hpo_tuned_awb.deploy(
    endpoint_name=tf_tuned_modified_endpoint_name,
    instance_type='ml.m5.4xlarge',
    initial_instance_count=1,
)

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


----!