In [1]:
# set up SageMaker environment
import sagemaker, boto3

# One boto3 session is created and shared by the low-level client and the SageMaker SDK session below.
sess = boto3.Session()
# Low-level SageMaker API client (not referenced again in the visible cells).
sm = sess.client('sagemaker')
# IAM role that is later passed to every estimator as `role=`.
role = sagemaker.get_execution_role()
# SDK session wrapping the same boto3 session; used later for `upload_data`.
sagemaker_session = sagemaker.Session(boto_session=sess)

In [2]:
# NOTE(review): the recorded output of this cell shows the full role ARN, including the
# AWS account ID; consider clearing the output before sharing the notebook.
print(role)

arn:aws:iam::844357513200:role/AmazonSageMaker-FullAccessRole


In [3]:
# load packages
import time, os, sys
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

from sagemaker.tensorflow import TensorFlow

Using TensorFlow backend.


In [4]:
# Location of the raw CSV in S3; `bucket` is reused later as the upload target.
bucket = "sagemaker-michaelwu-ma5852"
subfolder = 'src'
input_file_name = 'diabetic_data.csv'
input_file_path = f's3://{bucket}/{subfolder}/{input_file_name}'

# Read the CSV straight from the s3:// URL and show (rows, columns) as the cell output.
df_raw = pd.read_csv(input_file_path)
df_raw.shape

(101766, 50)

In [5]:
# Subset the raw frame to the proposed predictor columns and the target column.
# proposed predictors (features)
features = ['max_glu_serum', 'A1Cresult', 'change', 'diabetesMed',
       'metformin', 'repaglinide', 'nateglinide', 'chlorpropamide',
       'glimepiride', 'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide',
       'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone',
       'tolazamide', 'examide', 'citoglipton', 'insulin',
       'glyburide-metformin', 'glipizide-metformin',
       'glimepiride-pioglitazone', 'metformin-rosiglitazone',
       'metformin-pioglitazone']

X = df_raw[features]
# Double brackets: `y` is a single-column DataFrame, not a 1-D Series.
y = df_raw[['readmitted']]
# Number of distinct target values; read as a global by `prepare_target`.
num_class = len(df_raw['readmitted'].unique())

In [6]:
'''
    one hot encode categorical data
    this is because all the predictors and target variable are categorical data type
'''
# prepare input data
def prepare_inputs(X):
    """One-hot encode every column of ``X`` and return the result as a dense array.

    A fresh ``OneHotEncoder`` is fitted on ``X`` itself, so the encoded
    columns follow the categories found in that same data.
    """
    encoder = OneHotEncoder()
    sparse_encoded = encoder.fit(X).transform(X)
    return sparse_encoded.toarray()

# prepare target
def prepare_target(y):
    """Label-encode the target and expand it into one-hot rows.

    Parameters
    ----------
    y : array-like
        Class labels, either 1-D or a single-column frame/array.

    Returns
    -------
    The output of ``np_utils.to_categorical`` with ``num_class`` columns,
    where ``num_class`` is the notebook-level value computed from the
    target column.
    """
    # LabelEncoder works on 1-D input. The notebook passes a single-column
    # DataFrame, so flatten it explicitly instead of relying on scikit-learn
    # to reshape it (which it does with a warning).
    labels = np.ravel(y)
    y_enc = LabelEncoder().fit_transform(labels)
    return np_utils.to_categorical(y_enc, num_class)

# Encode the full predictor frame and the target; both results are split into train/test afterwards.
X_enc = prepare_inputs(X)
y_enc = prepare_target(y)

  return f(*args, **kwargs)


In [7]:
'''
    train test split
'''
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_enc, y_enc, test_size=0.2, random_state=1234) # 80/20 split

In [8]:
# make sure the local data/output folders exist, then persist both splits as .npz archives
for local_dir in ("./data", "./output"):
    os.makedirs(local_dir, exist_ok=True)

# each archive stores the encoded predictors under 'feature' and the encoded labels under 'target'
np.savez('./data/training', feature=X_train, target=y_train)
np.savez('./data/test', feature=X_test, target=y_test)

In [9]:
'''
    train model locally
'''
# local-mode input and output locations, given as file:// URIs pointing at the folders created above
local_training_input_path = 'file://data/training.npz'
local_test_input_path = 'file://data/test.npz'
output = 'file://output'

# Estimator that runs the baseline training script in local mode as a quick smoke test.
tf_estimator_local = TensorFlow(entry_point='train_nn_baseline.py',
                         role=role,
                         source_dir='.',
                         instance_count=1, # the number of EC2 instances to use
                         instance_type='local', # 'local' runs the job on this machine instead of a remote instance
                         framework_version='2.1.0', # tensorflow version
                         py_version='py3',
                         script_mode=True, # enable using a python script to train the model
                         hyperparameters={'epochs':1}, # only need 1 epoch to test whether our code is working or not
                         output_path=output)

In [10]:
# train model locally to see whether the code is working or not
# The dict keys ('training', 'test') name the input channels; presumably the training
# script reads them through the matching channel settings — confirm in train_nn_baseline.py.
tf_estimator_local.fit({'training': local_training_input_path, 'test': local_test_input_path})

Creating cw1a47kojv-algo-1-qylbk ... 
Creating cw1a47kojv-algo-1-qylbk ... done
Attaching to cw1a47kojv-algo-1-qylbk
[36mcw1a47kojv-algo-1-qylbk |[0m 2022-02-06 09:22:43,759 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training
[36mcw1a47kojv-algo-1-qylbk |[0m 2022-02-06 09:22:43,766 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mcw1a47kojv-algo-1-qylbk |[0m 2022-02-06 09:22:44,275 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mcw1a47kojv-algo-1-qylbk |[0m 2022-02-06 09:22:44,295 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mcw1a47kojv-algo-1-qylbk |[0m 2022-02-06 09:22:44,313 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mcw1a47kojv-algo-1-qylbk |[0m 2022-02-06 09:22:44,325 sagemaker-containers INFO     Invoking user script
[36mcw1a47kojv-algo-1-qylbk |[0m 
[36mcw1a47kojv-algo-1-qylbk |[0m Training E

In [11]:
'''
    train model on AWS
'''
# upload the locally saved .npz files to the S3 bucket; each call returns the S3 URI of the uploaded file
prefix = 'A2_preprocessed_data'

training_input_path = sagemaker_session.upload_data(path='data/training.npz', bucket=bucket, key_prefix=prefix+'/training')

test_input_path = sagemaker_session.upload_data(path='data/test.npz', bucket=bucket, key_prefix=prefix+'/test')

# show the resulting S3 URIs; they are the channel inputs for the remote training jobs
print(training_input_path)
print(test_input_path)

s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/training/training.npz
s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/test/test.npz


In [12]:
# Baseline model trained on a managed instance with a fixed 30 epochs (no tuning yet).
tf_estimator_baseline_awb = TensorFlow(entry_point='train_nn_baseline.py',
                                       base_job_name='A2-MLP-NN-baseline',
                                       role=role,
                                       source_dir='.',
                                       instance_count=1, # the number of EC2 instances to use
                                       instance_type='ml.m5.4xlarge',
                                       framework_version='2.1.0', # tensorflow version
                                       py_version='py3',
                                       script_mode=True, # enable using a python script to train the model
                                       hyperparameters={'epochs':30}
                                      )

In [13]:
# Launch the remote baseline training job using the uploaded S3 files as the 'training' and 'test' channels.
tf_estimator_baseline_awb.fit({'training': training_input_path, 'test': test_input_path})

2022-02-06 09:23:25 Starting - Starting the training job...
2022-02-06 09:23:51 Starting - Launching requested ML instancesProfilerReport-1644139399: InProgress
......
2022-02-06 09:24:48 Starting - Preparing the instances for training......
2022-02-06 09:25:54 Downloading - Downloading input data...
2022-02-06 09:26:22 Training - Training image download completed. Training in progress..[34m2022-02-06 09:26:26,016 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2022-02-06 09:26:26,022 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 09:26:26,599 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 09:26:26,614 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 09:26:26,628 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 09:26:26,638 sagemaker-container

In [14]:
'''
    hyperparameter tuning on the baseline nn model
'''
from sagemaker.tuner import IntegerParameter

# Search space for the tuner: both hyperparameters are integers sampled from the given inclusive bounds.
hyperparameter_ranges = {
    'epochs': IntegerParameter(5, 300),
    'batch-size': IntegerParameter(32, 1024)
}

# NOTE(review): printing the dict only shows the IntegerParameter object reprs, not the bounds.
print(hyperparameter_ranges)

{'epochs': <sagemaker.parameter.IntegerParameter object at 0x7f01e0b78a20>, 'batch-size': <sagemaker.parameter.IntegerParameter object at 0x7f01e0b78b00>}


In [15]:
# Metric the tuner optimises; it refers to one of the names declared in metric_definitions below.
objective_metric_name = 'validation_accuracy' # name of the metric for evaluating training jobs

objective_type = 'Maximize'

# Each regex is matched against the training log and its capture group is the metric value.
# The training-metric patterns start with \b so that e.g. 'loss: ' does not also match the
# tail of 'val_loss: ' ('_' is a word character, so there is no word boundary inside 'val_loss').
# Without the anchor, the training metrics would also pick up the validation values.
metric_definitions = [
    {'Name': 'training_loss', 'Regex': '\\bloss: ([0-9\\.]+)'},
    {'Name': 'training_accuracy', 'Regex': '\\baccuracy: ([0-9\\.]+)'},
    {'Name': 'validation_loss', 'Regex': 'val_loss: ([0-9\\.]+)'},
    {'Name': 'validation_accuracy', 'Regex': 'val_accuracy: ([0-9\\.]+)'},
    {'Name': 'training_precision', 'Regex': '\\bprecision: ([0-9\\.]+)'},
    {'Name': 'training_recall', 'Regex': '\\brecall: ([0-9\\.]+)'}
]

In [16]:
# configure a training job using the Tensorflow estimator
# No fixed hyperparameters are set here: 'epochs' and 'batch-size' come from the tuner's ranges.
# NOTE(review): unlike the other estimators in this notebook, no source_dir is passed here.
tf_estimator_baseline_hpo_awb = TensorFlow(entry_point='train_nn_baseline.py',
                                           role=role,
                                           instance_count=1,
                                           instance_type='ml.m5.4xlarge',
                                           framework_version='2.1.0',
                                           py_version='py3',
                                           script_mode=True
                                          )

from sagemaker.tuner import HyperparameterTuner

# Tuner over the estimator above: at most 6 training jobs in total, 2 running at a time.
# The objective name, ranges and metric definitions are passed positionally, in that order.
tuner = HyperparameterTuner(tf_estimator_baseline_hpo_awb,
                           objective_metric_name,
                           hyperparameter_ranges,
                           metric_definitions,
                           max_jobs=6,
                           max_parallel_jobs=2,
                           objective_type=objective_type,
                           base_tuning_job_name='HPO-A2-MLP-NN-baseline')

In [17]:
# fit starts the tuning job; every training job it launches receives the same two S3 channels
tuner.fit({'training': training_input_path, 'test': test_input_path})

....................................................................................................................................................................................................................................................!


In [18]:
# inspect jobs with AWS
# NOTE(review): the tuning job name is hard-coded, so this rebinds `tuner` to that specific
# past job; a fresh tuner.fit run would presumably get a different name — update it when re-running.
tuner = HyperparameterTuner.attach("HPO-A2-MLP-NN-baseli-220206-0930")
analytics = tuner.analytics()
# One row per training job, including its hyperparameters and FinalObjectiveValue.
df_hpo_res = analytics.dataframe()
df_hpo_res

Unnamed: 0,batch-size,epochs,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,216.0,288.0,HPO-A2-MLP-NN-baseli-220206-0930-006-e524b4f0,Completed,0.5305,2022-02-06 09:43:07+00:00,2022-02-06 09:50:55+00:00,468.0
1,105.0,8.0,HPO-A2-MLP-NN-baseli-220206-0930-005-cd7c64d3,Completed,0.5344,2022-02-06 09:42:26+00:00,2022-02-06 09:43:49+00:00,83.0
2,205.0,17.0,HPO-A2-MLP-NN-baseli-220206-0930-004-a127eae4,Completed,0.538,2022-02-06 09:38:15+00:00,2022-02-06 09:39:41+00:00,86.0
3,726.0,269.0,HPO-A2-MLP-NN-baseli-220206-0930-003-faeaf453,Completed,0.5371,2022-02-06 09:37:36+00:00,2022-02-06 09:40:37+00:00,181.0
4,996.0,273.0,HPO-A2-MLP-NN-baseli-220206-0930-002-a69722c9,Completed,0.5297,2022-02-06 09:33:16+00:00,2022-02-06 09:35:43+00:00,147.0
5,750.0,71.0,HPO-A2-MLP-NN-baseli-220206-0930-001-47fd4290,Completed,0.5361,2022-02-06 09:33:04+00:00,2022-02-06 09:34:48+00:00,104.0


In [19]:
# rank the tuning jobs by objective value, best first, and keep only the top row
ranked_jobs = df_hpo_res.sort_values(by='FinalObjectiveValue', ascending=False)
best_job = ranked_jobs.head(1)

best_job

Unnamed: 0,batch-size,epochs,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
2,205.0,17.0,HPO-A2-MLP-NN-baseli-220206-0930-004-a127eae4,Completed,0.538,2022-02-06 09:38:15+00:00,2022-02-06 09:39:41+00:00,86.0


In [20]:
'''
    train baseline model on AWS using the optimal hyperparameters
'''
# upload data to s3 bucket
# NOTE(review): these are the same upload calls, with the same arguments, as the earlier
# upload cell; the printed S3 paths are unchanged.
prefix = 'A2_preprocessed_data'

training_input_path = sagemaker_session.upload_data(path='data/training.npz', bucket=bucket, key_prefix=prefix+'/training')

test_input_path = sagemaker_session.upload_data(path='data/test.npz', bucket=bucket, key_prefix=prefix+'/test')

print(training_input_path)
print(test_input_path)

s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/training/training.npz
s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/test/test.npz


In [41]:
# The analytics frame stores hyperparameters as floats (e.g. 17.0), so cast the winning values back to int.
baseline_optimal_epochs = int(best_job['epochs'].values[0])
baseline_optimal_batch_size = int(best_job['batch-size'].values[0])

# Retrain the baseline script with the best epochs/batch-size found by the tuning job.
tf_estimator_baseline_hpo_tuned_awb = TensorFlow(entry_point='train_nn_baseline.py',
                                                base_job_name='A2-MLP-NN-baseline-tuned',
                                                role=role,
                                                source_dir='.',
                                                instance_count=1, # the number of EC2 instances to use
                                                instance_type='ml.m5.4xlarge',
                                                framework_version='2.1.0', # tensorflow version
                                                py_version='py3',
                                                script_mode=True, # enable using a python script to train the model
                                                hyperparameters={'epochs':baseline_optimal_epochs, 'batch-size':baseline_optimal_batch_size}
                                            )

In [42]:
# Train the tuned baseline model on the uploaded 'training' and 'test' channels.
tf_estimator_baseline_hpo_tuned_awb.fit({'training': training_input_path, 'test': test_input_path})

2022-02-06 10:01:21 Starting - Starting the training job...
2022-02-06 10:01:44 Starting - Launching requested ML instancesProfilerReport-1644141674: InProgress
.........
2022-02-06 10:03:05 Starting - Preparing the instances for training.........
2022-02-06 10:04:52 Downloading - Downloading input data
2022-02-06 10:04:52 Training - Training image download completed. Training in progress...[34m2022-02-06 10:04:57,005 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2022-02-06 10:04:57,011 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 10:05:11,405 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 10:05:11,420 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 10:05:11,435 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 10:05:11,445 sagemaker-conta

In [43]:
'''
    deploy hyperparameter tuned baseline model
'''
# Endpoint name gets a UTC timestamp suffix so that repeated deployments do not collide.
tf_tuned_baseline_endpoint_name = 'A2-keras-tf-tuned-baseline-nn-'+time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

# NOTE(review): no delete_endpoint call is visible in this notebook; the endpoint presumably
# stays up (and billable) until it is removed — confirm cleanup happens elsewhere.
predictor_tuned_baseline_awb = tf_estimator_baseline_hpo_tuned_awb.deploy(initial_instance_count=1,
                                                                          instance_type='ml.m5.4xlarge',
                                                                          endpoint_name=tf_tuned_baseline_endpoint_name
                                                                         )

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


------!

In [44]:
'''
    apply dropout and early stopping to the baseline NN model above
'''
# train locally first
# local-mode input and output locations (same file:// URIs as used for the baseline smoke test)
local_training_input_path = 'file://data/training.npz'
local_test_input_path = 'file://data/test.npz'
output = 'file://output'

# Same local smoke-test configuration as the baseline, but pointing at the modified training script.
tf_estimator_mod_local = TensorFlow(entry_point='train_nn_modified.py',
                                    role=role,
                                    source_dir='.',
                                    instance_count=1, # the number of EC2 instances to use
                                    instance_type='local', # 'local' runs the job on this machine instead of a remote instance
                                    framework_version='2.1.0', # tensorflow version
                                    py_version='py3',
                                    script_mode=True, # enable using a python script to train the model
                                    hyperparameters={'epochs':1}, # only need 1 epoch to test whether our code is working or not
                                    output_path=output
                                )

In [45]:
# Run the modified script locally for one epoch to check that it executes end to end.
tf_estimator_mod_local.fit({'training': local_training_input_path, 'test': local_test_input_path})

Creating ocuuwzzbii-algo-1-fq9qa ... 
Creating ocuuwzzbii-algo-1-fq9qa ... done
Attaching to ocuuwzzbii-algo-1-fq9qa
[36mocuuwzzbii-algo-1-fq9qa |[0m 2022-02-06 10:13:18,251 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training
[36mocuuwzzbii-algo-1-fq9qa |[0m 2022-02-06 10:13:18,259 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mocuuwzzbii-algo-1-fq9qa |[0m 2022-02-06 10:13:18,764 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mocuuwzzbii-algo-1-fq9qa |[0m 2022-02-06 10:13:18,784 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mocuuwzzbii-algo-1-fq9qa |[0m 2022-02-06 10:13:18,803 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36mocuuwzzbii-algo-1-fq9qa |[0m 2022-02-06 10:13:18,815 sagemaker-containers INFO     Invoking user script
[36mocuuwzzbii-algo-1-fq9qa |[0m 
[36mocuuwzzbii-algo-1-fq9qa |[0m Training E

In [11]:
'''
    apply dropout and early stopping to the baseline NN model above and train in AWS
'''
# upload data to s3 bucket
# NOTE(review): these are the same upload calls, with the same arguments, as the earlier
# upload cells; the printed S3 paths are unchanged.
prefix = 'A2_preprocessed_data'

training_input_path = sagemaker_session.upload_data(path='data/training.npz', bucket=bucket, key_prefix=prefix+'/training')

test_input_path = sagemaker_session.upload_data(path='data/test.npz', bucket=bucket, key_prefix=prefix+'/test')

print(training_input_path)
print(test_input_path)

s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/training/training.npz
s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/test/test.npz


In [47]:
# Modified model (train_nn_modified.py) trained on a managed instance with a fixed 30 epochs.
tf_estimator_mod_awb = TensorFlow(entry_point='train_nn_modified.py',
                                  base_job_name='A2-MLP-NN-modified',
                                  role=role,
                                  source_dir='.',
                                  instance_count=1, # the number of EC2 instances to use
                                  instance_type='ml.m5.4xlarge',
                                  framework_version='2.1.0', # tensorflow version
                                  py_version='py3',
                                  script_mode=True, # enable using a python script to train the model
                                  hyperparameters={'epochs':30}
                                 )

In [48]:
# Launch the remote training job for the modified model on the uploaded 'training' and 'test' channels.
tf_estimator_mod_awb.fit({'training': training_input_path, 'test': test_input_path})

2022-02-06 10:17:08 Starting - Starting the training job...
2022-02-06 10:17:31 Starting - Launching requested ML instancesProfilerReport-1644142621: InProgress
...
2022-02-06 10:18:06 Starting - Preparing the instances for training.........
2022-02-06 10:19:35 Downloading - Downloading input data...
2022-02-06 10:19:52 Training - Downloading the training image..[34m2022-02-06 10:20:14,478 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2022-02-06 10:20:14,485 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 10:20:15,187 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 10:20:15,202 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 10:20:15,216 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 10:20:15,226 sagemaker-containers INFO     Invoking user 

In [9]:
'''
    hyperparameter tuning on the modified nn model
'''
from sagemaker.tuner import IntegerParameter

# Search space for the modified model; identical bounds to the baseline tuning cell.
hyperparameter_ranges = {
    'epochs': IntegerParameter(5, 300),
    'batch-size': IntegerParameter(32, 1024)
}

# NOTE(review): printing the dict only shows the IntegerParameter object reprs, not the bounds.
print(hyperparameter_ranges)

{'epochs': <sagemaker.parameter.IntegerParameter object at 0x7fea9d3a4ac8>, 'batch-size': <sagemaker.parameter.IntegerParameter object at 0x7fea9d3a4b38>}


In [10]:
# Metric the tuner optimises; it refers to one of the names declared in metric_definitions below.
objective_metric_name = 'validation_accuracy' # name of the metric for evaluating training jobs

objective_type = 'Maximize'

# Each regex is matched against the training log and its capture group is the metric value.
# The training-metric patterns start with \b so that e.g. 'loss: ' does not also match the
# tail of 'val_loss: ' ('_' is a word character, so there is no word boundary inside 'val_loss').
# Without the anchor, the training metrics would also pick up the validation values.
metric_definitions = [
    {'Name': 'training_loss', 'Regex': '\\bloss: ([0-9\\.]+)'},
    {'Name': 'training_accuracy', 'Regex': '\\baccuracy: ([0-9\\.]+)'},
    {'Name': 'validation_loss', 'Regex': 'val_loss: ([0-9\\.]+)'},
    {'Name': 'validation_accuracy', 'Regex': 'val_accuracy: ([0-9\\.]+)'},
    {'Name': 'training_precision', 'Regex': '\\bprecision: ([0-9\\.]+)'},
    {'Name': 'training_recall', 'Regex': '\\brecall: ([0-9\\.]+)'}
]

In [12]:
# configure a training job using the Tensorflow estimator
# No fixed hyperparameters are set here: 'epochs' and 'batch-size' come from the tuner's ranges.
# NOTE(review): unlike the other estimators in this notebook, no source_dir is passed here.
tf_estimator_modified_hpo_awb = TensorFlow(entry_point='train_nn_modified.py',
                                           role=role,
                                           instance_count=1,
                                           instance_type='ml.m5.4xlarge',
                                           framework_version='2.1.0',
                                           py_version='py3',
                                           script_mode=True
                                          )

from sagemaker.tuner import HyperparameterTuner

# Tuner over the modified-model estimator: at most 6 training jobs in total, 2 running at a time.
# The objective name, ranges and metric definitions are passed positionally, in that order.
tuner_mod = HyperparameterTuner(tf_estimator_modified_hpo_awb,
                                objective_metric_name,
                                hyperparameter_ranges,
                                metric_definitions,
                                max_jobs=6,
                                max_parallel_jobs=2,
                                objective_type=objective_type,
                                base_tuning_job_name='HPO-A2-MLP-NN-modified'
                               )

In [14]:
# fit starts the tuning job for the modified model; every training job receives the same two S3 channels
tuner_mod.fit({'training': training_input_path, 'test': test_input_path})

.........................................................................................................................................................................................................!


In [15]:
# inspect jobs with AWS
# NOTE(review): the tuning job name is hard-coded, so this rebinds `tuner_mod` to that specific
# past job; a fresh tuner_mod.fit run would presumably get a different name — update it when re-running.
tuner_mod = HyperparameterTuner.attach("HPO-A2-MLP-NN-modifi-220206-1034")
analytics_mod = tuner_mod.analytics()
# One row per training job, including its hyperparameters and FinalObjectiveValue.
df_hpo_mod_res = analytics_mod.dataframe()
df_hpo_mod_res

Unnamed: 0,batch-size,epochs,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,994.0,286.0,HPO-A2-MLP-NN-modifi-220206-1034-006-1e51e533,Completed,0.5373,2022-02-06 10:48:51+00:00,2022-02-06 10:50:36+00:00,105.0
1,1023.0,137.0,HPO-A2-MLP-NN-modifi-220206-1034-005-c964b6e3,Completed,0.5365,2022-02-06 10:47:39+00:00,2022-02-06 10:50:11+00:00,152.0
2,417.0,292.0,HPO-A2-MLP-NN-modifi-220206-1034-004-0e3694e3,Completed,0.5378,2022-02-06 10:42:33+00:00,2022-02-06 10:45:13+00:00,160.0
3,73.0,66.0,HPO-A2-MLP-NN-modifi-220206-1034-003-0fb2098f,Completed,0.5365,2022-02-06 10:41:50+00:00,2022-02-06 10:46:22+00:00,272.0
4,528.0,213.0,HPO-A2-MLP-NN-modifi-220206-1034-002-3d54a165,Completed,0.5377,2022-02-06 10:36:38+00:00,2022-02-06 10:39:48+00:00,190.0
5,574.0,6.0,HPO-A2-MLP-NN-modifi-220206-1034-001-b0c5f613,Completed,0.5366,2022-02-06 10:36:31+00:00,2022-02-06 10:38:53+00:00,142.0


In [16]:
# rank the modified-model tuning jobs by objective value, best first, and keep only the top row
ranked_jobs_mod = df_hpo_mod_res.sort_values(by='FinalObjectiveValue', ascending=False)
best_job_mod = ranked_jobs_mod.head(1)

best_job_mod

Unnamed: 0,batch-size,epochs,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
2,417.0,292.0,HPO-A2-MLP-NN-modifi-220206-1034-004-0e3694e3,Completed,0.5378,2022-02-06 10:42:33+00:00,2022-02-06 10:45:13+00:00,160.0


In [17]:
'''
    train modified model on AWS using the optimal hyperparameters
'''
# upload data to s3 bucket
# NOTE(review): these are the same upload calls, with the same arguments, as the earlier
# upload cells; the printed S3 paths are unchanged.
prefix = 'A2_preprocessed_data'

training_input_path = sagemaker_session.upload_data(path='data/training.npz', bucket=bucket, key_prefix=prefix+'/training')

test_input_path = sagemaker_session.upload_data(path='data/test.npz', bucket=bucket, key_prefix=prefix+'/test')

print(training_input_path)
print(test_input_path)

s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/training/training.npz
s3://sagemaker-michaelwu-ma5852/A2_preprocessed_data/test/test.npz


In [21]:
# The analytics frame stores hyperparameters as floats (e.g. 292.0), so cast the winning values back to int.
modified_optimal_epochs = int(best_job_mod['epochs'].values[0])
modified_optimal_batch_size = int(best_job_mod['batch-size'].values[0])

# Retrain the modified script with the best epochs/batch-size found by its tuning job.
tf_estimator_modified_hpo_tuned_awb = TensorFlow(entry_point='train_nn_modified.py',
                                                base_job_name='A2-MLP-NN-modified-tuned',
                                                role=role,
                                                source_dir='.',
                                                instance_count=1, # the number of EC2 instances to use
                                                instance_type='ml.m5.4xlarge',
                                                framework_version='2.1.0', # tensorflow version
                                                py_version='py3',
                                                script_mode=True, # enable using a python script to train the model
                                                hyperparameters={'epochs':modified_optimal_epochs, 'batch-size':modified_optimal_batch_size}
                                            )

In [22]:
# Train the tuned modified model on the uploaded 'training' and 'test' channels.
tf_estimator_modified_hpo_tuned_awb.fit({'training': training_input_path, 'test': test_input_path})

2022-02-06 10:53:24 Starting - Starting the training job...
2022-02-06 10:53:26 Starting - Launching requested ML instancesProfilerReport-1644144798: InProgress
...
2022-02-06 10:54:24 Starting - Preparing the instances for training.........
2022-02-06 10:55:53 Downloading - Downloading input data...
2022-02-06 10:56:21 Training - Training image download completed. Training in progress..[34m2022-02-06 10:56:26,425 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2022-02-06 10:56:26,431 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 10:56:38,917 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 10:56:38,932 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 10:56:38,946 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-02-06 10:56:38,955 sagemaker-container

In [25]:
'''
    deploy hyperparameter tuned modified nn model
'''
# Endpoint name gets a UTC timestamp suffix so that repeated deployments do not collide.
tf_tuned_modified_endpoint_name = 'A2-keras-tf-tuned-modified-nn-'+time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

# NOTE(review): no delete_endpoint call is visible in this notebook; the endpoint presumably
# stays up (and billable) until it is removed — confirm cleanup happens elsewhere.
predictor_tuned_modified_awb = tf_estimator_modified_hpo_tuned_awb.deploy(initial_instance_count=1,
                                                                          instance_type='ml.m5.4xlarge',
                                                                          endpoint_name=tf_tuned_modified_endpoint_name
                                                                         )

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


-----!