In [None]:
# Last updated 8/6/19

# Cell 1
# Announce the start of the import/setup cell.  Each part of this notebook
# prints a "Begin ..." banner when it starts and a "Finished ..." banner when
# it completes, so progress is visible in the cell output.

print('\nBegin Part 1 of 19 - Imports\n')

import datetime as dt
import keras
from keras.callbacks import EarlyStopping
from keras.layers import Dense  
from keras.layers import LSTM  
from keras.layers import Dropout 
from keras.models import Sequential, load_model 
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import statistics
import tensorflow as tf
import time

# Remember when the whole run began; the last cell subtracts this from its own
# timestamp to report the total run time.
total_run_time_start_time = dt.datetime.now()
print('Start time: ' + str(dt.datetime.now()))

# GPU selection.  The id is kept as a string because it is written to an
# environment variable here and later becomes part of the model file name.
gpu_number = '0'
os.environ['CUDA_VISIBLE_DEVICES'] = gpu_number

print('\nFinished Part 1 of 19 - Imports.  Ready to move to next cell.')


In [None]:
# Cell 2

# Notebook-wide configuration values
print('\nBegin Part 2 of 19 - Set variables\n')

# --- Run mode ---------------------------------------------------------------
# 'Y' builds, trains and saves a model; 'N' skips training so that the
# previously saved model is only run against the test set.
train_YN = 'Y'

# 'Y' draws the train/test data plots, 'N' skips them.
plot_data = 'Y'

# --- Training hyper-parameters ----------------------------------------------
# Number of epochs to run
num_epochs = 100
# Batch size
training_batch_size = 32
# Default loss of 0 so the final graph title can still be printed when
# training is not run.
loss = 0

# --- Record counts ----------------------------------------------------------
# How many training records to use, depending on training set size
# (for the 2160-hour data set, use 1824).
train_record_number = 1824
# How many test records to use, depending on test set size
# (for the 2160-hour data set, use 396).
test_record_number = 396

# --- Files ------------------------------------------------------------------
# Directory that holds the processed CSV files and the saved model.
# NOTE(review): absolute, machine-specific path -- adjust per environment.
initial_path = '/home/pace/glucose/data_files/adolescent-2160-0-Dexcom-Cozmo-Basal/'

input_train_file_name_prefix = 'adolescent#001'
input_test_file_name_prefix = 'adolescent#001'

# Processed input files, derived from the prefixes above.
processed_train_file_name = input_train_file_name_prefix + '-train-processed.csv'
processed_test_file_name = input_test_file_name_prefix + '-test-processed.csv'

# Model file: <train prefix>-<epochs>-<gpu id>.h5
model_file = '-'.join([input_train_file_name_prefix, str(num_epochs), gpu_number]) + '.h5'

print('\nFinished Part 2 of 19 - Set variables.  Ready to move to next cell.')

In [None]:
# Cell 3

# Load the processed train and test CSV files into DataFrames

print('\nBegin Part 6 of 19 - Open and read processed files\n')

# Both path pieces are already strings, so plain concatenation yields the
# full file path.  index_col=False keeps the first column as data rather
# than using it as the index.
train_path = initial_path + processed_train_file_name
test_path = initial_path + processed_test_file_name

train_df = pd.read_csv(train_path, index_col=False)
test_df = pd.read_csv(test_path, index_col=False)

print('\nFinished Part 6 of 19 - Open and read processed files.  Ready to move to next cell.')

In [None]:
# Cell 4

# Average the BG readings per (Date, Hour)
print('\nBegin Part 7 of 19 - Group by date\n')

# Each result is a Series of mean BG values indexed by (Date, Hour).
train_means = train_df.groupby(['Date', 'Hour'])['BG'].mean()
print('Finished grouping by date for train')

test_means = test_df.groupby(['Date', 'Hour'])['BG'].mean()
print('Finished grouping by date for test')

print('\nFinished Part 7 of 19 - Group by date.  Ready to move to next cell.')

In [None]:
# Cell 5

# Plot test and train data
print('\nBegin Part 8 of 19 - Plot train and test data\n')


def _plot_hourly_means(means, title, series_name, color=None):
    """Plot one series of hourly glucose means and print its first and last entries.

    Parameters
    ----------
    means : pandas.Series
        Mean glucose values indexed by (Date, Hour), as produced by the
        group-by cell.
    title : str
        Title drawn above the plot.
    series_name : str
        Variable name shown in the 'Beginning date for ...' /
        'Ending date for ...' messages.
    color : str, optional
        Line colour; when omitted the plotting default is used.
    """
    plt.figure(figsize=(20,12))
    # `kind` must be passed by keyword: recent pandas releases reject
    # positional arguments to Series.plot().
    plot_kwargs = {'kind': 'line', 'title': title}
    if color is not None:
        plot_kwargs['color'] = color
    means.plot(**plot_kwargs)
    plt.xlabel('Time Point')
    plt.ylabel('Glucose Levels')
    plt.show()

    # Explicit positional slices: the first and the last row.  Plain
    # `means[[0]]` depends on the deprecated positional fallback of `[]`.
    print('Beginning date for ' + series_name)
    print(means.iloc[:1])
    print('Ending date for ' + series_name)
    print(means.iloc[-1:])


# Plot means
if plot_data == 'Y':
  _plot_hourly_means(
      train_means,
      'Training Data for Blood Glucose Over Time\nadolescent-2160-0-Dexcom-Cozmo-Basal\n' +
      input_train_file_name_prefix,
      'train_means')

  _plot_hourly_means(
      test_means,
      'Testing Data for Blood Glucose Over Time\nadolescent-2160-0-Dexcom-Cozmo-Basal\n' +
      input_test_file_name_prefix,
      'test_means',
      color='green')

print('\nFinished Part 8 of 19 - Plot train and test data.  Ready to move to next cell.')

In [None]:
# Cell 6

# Convert train_means / test_means into flat DataFrames
print('\nBegin Part 9 of 19 - Add elements to train_glucose_df and test_glucose_df\n')


def _hourly_means_to_frame(means):
    """Turn a (Date, Hour)-indexed Series of means into a two-column DataFrame.

    Parameters
    ----------
    means : pandas.Series
        Mean glucose values whose index entries are (date, hour) pairs.

    Returns
    -------
    pandas.DataFrame
        Columns 'Date_Hour' ('<date>:<hour>' strings) and 'Glucose_Level',
        one row per entry of `means` in the same order, with a default
        0..n-1 row index.
    """
    # Build every label in one pass and construct the frame once; appending
    # row by row with .loc re-allocates the frame on every iteration.
    date_hour_labels = [str(date) + ':' + str(hour) for date, hour in means.index]
    return pd.DataFrame(
        {'Date_Hour': date_hour_labels, 'Glucose_Level': means.values},
        columns=['Date_Hour', 'Glucose_Level'])


print('Begin adding elements to train_glucose_df')
train_glucose_df = _hourly_means_to_frame(train_means)
print('Finished adding elements to train_glucose_df')

print('Begin adding elements to test_glucose_df')
test_glucose_df = _hourly_means_to_frame(test_means)
print('Finished adding elements to test_glucose_df')

print('\nFinished Part 9 of 19 - Add elements to train_glucose_df and test_glucose_df.  Ready to move to next cell.')

In [None]:
# Cell 7

# Begin LSTM
# Scale training set
print('\nBegin Part 10 of 19 - Scale training set\n')

# Scaler mapping values into the range [0, 1].  It is fitted on the training
# data here and reused later for the test inputs and for un-scaling the
# predictions.
scaler = MinMaxScaler(feature_range=(0, 1))

# Second column of train_glucose_df, kept two-dimensional (n_rows, 1) for the scaler.
glucose_training_set = train_glucose_df.iloc[:, [1]].values
glucose_training_set_scaled = scaler.fit_transform(glucose_training_set)

print('\nFinished Part 10 of 19 - Scale training set.  Ready to move to next cell.')

In [None]:
# Cell 8

# Begin prepping training set
print('\nBegin Part 11 of 19 - Prep training set\n')

# Sliding window over the scaled training values: each sample holds the 60
# values immediately before position i, and its label is the value at i.
features_set = [glucose_training_set_scaled[i-60:i, 0]
                for i in range(60, train_record_number)]
labels = [glucose_training_set_scaled[i, 0]
          for i in range(60, train_record_number)]

print('\nFinished Part 11 of 19 - Prep training set.  Ready to move to next cell.')

In [None]:
# Cell 9

# Reshape training set
print('\nBegin Part 12 of 19 - Reshape training set\n')

# Convert the Python lists built in the previous cell into arrays.
labels = np.array(labels)
features_set = np.array(features_set)

# Append a trailing axis of length 1: (samples, timesteps) -> (samples, timesteps, 1),
# matching the input_shape declared on the first LSTM layer.
n_samples, n_timesteps = features_set.shape[:2]
features_set = features_set.reshape(n_samples, n_timesteps, 1)

print('\nFinished Part 12 of 19 - Reshape training set.  Ready to move to next cell.')

In [None]:
# Cell 10

# Build LSTM model
# Only required when training is requested (train_YN == 'Y')
if train_YN == 'Y':
  print('\nBegin Part 13 of 19 - Build LSTM\n')

  # Four stacked 50-unit LSTM layers, each followed by 20% dropout.  The
  # first three return full sequences so the next LSTM receives one value per
  # time step; the last returns only its final output, which feeds a single
  # Dense unit.
  model = Sequential([
      LSTM(units=50, return_sequences=True, input_shape=(features_set.shape[1], 1)),
      Dropout(0.2),
      LSTM(units=50, return_sequences=True),
      Dropout(0.2),
      LSTM(units=50, return_sequences=True),
      Dropout(0.2),
      LSTM(units=50),
      Dropout(0.2),
      Dense(units=1),
  ])

  model.compile(optimizer='adam', loss='mean_squared_error')

  print('\nFinished Part 13 of 19 - Build LSTM.  Ready to move to next cell.')

In [None]:
# Cell 11

# Train, evaluate and save the model
# Only required when training is requested (train_YN == 'Y')
if train_YN == 'Y':

  # Timestamp taken before training; the elapsed time reported at the end of
  # this cell also covers the evaluation and the save.
  training_start_time = dt.datetime.now()

  print('\nBegin Part 14 of 19 - Create model\n')
  # Same import as in the first cell; repeated here so this cell can be
  # re-run on its own.
  from keras.callbacks import EarlyStopping

  # Stop when the training loss has not improved for `patience` epochs.
  # NOTE(review): patience (100) equals num_epochs as configured, so this
  # callback cannot fire with the current settings -- confirm that is intended.
  es = EarlyStopping(monitor='loss', mode='min', verbose=1, patience=100)

  history = model.fit(
      features_set,
      labels,
      epochs=num_epochs,
      batch_size=training_batch_size,
      callbacks=[es],
  )

  # Loss on the (scaled) training data; later shown in the results plot title.
  loss = model.evaluate(features_set, labels, verbose=0)
  print('\nUnscaled Loss = ' + str(loss))

  # Persist the trained model so the prediction cell can load it from disk.
  model.save(initial_path + model_file)

  # Elapsed wall-clock time for this cell's work.
  training_end_time = dt.datetime.now()
  training_elapsed = training_end_time - training_start_time
  training_total_time = str(training_elapsed.total_seconds()) + ' seconds.'

  print('\n\nTotal training time ' + training_total_time)

  print('\nFinished Part 14 of 19 - Create model.  Ready to move to next cell.')

In [None]:
# Cell 12

# Concat train and test to be able to run testing
print('\nBegin Part 15 of 19 - Concat train and test\n')

# One Series: all training glucose levels followed by all test glucose levels.
# Row labels restart at 0 for the test part, so use positional access on it.
glucose_total = pd.concat(
    [train_glucose_df['Glucose_Level'], test_glucose_df['Glucose_Level']],
    axis=0)

print('\nFinished Part 15 of 19 - Concat train and test.  Ready to move to next cell.')

In [None]:
# Cell 13

# Reshape test set
print('\nBegin Part 16 of 19 - Reshape test set\n')

# Start 60 rows before the end of the training data so that the first test
# point has a full 60-value history; everything from there on is kept.
first_input_row = len(train_glucose_df) - 60
test_inputs = glucose_total.iloc[first_input_row:].values

# Column vector, scaled with the scaler fitted on the training set.
test_inputs = scaler.transform(test_inputs.reshape(-1, 1))

print('\nFinished Part 16 of 19 - Reshape test set.  Ready to move to next cell.')

In [None]:
# Cell 14

# Prep test set
print('\nBegin Part 17 of 19 - Prep test set\n')

# One 60-value window per position i; the window ends just before i.
# NOTE(review): the range yields test_record_number - 60 windows, so fewer
# predictions than test_record_number are produced -- confirm this is intended.
test_features = np.array([test_inputs[i-60:i, 0]
                          for i in range(60, test_record_number)])

# (samples, timesteps) -> (samples, timesteps, 1), as the model expects.
test_features = test_features.reshape(test_features.shape[0], test_features.shape[1], 1)

print('\nFinished Part 17 of 19 - Prep test set.  Ready to move to next cell.')

In [None]:
# Cell 15

# Predictions
print('\nBegin Part 18 of 19 - Predictions\n')

# Inference timing starts here; it covers loading the model, predicting and
# the per-row printing below.
inf_start_time = dt.datetime.now()

# Always load the saved model from disk, so this cell also works when
# training was skipped.
model = load_model(initial_path + model_file)

# Predict on the scaled windows, then map the outputs back to glucose units.
predictions = scaler.inverse_transform(model.predict(test_features))

# Squared error of each prediction against the test row at the same position.
actual_predicted_difference_list = []

for i, predicted_value in enumerate(predictions[:, 0]):
    actual_value = test_glucose_df.loc[i, 'Glucose_Level']
    print('\nActual value for ' + str(test_glucose_df.loc[i, 'Date_Hour']) + ' = '
          + str(actual_value))
    print('Predicted value = {0}'.format(predicted_value))

    actual_predicted_difference_list.append((predicted_value - actual_value) ** 2)

# Mean of the squared errors
mean_square = statistics.mean(actual_predicted_difference_list)
print('Mean square error = ' + str(mean_square))

# Elapsed inference time
inf_end_time = dt.datetime.now()
inf_elapsed = inf_end_time - inf_start_time
inf_total_time = str(inf_elapsed.total_seconds()) + ' seconds.'

print('\n\nTotal inference time ' + inf_total_time)

print('\nFinished Part 18 of 19 - Predictions.  Ready to move to next cell.')

In [None]:
# Cell 16

# Plot predictions
print('\nBegin Part 19 of 19 - Plot predictions')

# Every actual test glucose level, in order.
actual = test_glucose_df['Glucose_Level'].values

plot_data = 'Y' # Values are Y or N

# Title summarising the data files, the training loss, the mean square error
# and the training settings.
plot_title = ''.join([
    'Glucose Levels Prediction LSTM\nTraining File = adolescent-2160-0-Dexcom-Cozmo-Basal\n',
    input_train_file_name_prefix,
    '\nTest File = adolescent-2160-0-Dexcom-Cozmo-Basal\n',
    input_test_file_name_prefix,
    '\nTraining Loss = ',
    str(loss),
    '\nMean Square Error = ',
    str(mean_square),
    '\n',
    str(num_epochs),
    ' Epochs, ',
    'Batch Size = ',
    str(training_batch_size),
])

print('\n\nPlotting predictions')
plt.figure(figsize=(20,12))
plt.plot(actual, color='blue', label='Actual Glucose Levels')
plt.plot(predictions, color='red', label='Predicted Glucose Levels')
plt.title(plot_title)
plt.xlabel('Time Point')
plt.ylabel('Glucose Levels')
plt.legend()
plt.show()

print('\nFinished Part 19 of 19 - Plot predictions.  Ready to move to next cell.')




In [None]:
# Cell 17

# Final summary of the run
total_run_time_end_time = dt.datetime.now()
total_run_time = total_run_time_end_time - total_run_time_start_time

print('Training Loss = ' + str(loss))
print('Mean Square Error = ' + str(mean_square))

# Training time is not reported here: training_total_time only exists when
# the training cell actually ran (train_YN == 'Y').

print('\n\nTotal inference time ' + inf_total_time)

print('\n\nTotal run time ' + str(total_run_time.total_seconds()) + ' seconds.')
