In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras import backend as K
import numpy as np
import tensorflow.keras as keras

# Only needed when running the code on Colab
#from google.colab import files
#import io

  from ._conv import register_converters as _register_converters


In [2]:
# Only Colab
#train1_csv = files.upload()

In [3]:
# Only Colab
#dev1_csv = files.upload()

In [4]:
# Only Colab
#test1_normalized = files.upload()

In [5]:
SEQUENCE_LEN = 5
TRAIN_BATCH_SIZE = 50
DEV_BATCH_SIZE = 15
TEST_BATCH_SIZE = 10
NR_EPOCHS = 200
NR_TRAIN_EXAMPLES = 140
NR_DEV_EXAMPLES = 40
NR_TEST_EXAMPLES = 20

# Default values for ech column in case data is missing in the csv file
RECORD_DEFAULTS = [[0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]]

# Following paths need to be updated when running on Colab

train_path1 = '../dataset/train1_normalized.csv'
TRAIN_FILE_PATHS = [train_path1]

dev_path1 = '../dataset/dev1_normalized.csv'
DEV_FILE_PATHS = [dev_path1]

test_path1 = '../dataset/test1_normalized.csv'
TEST_FILE_PATHS = [test_path1]



In [6]:
def decode_csv(line):
   parsed_line = tf.decode_csv(line, RECORD_DEFAULTS)
   label =  parsed_line[-1]      # label is the last element of the list
   del parsed_line[-1]           # delete the last element from the list
   del parsed_line[0]            # even delete the first element bcz it is assumed NOT to be a feature
   features = tf.stack(parsed_line)  # Stack features so that you can later vectorize forward prop., etc.
   #label = tf.stack(label)          #NOT needed. Only if more than 1 column makes the label...
   return features, label

In [7]:

def data_generator(file_path_list, batch_size):

  filenames = tf.placeholder(tf.string, shape=[None])
  dataset = tf.data.Dataset.from_tensor_slices(filenames)
  dataset = dataset.flat_map(lambda filename: tf.data.TextLineDataset(filename).skip(1).map(decode_csv))
  dataset = dataset.shuffle(buffer_size=1000)
  dataset = dataset.batch(batch_size)
  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()

  with tf.Session() as sess:
      while True:
          #for i in range (NR_EPOCHS):
          #print('\nepoch: ', i)
          sess.run(iterator.initializer, feed_dict={filenames: file_path_list})
          while True:            
              try:
                #print('\nbreak\n')
                batch_data, batch_labels = sess.run(next_element)
                # Dimension of the data needs to be: (batch_size, length_of_each_sequence, nr_inputs_in_each_timestep)
                # Since the last batch in a epoch can have a different size,
                # "batch_data.shape[0]" is used instead of batch_size
                batch_data = np.reshape(batch_data, (batch_data.shape[0], SEQUENCE_LEN, 1))
                #print(batch_data)
                #print(batch_data.shape)
                #print('\n', batch_labels)
                #print(batch_labels.shape)
                #print(batch_labels)
              except tf.errors.OutOfRangeError:
                break
              yield (batch_data, batch_labels)

In [8]:
# For testing generator

#next_batch = train_data_generator()
#for i in range (6):
#  print(next(next_batch)[0].shape)

In [9]:
# Build the RNN LSTM model
lstm_model = Sequential()
lstm_model.add(LSTM(5, input_shape=(SEQUENCE_LEN, 1), return_sequences=True))
lstm_model.add(LSTM(5, input_shape=(SEQUENCE_LEN, 1), return_sequences=False))
lstm_model.add(Dense(1))

opt = tf.keras.optimizers.Adam(lr=0.001, decay=0.0009)

lstm_model.compile(loss='mean_absolute_error', optimizer=opt, metrics=['accuracy'])

In [10]:
# Define callbacks to be used in training

# Terminate if a NaN loss is encountered
terminate_on_nan_loss = keras.callbacks.TerminateOnNaN()

# Save the model as configured by the parameters
# If desired to save multiple models:   #filepath="./saved_models/lstm_model-{epoch:02d}-{val_acc:.5f}.hdf5", 
checkpoint = keras.callbacks.ModelCheckpoint(filepath="./saved_models/lstm_model.hdf5",
                                             monitor='val_loss', 
                                             verbose=1, 
                                             save_best_only=True, 
                                             save_weights_only=False, 
                                             mode='auto', 
                                             period=1)

# Stop training when a monitored quantity has stopped improving.
early_stopper = keras.callbacks.EarlyStopping(monitor='val_loss', 
                                              min_delta=0.0001, 
                                              patience=50, 
                                              verbose=1, 
                                              mode='auto', 
                                              baseline=None)

# Start tensorboard on the command line with:  tensorboard --logdir=/full_path_to_your_logs
tesorboard = keras.callbacks.TensorBoard(log_dir='./logs', 
                                         histogram_freq=5,    # frequency in terms of # of epochs
                                         batch_size=10, 
                                         write_graph=True, 
                                         write_grads=True,    # gradient histograms
                                         write_images=True) #,   # model weights 
                                         #embeddings_freq=5,   
                                         #embeddings_layer_names=None, 
                                         #embeddings_metadata=None,
                                         #embeddings_data=None)

# For example, let's say cooldown=5. After the learning rate is reduced, the algorithm waits 5
# epochs before starting to monitor the metrics again. So if there is no improvement in the metric
# and patience=20, the learning rate will be reduced again after 25 epochs.
reduce_lr_on_plateau = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', 
                                                         factor=0.1, 
                                                         patience=25, 
                                                         verbose=1, 
                                                         mode='auto', 
                                                         min_delta=0.0001, 
                                                         cooldown=5,  # do not monitor metrics this many epochs after reducing lr 
                                                         min_lr=0.000001)

In [11]:
# Train the model

train_data_generator = data_generator(TRAIN_FILE_PATHS, TRAIN_BATCH_SIZE)
dev_data_generator   = data_generator(DEV_FILE_PATHS,   DEV_BATCH_SIZE)

lstm_model.fit_generator(train_data_generator,
                         steps_per_epoch=(NR_TRAIN_EXAMPLES // TRAIN_BATCH_SIZE),
                         epochs=NR_EPOCHS, 
                         verbose=1,
                         validation_data=dev_data_generator,
                         validation_steps=(NR_DEV_EXAMPLES // DEV_BATCH_SIZE),
                         callbacks=[terminate_on_nan_loss, checkpoint, early_stopper,
                                    tesorboard, reduce_lr_on_plateau])

Epoch 1/200
Epoch 00001: val_loss improved from inf to 0.48742, saving model to ./saved_models/lstm_model.hdf5
Epoch 2/200
Epoch 00002: val_loss improved from 0.48742 to 0.44664, saving model to ./saved_models/lstm_model.hdf5
Epoch 3/200
Epoch 00003: val_loss improved from 0.44664 to 0.44092, saving model to ./saved_models/lstm_model.hdf5
Epoch 4/200
Epoch 00004: val_loss did not improve from 0.44092
Epoch 5/200
Epoch 00005: val_loss did not improve from 0.44092
Epoch 6/200
Epoch 00006: val_loss improved from 0.44092 to 0.40722, saving model to ./saved_models/lstm_model.hdf5
Epoch 7/200
Epoch 00007: val_loss did not improve from 0.40722
Epoch 8/200
Epoch 00008: val_loss improved from 0.40722 to 0.39827, saving model to ./saved_models/lstm_model.hdf5
Epoch 9/200
Epoch 00009: val_loss did not improve from 0.39827
Epoch 10/200
Epoch 00010: val_loss did not improve from 0.39827
Epoch 11/200
Epoch 00011: val_loss did not improve from 0.39827
Epoch 12/200
Epoch 00012: val_loss did not improv

Epoch 00029: val_loss did not improve from 0.23454
Epoch 30/200
Epoch 00030: val_loss improved from 0.23454 to 0.23098, saving model to ./saved_models/lstm_model.hdf5
Epoch 31/200
Epoch 00031: val_loss did not improve from 0.23098
Epoch 32/200
Epoch 00032: val_loss did not improve from 0.23098
Epoch 33/200
Epoch 00033: val_loss improved from 0.23098 to 0.22557, saving model to ./saved_models/lstm_model.hdf5
Epoch 34/200
Epoch 00034: val_loss did not improve from 0.22557
Epoch 35/200
Epoch 00035: val_loss did not improve from 0.22557
Epoch 36/200
Epoch 00036: val_loss did not improve from 0.22557
Epoch 37/200
Epoch 00037: val_loss did not improve from 0.22557
Epoch 38/200
Epoch 00038: val_loss improved from 0.22557 to 0.20922, saving model to ./saved_models/lstm_model.hdf5
Epoch 39/200
Epoch 00039: val_loss improved from 0.20922 to 0.20658, saving model to ./saved_models/lstm_model.hdf5
Epoch 40/200
Epoch 00040: val_loss did not improve from 0.20658
Epoch 41/200
Epoch 00041: val_loss di

Epoch 00058: val_loss did not improve from 0.16354
Epoch 59/200
Epoch 00059: val_loss did not improve from 0.16354
Epoch 60/200
Epoch 00060: val_loss did not improve from 0.16354
Epoch 61/200
Epoch 00061: val_loss did not improve from 0.16354
Epoch 62/200
Epoch 00062: val_loss did not improve from 0.16354
Epoch 63/200
Epoch 00063: val_loss improved from 0.16354 to 0.16266, saving model to ./saved_models/lstm_model.hdf5
Epoch 64/200
Epoch 00064: val_loss improved from 0.16266 to 0.14468, saving model to ./saved_models/lstm_model.hdf5
Epoch 65/200
Epoch 00065: val_loss did not improve from 0.14468
Epoch 66/200
Epoch 00066: val_loss did not improve from 0.14468
Epoch 67/200
Epoch 00067: val_loss did not improve from 0.14468
Epoch 68/200
Epoch 00068: val_loss improved from 0.14468 to 0.14417, saving model to ./saved_models/lstm_model.hdf5
Epoch 69/200
Epoch 00069: val_loss did not improve from 0.14417
Epoch 70/200
Epoch 00070: val_loss did not improve from 0.14417
Epoch 71/200
Epoch 00071:

Epoch 00087: val_loss improved from 0.08489 to 0.08182, saving model to ./saved_models/lstm_model.hdf5
Epoch 88/200
Epoch 00088: val_loss did not improve from 0.08182
Epoch 89/200
Epoch 00089: val_loss improved from 0.08182 to 0.07755, saving model to ./saved_models/lstm_model.hdf5
Epoch 90/200
Epoch 00090: val_loss improved from 0.07755 to 0.07723, saving model to ./saved_models/lstm_model.hdf5
Epoch 91/200
Epoch 00091: val_loss improved from 0.07723 to 0.06507, saving model to ./saved_models/lstm_model.hdf5
Epoch 92/200
Epoch 00092: val_loss did not improve from 0.06507
Epoch 93/200
Epoch 00093: val_loss improved from 0.06507 to 0.05882, saving model to ./saved_models/lstm_model.hdf5
Epoch 94/200
Epoch 00094: val_loss did not improve from 0.05882
Epoch 95/200
Epoch 00095: val_loss improved from 0.05882 to 0.05609, saving model to ./saved_models/lstm_model.hdf5
Epoch 96/200
Epoch 00096: val_loss improved from 0.05609 to 0.04670, saving model to ./saved_models/lstm_model.hdf5
Epoch 97/

Epoch 00115: val_loss improved from 0.01341 to 0.01077, saving model to ./saved_models/lstm_model.hdf5
Epoch 116/200
Epoch 00116: val_loss did not improve from 0.01077
Epoch 117/200
Epoch 00117: val_loss did not improve from 0.01077
Epoch 118/200
Epoch 00118: val_loss did not improve from 0.01077
Epoch 119/200
Epoch 00119: val_loss did not improve from 0.01077
Epoch 120/200
Epoch 00120: val_loss did not improve from 0.01077
Epoch 121/200
Epoch 00121: val_loss did not improve from 0.01077
Epoch 122/200
Epoch 00122: val_loss did not improve from 0.01077
Epoch 123/200
Epoch 00123: val_loss did not improve from 0.01077
Epoch 124/200
Epoch 00124: val_loss did not improve from 0.01077
Epoch 125/200
Epoch 00125: val_loss did not improve from 0.01077
Epoch 126/200
Epoch 00126: val_loss did not improve from 0.01077
Epoch 127/200
Epoch 00127: val_loss did not improve from 0.01077
Epoch 128/200
Epoch 00128: val_loss did not improve from 0.01077
Epoch 129/200
Epoch 00129: val_loss did not improve 

Epoch 00145: val_loss did not improve from 0.01077
Epoch 146/200
Epoch 00146: val_loss did not improve from 0.01077
Epoch 147/200
Epoch 00147: val_loss did not improve from 0.01077
Epoch 148/200
Epoch 00148: val_loss did not improve from 0.01077
Epoch 149/200
Epoch 00149: val_loss did not improve from 0.01077
Epoch 150/200
Epoch 00150: val_loss did not improve from 0.01077
Epoch 151/200
Epoch 00151: val_loss did not improve from 0.01077
Epoch 152/200
Epoch 00152: val_loss did not improve from 0.01077
Epoch 153/200
Epoch 00153: val_loss did not improve from 0.01077
Epoch 154/200
Epoch 00154: val_loss did not improve from 0.01077
Epoch 155/200
Epoch 00155: val_loss did not improve from 0.01077
Epoch 156/200
Epoch 00156: val_loss did not improve from 0.01077
Epoch 157/200
Epoch 00157: val_loss did not improve from 0.01077
Epoch 158/200
Epoch 00158: val_loss did not improve from 0.01077
Epoch 159/200
Epoch 00159: val_loss did not improve from 0.01077
Epoch 160/200
Epoch 00160: val_loss did

Epoch 176/200
Epoch 00176: val_loss did not improve from 0.00921
Epoch 177/200
Epoch 00177: val_loss did not improve from 0.00921
Epoch 178/200
Epoch 00178: val_loss did not improve from 0.00921
Epoch 179/200
Epoch 00179: val_loss did not improve from 0.00921
Epoch 180/200
Epoch 00180: val_loss did not improve from 0.00921
Epoch 181/200
Epoch 00181: val_loss did not improve from 0.00921
Epoch 182/200
Epoch 00182: val_loss did not improve from 0.00921
Epoch 183/200
Epoch 00183: val_loss did not improve from 0.00921
Epoch 184/200
Epoch 00184: val_loss did not improve from 0.00921
Epoch 185/200
Epoch 00185: val_loss did not improve from 0.00921
Epoch 186/200
Epoch 00186: val_loss did not improve from 0.00921
Epoch 187/200
Epoch 00187: val_loss did not improve from 0.00921
Epoch 188/200
Epoch 00188: val_loss did not improve from 0.00921
Epoch 189/200
Epoch 00189: val_loss did not improve from 0.00921
Epoch 190/200
Epoch 00190: val_loss did not improve from 0.00921

Epoch 00190: ReduceLROnP

<tensorflow.python.keras.callbacks.History at 0x7fdc1b30eba8>

In [12]:
# Evaluate the test set 

test_eval_data_generator = data_generator(TEST_FILE_PATHS, TEST_BATCH_SIZE)
lstm_model.evaluate_generator(test_eval_data_generator, 
                              steps=(NR_TEST_EXAMPLES // TEST_BATCH_SIZE), 
                              verbose=1)



[0.011405786965042353, 0.0]

In [13]:
# Make predictions on the test set

test_predict_data_generator = data_generator(TEST_FILE_PATHS, TEST_BATCH_SIZE)
lstm_model.predict_generator(test_predict_data_generator, 
                             steps=(NR_TEST_EXAMPLES // TEST_BATCH_SIZE), 
                             verbose=1)



array([[0.19072168],
       [0.32024473],
       [0.29224843],
       [0.8700494 ],
       [0.64393985],
       [0.9269407 ],
       [0.554426  ],
       [0.6122184 ],
       [0.76550317],
       [0.27383167],
       [0.86244947],
       [0.085961  ],
       [0.81788164],
       [0.33503988],
       [0.25629824],
       [0.2348272 ],
       [0.9202893 ],
       [0.6335715 ],
       [0.8346967 ],
       [0.8425851 ]], dtype=float32)