# Sequence to Sequence models for sugar level prediction

For the first part, which make use of Recurrent Neural Networks, 
take a look at https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html

The second part (to be done) will use the more advanced Transformer arquitecture. 

In [1]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [0]:
%matplotlib inline
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import os
import tensorflow as tf

In [0]:
from tensorflow.keras.layers import Dense, LSTM, Input
from tensorflow.keras.models import Model
from tensorflow.keras.losses import MAE
from tensorflow.keras.metrics import MAPE
from tensorflow.keras.backend import clear_session

## Data preprocessing

### Load datasets

Each dataset consits of sequences of `history`+`future` points, with 4 features: 

* time interval: days counted starting from the end of the `history` of the sequence. Thus, for points in the `history`, this feature takes negatuve values, while for points in the `future`, it's positive. 
* hour: hour of the day, divided by 24.
* day of week: day of the week in numbers ('Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 'Thursday': 3, 'Friday': 4, 'Saturday': 5, 'Sunday': 6), divided by 7.
* sugar level: recorded sugar level, scaled with min/max scaler.

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [8]:
!ls '/content/drive/My Drive/Colab Notebooks/sugar_level_prediction/data/'

javi_measurements.csv  noisy0.0_test.npy  noisy0.0_train.npy  noisy0.0_vad.npy


In [0]:
root = "/content/drive/My Drive/Colab Notebooks/sugar_level_prediction/data/"
noise=0.0
history = 100 
future = 12

train = np.load(os.path.join(root, "noisy%s_train.npy" %noise))
vad = np.load(os.path.join(root, "noisy%s_vad.npy" %noise))
test = np.load(os.path.join(root, "noisy%s_test.npy" %noise))

train_steps = train.shape[0] // train_batch
vad_steps = vad.shape[0] // train_batch
test_steps = test.shape[0] // test_batch

In [10]:
train.shape, vad.shape, test.shape

((70487, 112, 4), (9116, 112, 4), (8781, 112, 4))

###  Split data 

Each input sequence has both the features and labels (x and y, if you wish), so we have to separate them.

In [0]:
def split_features_labels(data, history, future):
    """
    Method to separate historic and future events (features and labels). 
    It returns input data for the encoder and decoder, and the output data 
    for the decoder. The input data for the decoder is just the output data 
    of the decoder, shifted by one step. 

    :param data: numpy ndarray with sequences of history+future points, and 4 attributes 
      (time_interval, hour_of_day, day_of_week, sugar_level). It has shape (?, history+future, 4)
    :param history: number of points for the features
    :param future: number of points for the labels
    :return three numpy arrays with the input data for the encoder (shape=(?, history, 4))
        and decoder (shape=(?, future+1, 1)), and the output data for the decoder
        (shape=(?, future+1, 1))
    """
    # split features and labels . Note that for the later, we only keep the 
    # feature with the sugar level, which constitutes our target
    yf, yl = data[:, :history], data[:, history:history+future, -1]
    
    # add start of sentence to labels (input to the decoder)
    yl_input = np.zeros(shape=(yl.shape[0], yl.shape[1]+1))
    yl_input[:, 1:] = yl
    # add end of sentence to labels (output of the decoder)
    yl_output = np.zeros(shape=(yl.shape[0], yl.shape[1]+1))
    yl_output[:, :-1] = yl
    
    # add new dimension at the end of input/output arrays to the decoder
    yl_input = yl_input[:, :, np.newaxis].astype(np.float32)
    yl_output = yl_output[:, :, np.newaxis].astype(np.float32)
    
    return (yf, yl_input), yl_output

In [0]:
train_data = split_features_labels(train, history, future)
vad_data = split_features_labels(vad, history, future)
test_data = split_features_labels(test, history, future)

In [16]:
train_data[0][0].shape, train_data[0][1].shape, train_data[1].shape

((70487, 100, 4), (70487, 13, 1), (70487, 13, 1))

### Make generators to feed the NN

In [0]:
def make_dataset(tensor):
    return tf.data.Dataset.from_tensor_slices(tensor)

def make_iterator(tensor, batch_size, num_epochs):
    dataset = make_dataset(tensor)
    return dataset.batch(batch_size).repeat(num_epochs)

In [0]:
train_batch = 200
vad_batch = 200
test_batch = 200
num_epochs = 5

train_gen = make_iterator(train_data, train_batch, num_epochs)
vad_gen = make_iterator(vad_data, vad_batch, num_epochs=1)
test_gen = make_iterator(test_data, test_batch, num_epochs=1)

## Seq2seq model with RNN

In [0]:
def seq2seq(history, future, latent_dim):
    clear_session()
    # Define an input sequence and process it.
    encoder_inputs = Input(shape=(history, 1))
    encoder = LSTM(latent_dim, return_state=True)
    encoder_outputs, state_h, state_c = encoder(encoder_inputs)
    # We discard `encoder_outputs` and only keep the states.
    encoder_states = [state_h, state_c]

    # Set up the decoder, using `encoder_states` as initial state.
    decoder_inputs = Input(shape=(future+1, 1))
    # We set up our decoder to return full output sequences,
    # and to return internal states as well. We don't use the 
    # return states in the training model, but we will use them in inference.
    decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                         initial_state=encoder_states)
    decoder_dense = Dense(1, activation='selu')
    decoder_outputs = decoder_dense(decoder_outputs)

    # Define the model that will turn
    # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    return model

In [0]:
m2 = seq2seq(history, future, latent_dim=50)
m2.compile(optimizer='rmsprop', loss=MAE, metrics=[MAPE, MAE])
h2 = m2.fit(x=train_gen[0], y=train_gen[1], batch_size=train_batch, epochs=num_epochs, 
      validation_data=vad_gen)

Train on 70350 samples, validate on 9365 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [0]:
m3 = seq2seq(history, future, latent_dim=50)
m3.compile(optimizer='rmsprop', loss=MAPE, metrics=[MAPE, MAE])
h3 = m3.fit(x=train_gen[0], y=train_gen[1], batch_size=train_batch, epochs=num_epochs, 
      validation_data=vad_gen)

Train on 70350 samples, validate on 9365 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [0]:
m4 = seq2seq(history, future, latent_dim=100)
m4.compile(optimizer='rmsprop', loss=MAE, metrics=[MAPE, MAE])
h4 = m4.fit(x=train_gen[0], y=train_gen[1], batch_size=train_batch, epochs=num_epochs, 
      validation_data=vad_gen)

Train on 70350 samples, validate on 9365 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
