## Imports

In [0]:
import matplotlib.pyplot as plt
import pandas as pd #
import numpy as np #
import math, time
from IPython.display import clear_output  # clear cell output
from keras.models import Sequential
from keras.layers import Dense, LSTM, CuDNNLSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
####### LOAD DATASET
# Remote CSV with the network measurements used by every cell below.
url = 'https://raw.githubusercontent.com/kevinscaccia/data/master/hobbit_02-12-2013.csv'
# Only columns 7, 8 and 9 are read (latency, jitter and packet_loss).
df = pd.read_csv(url, usecols=[7,8,9])  # latency, jitter and packet_loss
# Show the loaded column names; later cells index them with a leading space (e.g. ' jitter').
print(df.columns)

## LSTM Module Class

In [0]:
########################################################
# GENERIC CLASS THAT IMPLEMENTS AN LSTM NETWORK AND FORECAST THE NEXT 
# N_STEPS_OUT FUTURE VALUES
#
class LSTM_Module:
    """Univariate LSTM forecaster that predicts the next ``n_steps_out`` values.

    Intended call order: ``load_data`` -> ``train_model`` -> ``predict`` ->
    ``plot_predicted``.
    """

    #######
    # CONSTRUCTOR
    #
    def __init__(self, data, n_steps_in, n_steps_out):
        """Store the raw series and the window sizes; nothing is computed yet.

        Args:
            data: object exposing ``.values`` (``load_data`` slices
                ``data.values[start:end]``), e.g. one DataFrame column.
            n_steps_in: number of past points fed to the network per sample.
            n_steps_out: number of future points the network emits per sample.
        """
        ####### MODEL HYPER-PARAMETERS
        self.n_steps_in = n_steps_in  # past window size
        self.n_steps_out = n_steps_out  # future output window
        self.n_features = 1  # each point dimension (scalar)
        ####### DATA
        self.data = data  # entire data
        self.train_data = None  # scaled train series, shape: [train_len, 1]
        self.test_data = None  # scaled test series, shape: [test_len, 1]
        self.train_x = None  # train input (scaled), shape: [samples, n_steps_in, n_features]
        self.train_y = None  # train output (scaled), shape: [samples, n_steps_out]
        self.test_x = None  # test input (original scale), shape: [samples, n_steps_in]
        self.test_y = None  # test output (original scale), shape: [samples, n_steps_out]
        ####### MODEL INFO
        self.model = None  # model reference
        self.model_history = None  # model train history
        self.train_scaler = None  # train data scaler
        self.test_scaler = None  # test data scaler
        self.predicted_scaled = None  # predicted values (model scale)
        self.predicted = None  # predicted values (real scale)

    #######
    # LOAD AND SPLIT DATA
    #
    def load_data(self, start, end, percent_train):
        """Slice, split, scale and window the series.

        Args:
            start: first row (inclusive) of ``self.data.values`` to use.
            end: last row (exclusive) of ``self.data.values`` to use.
            percent_train: fraction of the slice assigned to training; the
                remainder becomes the test series.

        Raises:
            ValueError: if the training part is shorter than
                ``n_steps_in + n_steps_out`` and no window can be built.
        """
        ###### DATA LENGTH
        data = self.data.values[start:end]
        data = data.astype('float32')  # to float32
        data = data.reshape(data.shape[0])
        ###### SPLIT TRAIN AND TEST
        split_index = int(len(data)*percent_train)  # split index
        train_data = data[:split_index]
        test_data = data[split_index:]
        if len(train_data) < self.n_steps_in + self.n_steps_out:
            raise ValueError(
                "training data has {} points but at least {} are needed to "
                "build one window".format(
                    len(train_data), self.n_steps_in + self.n_steps_out))
        ####### DATA SCALER
        self.train_scaler = MinMaxScaler(feature_range=(0, 1))  # scale between [0,1]
        self.train_data = self.train_scaler.fit_transform(train_data.reshape(-1, 1))  # fitted on train data only
        # NOTE(review): the test series gets its own scaler fitted on test data,
        # so train and test are not on a common scale - confirm this is intended.
        self.test_scaler = MinMaxScaler(feature_range=(0, 1))  # scale between [0,1]
        self.test_data = self.test_scaler.fit_transform(test_data.reshape(-1, 1))
        ###### DATA WINDOW SPLIT
        # The network must be trained on the SCALED series: predict() maps its
        # outputs back with train_scaler.inverse_transform, which is only valid
        # when the model works in the scaler's [0, 1] range.
        x_train, y_train = LSTM_Module.split_sequence(
            self.train_data.reshape(-1), self.n_steps_in, self.n_steps_out)
        # Test windows stay in the original scale because plot_predicted()
        # compares them with the rescaled forecast (self.predicted).
        self.test_x, self.test_y = LSTM_Module.split_sequence(
            test_data, self.n_steps_in, self.n_steps_out)
        # reshape from [samples, timesteps] into [samples, timesteps, features] for LSTM INPUT
        self.train_x = x_train.reshape((x_train.shape[0], x_train.shape[1], self.n_features))
        self.train_y = y_train.reshape((y_train.shape[0], y_train.shape[1]))

    #######
    # DEFINE AND TRAIN LSTM NETWORK
    #
    def train_model(self, hidden_1=1, hidden_2=1, epochs=10):
        """Build, compile and fit a two-layer LSTM followed by a Dense output.

        Args:
            hidden_1: units of the first recurrent layer.
            hidden_2: units of the second recurrent layer.
            epochs: number of training epochs.

        The fitted model is stored in ``self.model`` and the object returned
        by ``fit`` in ``self.model_history``.
        """
        ###### DEFINE MODEL
        # CuDNNLSTM presumably requires a CUDA-capable GPU - verify against
        # the installed Keras version.
        model = Sequential()
        model.add(CuDNNLSTM(hidden_1, return_sequences=True,
                            input_shape=(self.n_steps_in, self.n_features)))
        model.add(CuDNNLSTM(hidden_2))
        model.add(Dense(self.n_steps_out))  # linear output, one unit per forecast step
        ###### COMPILE MODEL
        model.compile(optimizer='adam', loss='mse')
        ###### TRAIN MODEL
        self.model_history = model.fit(self.train_x, self.train_y,
                                       epochs=epochs)
        clear_output()  # clear console output
        self.model = model

    #######
    # PREDICT N STEPS AHEAD OF THE TRAIN DATA
    #
    def predict(self, steps_ahead=1):
        """Forecast ``steps_ahead`` blocks of ``n_steps_out`` points recursively.

        Each block is predicted from the last ``n_steps_in`` values of the
        running history (seed + earlier predictions) and appended to it.

        Results are stored, seed included, in ``self.predicted_scaled``
        (model scale) and ``self.predicted`` (original scale); both have
        shape ``[n_steps_in + steps_ahead * n_steps_out, 1]``.
        """
        ###### SEED WINDOW
        # NOTE(review): train_x[-1] is the input of the LAST training window,
        # which ends n_steps_out points before the end of the training series,
        # so the first forecast block covers the last training targets rather
        # than the first test points - confirm the intended alignment.
        history = self.train_x[-1].reshape(-1)
        ###### PREDICT AND APPEND EACH PREDICTION TO THE HISTORY (AND REUSE IT)
        # One iteration per requested block, so exactly
        # steps_ahead * n_steps_out points are produced regardless of how
        # n_steps_in compares with n_steps_out.
        for _ in range(steps_ahead):
            window = history[-self.n_steps_in:].reshape(1, self.n_steps_in, self.n_features)
            pred = self.model.predict(window).reshape(-1)
            history = np.concatenate([history, pred])  # concatenate prediction to data
        ###### STORE PREDICTION
        history = history.reshape(-1, 1)
        self.predicted_scaled = history  # model scale, between [0,1]
        ###### RETURN TO THE PROBLEM REAL SCALE
        self.predicted = self.train_scaler.inverse_transform(history)  # rescale to original metric

    #######
    # PLOT PREDICTED
    #
    def plot_predicted(self, steps_ahead=1):
        """Plot the forecast (red) against real test values (blue).

        Args:
            steps_ahead: number of forecast blocks to show; should not exceed
                the value passed to ``predict``.
        """
        len_ahead = steps_ahead*self.n_steps_out
        plt.figure(figsize=(10, 4), dpi=80, facecolor='w', edgecolor='k')
        plt.ylabel("Jitter"); plt.xlabel("Time"); plt.grid(True)
        ###### PREDICTED VALUES
        # skip the seed window stored at the start of self.predicted
        plt_pred_values = self.predicted[self.n_steps_in:self.n_steps_in+len_ahead].reshape(-1)
        ###### REAL TEST VALUES
        # NOTE(review): test_y[i, 0] is the test point at index n_steps_in + i,
        # so this curve starts n_steps_in points into the test series.
        plt_test_values = self.test_y[:len_ahead, 0].reshape(-1)
        #
        plt.plot(plt_test_values, 'b')
        plt.plot(plt_pred_values, 'r')
        plt.show()

    #######
    # SPLIT TRAIN TEST AUXILIARY FUNCTION
    #
    @staticmethod
    def split_sequence(sequence, n_steps_in, n_steps_out):
        """Cut ``sequence`` into overlapping (input, output) windows, stride 1.

        Args:
            sequence: indexable series supporting ``len`` and slicing.
            n_steps_in: length of each input window.
            n_steps_out: length of each output window, taken right after the
                input window.

        Returns:
            Tuple ``(X, y)`` of numpy arrays; ``X[i]`` is
            ``sequence[i:i+n_steps_in]`` and ``y[i]`` the following
            ``n_steps_out`` points. Both are empty when the sequence is too
            short for a single window.
        """
        n_windows = len(sequence) - n_steps_in - n_steps_out + 1
        X, y = list(), list()
        for i in range(max(n_windows, 0)):
            end_ix = i + n_steps_in  # end of the input part
            X.append(sequence[i:end_ix])
            y.append(sequence[end_ix:end_ix + n_steps_out])
        return np.array(X), np.array(y)
########################################################
########################################################
########################################################

## MLP Module Class

In [0]:
########################################################
# MLP CLASS THAT USES FORECASTED VALUES(2*N_STEPS_OUT) TO FORECAST THE LOSS
# (N_STEPS_OUT FUTURE LOSS VALUES)
#
class MLP_Module():
    """MLP that maps a block of jitter and latency values to packet-loss values.

    Each input sample is ``n_steps_out`` jitter values followed by
    ``n_steps_out`` latency values; each target is the ``n_steps_out`` loss
    values at the same positions.
    """

    #######
    # CONSTRUCTOR
    #
    def __init__(self, loss_data, jitter_model, latency_model):
        """Keep references to the loss series and the two LSTM modules.

        Args:
            loss_data: loss series used as training target; it is indexed
                position-wise alongside the models' ``train_data``.
            jitter_model: object exposing ``n_steps_out``, ``train_data`` and
                ``test_data`` (an ``LSTM_Module`` in this notebook).
            latency_model: same contract as ``jitter_model``.
        """
        ####### DATA
        self.loss_data = loss_data  # loss data
        self.train_data = None
        self.train_data_x = list()
        self.train_data_y = list()
        ####### MODELS
        self.jitter_model = jitter_model
        self.latency_model = latency_model
        ####### MLP MODEL PARAMETERS
        self.model = None  # model reference
        self.n_steps_out = jitter_model.n_steps_out
        self.input_dim = jitter_model.n_steps_out + latency_model.n_steps_out
        self.X = None  # training inputs, shape: [blocks, 2 * n_steps_out]
        self.Y = None  # training targets, shape: [blocks, n_steps_out]

    #####
    # Load train data
    #
    def load_train_data(self,):
        """Build ``self.X`` / ``self.Y`` from non-overlapping blocks.

        Only complete blocks of ``n_steps_out`` points are kept, and only up
        to the shortest of the three series. A trailing partial block would
        make the rows ragged, which ``np.array`` and ``fit`` cannot handle.
        """
        jitter_seq = self.jitter_model.train_data.reshape(-1)
        latency_seq = self.latency_model.train_data.reshape(-1)
        loss_seq = self.loss_data
        block = self.n_steps_out
        usable = min(len(jitter_seq), len(latency_seq), len(loss_seq))
        X, Y = list(), list()
        for i in range(0, usable - block + 1, block):
            # organize in blocks: jitter values first, then latency values
            X.append(np.concatenate([jitter_seq[i:i+block], latency_seq[i:i+block]]))
            Y.append(loss_seq[i:i+block])
        self.X = np.array(X)
        self.Y = np.array(Y)

    #####
    # Train model
    #
    def train_model(self, n_hidden_1, n_hidden_2, epochs=10):
        """Build, compile and fit the MLP on ``self.X`` / ``self.Y``.

        ``load_train_data`` must have been called first.

        Args:
            n_hidden_1: units of the first Dense layer (no activation given).
            n_hidden_2: units of the second Dense layer (ReLU).
            epochs: number of training epochs.
        """
        latency_input = self.latency_model.train_data.reshape(-1)
        jitter_input = self.jitter_model.train_data.reshape(-1)
        # Not used by the fit below; kept because it is an instance attribute
        # that outside code may read.
        self.network_input = np.concatenate([jitter_input, latency_input])
        ###
        # create model
        self.model = model = Sequential()
        model.add(Dense(n_hidden_1, input_shape=(self.n_steps_out*2,)))
        model.add(Dense(n_hidden_2, kernel_initializer='normal', activation='relu'))
        model.add(Dense(self.n_steps_out, kernel_initializer='normal'))
        # Compile model
        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(self.X, self.Y, epochs=epochs)

    #####
    # Predict next n_steps_out
    #
    def predict(self,):
        """Predict the loss for the first test block and plot it against truth.

        The input is built from the first ``n_steps_out`` points of each
        LSTM module's ``test_data``.

        Returns:
            The array returned by ``self.model.predict`` for that block.
        """
        n = len(self.jitter_model.train_data)
        # NOTE(review): the ground truth is read from the module-level `df`
        # starting at row len(train_data), with no start offset - confirm it
        # lines up with the rows the LSTM modules' test data come from.
        loss_seq_test = df[' packet_loss'][n:n+self.n_steps_out].values
        jitter_seq_test = self.jitter_model.test_data[:self.n_steps_out].reshape(-1)
        latency_seq_test = self.latency_model.test_data[:self.n_steps_out].reshape(-1)
        #
        X = list()
        for i in range(0, len(jitter_seq_test), self.n_steps_out):
            # organize in blocks
            steps = jitter_seq_test[i:i+self.n_steps_out].reshape(-1)
            steps = np.concatenate([steps, latency_seq_test[i:i+self.n_steps_out].reshape(-1)])
            print("-->",steps.shape)
            # populate list
            X.append(steps)
        ###################################
        x_value = np.array(X)
        y_value = self.model.predict(x_value)
        ###################################
        plt.plot(loss_seq_test, 'y')  # real loss
        plt.plot(y_value.reshape(-1),'r')  # predicted loss
        return y_value

## Hybrid Model

In [0]:
class HybridModel():
    """Pipeline of two LSTM forecasters (jitter, latency) feeding an MLP for loss.

    Intended call order: ``load_models`` -> ``train_models`` -> ``predict``.
    """

    def __init__(self, df):
        """Store the data frame and the default configuration.

        Args:
            df: frame indexed with the columns ``' jitter'``, ``' latency'``
                and ``' packet_loss'`` (note the leading space).
        """
        self.n_steps_out = 20  # forecast window of both LSTMs
        self.n_steps_in = 10  # input window of both LSTMs
        self.start_data = 1000  # first row used (inclusive)
        self.end_data = 3000  # last row used (exclusive)
        self.df = df
        self.percent_train = 0.9  # fraction of the row range used for training
        ################### Models
        self.jitter_model = None
        self.latency_model = None
        self.mlp_module = None
        ################### HYPER-PARAMETERS JITTER LSTM
        self.jitter_lstm_hidden_1 = 20
        self.jitter_lstm_hidden_2 = 20
        self.jitter_lstm_epochs = 100
        self.jitter_predict_steps_ahead = 1
        ################### HYPER-PARAMETERS LATENCY LSTM
        self.latency_lstm_hidden_1 = 20
        self.latency_lstm_hidden_2 = 20
        self.latency_lstm_epochs = 100
        self.latency_predict_steps_ahead = 1
        ################### HYPER-PARAMETERS MLP
        self.mlp_hidden_1 = 10
        self.mlp_hidden_2 = 10
        self.mlp_epochs = 100

    def load_models(self,):
        """Create both LSTM modules and load their data over the same row range."""
        self.jitter_model = LSTM_Module(data=self.df[' jitter'], n_steps_in=self.n_steps_in, n_steps_out=self.n_steps_out)
        self.jitter_model.load_data(start=self.start_data, end=self.end_data, percent_train=self.percent_train)
        #
        self.latency_model = LSTM_Module(data=self.df[' latency'], n_steps_in=self.n_steps_in, n_steps_out=self.n_steps_out)
        self.latency_model.load_data(start=self.start_data, end=self.end_data, percent_train=self.percent_train)

    def train_models(self,):
        """Train the jitter LSTM, the latency LSTM and then the MLP.

        ``load_models`` must have been called first.
        """
        print("Training LSTM Jitter Module")
        # hidden_2 uses the jitter model's own second-layer setting
        self.jitter_model.train_model(hidden_1=self.jitter_lstm_hidden_1,
                                      hidden_2=self.jitter_lstm_hidden_2,
                                      epochs=self.jitter_lstm_epochs)

        print("Training LSTM Latency Module")
        self.latency_model.train_model(hidden_1=self.latency_lstm_hidden_1,
                                       hidden_2=self.latency_lstm_hidden_2,
                                       epochs=self.latency_lstm_epochs)
        # The loss targets must cover the same rows as the LSTM training data,
        # which start at start_data (not at row 0), and must come from the
        # frame given to this instance rather than a module-level variable.
        n_train = len(self.latency_model.train_data)
        loss_start = self.start_data
        self.loss_seq = self.df[' packet_loss'].values[loss_start:loss_start + n_train]
        self.mlp_module = MLP_Module(self.loss_seq, self.jitter_model, self.latency_model)
        self.mlp_module.load_train_data()
        print("Training MLP Module")

        self.mlp_module.train_model(self.mlp_hidden_1,
                                    self.mlp_hidden_2,
                                    self.mlp_epochs)

    def predict(self,):
        """Run the forecast of both LSTM modules, then the MLP prediction."""
        self.jitter_model.predict(steps_ahead=self.jitter_predict_steps_ahead)
        self.latency_model.predict(steps_ahead=self.latency_predict_steps_ahead)
        self.mlp_module.predict()
########################################################
########################################################
########################################################

In [0]:
# Run the whole hybrid pipeline once and report the elapsed wall-clock time.
start_time = time.time()
model = HybridModel(df)
model.load_models()  # create both LSTM modules and load their data
model.train_models()  # train the jitter LSTM, the latency LSTM and then the MLP
model.predict()  # forecast with both LSTMs, then run the MLP prediction
print("time: {}s".format( time.time() - start_time))

## Auxiliary Functions

In [0]:
def plot_informations(df, model):
    """Show jitter, latency and loss, one figure each, over the model's row range.

    Args:
        df: frame indexed with the columns ' jitter', ' latency' and
            ' packet_loss'.
        model: object exposing ``start_data`` and ``end_data``; they bound
            the rows that are plotted.
    """
    window = slice(model.start_data, model.end_data)
    panels = (
        ("Jitter", ' jitter'),
        ("Latency", ' latency'),
        ("Loss", ' packet_loss'),
    )
    # same order as before: label the axis, draw the series, show the figure
    for label, column in panels:
        plt.ylabel(label)
        plt.plot(df[column].values[window])
        plt.show()

## Testes

In [0]:
# Stand-alone experiment: a single jitter LSTM with 50-point input and output windows.
jitter_model = LSTM_Module(data=df[' jitter'], 
                           n_steps_in=50, n_steps_out=50)
# Use rows 1000..9000 of the series, 80% of them for training.
jitter_model.load_data(start=1000, end=9000, 
                       percent_train=0.8)
################### TRAIN MODEL
jitter_model.train_model(hidden_1=100, hidden_2=100, epochs=500)
# Forecast 10 blocks ahead and plot the same 10 blocks against the test values.
jitter_model.predict(steps_ahead=10)
jitter_model.plot_predicted(10)

## Notas

#### Perguntas mais relevantes

É possível, a partir do dataset original, retirar os parâmetros **jitter**, **latency/delay** e **loss**?

Posso continuar utilizando o modelo de forma a prever os parâmetros em uma janela **t** e utilizar a previsão que o modelo fez para prever os valores na janela **t+1**?


Ou devo utilizar o modelo para prever os parâmetros em um tempo **t** e depois, com a captura dos **valores reais**, prever o tempo **t+1** (Walk-forward Validation)?

Tenho dois modelos (LSTM e MLP), devo utilizar os mesmos **dados de treino para eles**? (mesma partição para treino, feita a partir do dataset original, já escalada entre [0,1])

É possível treinar os dois modelos separadamente? Ou é necessário que os dados de treino da LSTM sejam os mesmos dados utilizados no treino da MLP?

#### Perguntas menos relevantes

Quais outras técnicas para realizar a validação do modelo? 

#### Informações

Entrada da LSTM:
> (samples , input_window, feature_dimension)

Saída da LSTM:
> (samples , output_window,)

Entrada da MLP:
> (output_window*2,)

Saída da MLP:
> (output_window,)




