# Service-Aware Google Colab Notebook

In this notebook, we load and pre-process the data collected from the network traffic scenarios. Moreover, we define, train and evaluate an LSTM model to fit the data optimally.
<br>
<br>

### Upload collected data

In [1]:
from google.colab import files
uploaded = files.upload()

Saving binary-exp.csv to binary-exp.csv
Saving binary9.csv to binary9.csv
Saving binary8.csv to binary8.csv
Saving binary7.csv to binary7.csv
Saving binary6.csv to binary6.csv
Saving binary5.csv to binary5.csv
Saving binary41.csv to binary41.csv
Saving binary40.csv to binary40.csv
Saving binary4.csv to binary4.csv
Saving binary39.csv to binary39.csv
Saving binary38.csv to binary38.csv
Saving binary37.csv to binary37.csv
Saving binary36.csv to binary36.csv
Saving binary35.csv to binary35.csv
Saving binary34.csv to binary34.csv
Saving binary33.csv to binary33.csv
Saving binary32.csv to binary32.csv
Saving binary31.csv to binary31.csv
Saving binary30.csv to binary30.csv
Saving binary3.csv to binary3.csv
Saving binary29.csv to binary29.csv
Saving binary28.csv to binary28.csv
Saving binary27.csv to binary27.csv
Saving binary26.csv to binary26.csv
Saving binary25.csv to binary25.csv
Saving binary24.csv to binary24.csv
Saving binary23.csv to binary23.csv
Saving binary22.csv to binary22.csv
Sa

### Mount to Google Drive

We mount the notebook to google drive to save our final model.

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


### Import necessary libraries

In [2]:
import os
import pandas as pd
import numpy as np
from keras import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from os import walk

### Define Pre-processing Functions

In [3]:
def extract_X_y_from_file(array, X_num, y_num):
    """ 
    Function that pre-process the data with a sliding-window supervised 
    learning approach extracting X and y

    - X_num: Input Window Length (N)
    - y_num: Number of prediction time-steps (M). We calculate the y as a mean 
              of the next M time-slots.
    """

    X = []
    y = []
    
    iterations = len(array)
    for i in range(iterations):
        ### check if we reach to end
        if i+X_num + y_num > len(array):
            break
            
        ### calculate X,y
        X_temp = array[i: i+X_num]
        y_temp = array[i+X_num: i+X_num + y_num]
        
        ### format y use mean
        new_y_temp = []
        # for every column
        for j in range(9):
            j_temp = []
            # for every row
            for i in range(y_num):
                row = y_temp[i][j]                

                j_temp.append(row)

            # obtain mean
            new_y_temp.append(np.mean(j_temp))
                        
        
        # append
        X.append(X_temp)
        y.append(new_y_temp)        
        
    return X,y

In [4]:
def create_data(X_num = 30, y_num = 5, mypath = 'binary/binary'):
    """
    Function that reads all data csvs and calls the above mentioned function to
    extract X and y
    """
    all_X = []
    all_y = []
    
    
    # find files in path
    f = []
    for (dirpath, dirnames, filenames) in walk(mypath):
        f.extend(filenames)
        break
        
    # iterate and analyze file
    for j,file in enumerate(filenames):
        if file == 'binary-exp.csv' or 'binary' not in file:
          continue

        # temp path
        path = mypath +'//' + file

        # read csv
        df = pd.read_csv(path)
        df = df.drop('Time',axis=1)

        # convert to list
        array = df.values.tolist()

        # create X and y
        X,y = extract_X_y_from_file(array, X_num, y_num)

        # add to final data
        all_X = all_X + X
        all_y = all_y + y

    return all_X, all_y

# Pre-processing

Start with pre-processing the augmentation data.

In [5]:
all_X, all_y = create_data(mypath='.')

In [6]:
len(all_X)

5276

In [7]:
len(all_y)

5276

In [28]:
X_data = np.array(all_X)
X_data.shape

(5276, 30, 9)

In [29]:
y_data = np.array(all_y)
print(y_data.shape)

(5276, 9)


Pre-process the basic non-augmented scenario to use as validation data.

In [10]:
val_X = []
val_y = []

# temp path
path = 'binary-exp.csv'

# read csv
df = pd.read_csv(path)
df = df.drop('Time',axis=1)

# convert to list
array = df.values.tolist()

# create X and y
X,y = extract_X_y_from_file(array, 30, 5)


# add to final data
val_X = val_X + X
val_y = val_y + y

X_val = np.array(val_X)
print(X_val.shape)

y_val = np.array(val_y)
print(y_val.shape)

(125, 30, 9)
(125, 9)


In [31]:
X_data = X_data.reshape(X_data.shape[0], X_data.shape[2]* X_data.shape[1])
X_data.shape

(5276, 270)

In [46]:
y_data.shape

(5276, 9)

In [13]:
# y_data = y_data.reshape(y_data.shape[0], y_data.shape[1], 1)
# y_data.shape

(5276, 9, 1)

In [39]:
X_data[0].shape

(270,)

In [42]:
X_data[0]

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0])

In [44]:
y_data[0].shape

(9,)

In [45]:
y_data[0]

array([1., 0., 0., 0., 1., 0., 0., 0., 0.])

# FNN Model Implementation

In [36]:
model = Sequential()
model.add(Dense(25, activation='relu', input_shape=[270]))
model.add(Dense(25, activation='relu'))
model.add(Dense(9))
model.compile(optimizer='adam', loss='mse')
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_26 (Dense)            (None, 25)                6775      
                                                                 
 dense_27 (Dense)            (None, 25)                650       
                                                                 
 dense_28 (Dense)            (None, 9)                 234       
                                                                 
Total params: 7,659
Trainable params: 7,659
Non-trainable params: 0
_________________________________________________________________


Define a callback function to save on every iteration the best optimal trained model, based on validation accuracy. Then fit the model to the data.

In [None]:
from keras.callbacks import ModelCheckpoint
filepath="/content/gdrive/MyDrive/app_aware_FNN_weights_with_val_loss.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [37]:
#model.fit(X_data,y_data, validation_data=(X_val,y_val),epochs=300, callbacks=callbacks_list)
model.fit(X_data,y_data,epochs=2000)

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

KeyboardInterrupt: ignored

Now the model is trained. Make a prediction test ( with 1 value, the X[0] ) to present the format of the input and the format of the predictions.

In [None]:
test = X_data[0].reshape(1,30,9)
test.shape

(1, 30, 9)

In [None]:
test

array([[[1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 1],
        [1, 0, 0, 0, 0, 0, 0, 0, 1],
        [1, 0, 0, 0, 1, 0, 0, 0, 1],
        [1, 0, 0, 0, 1, 0, 0, 0, 1],
        [1, 0, 0, 0, 1, 0, 0, 0, 1],
        [1, 0, 0, 0, 1, 0, 0, 0, 1],
        [1, 0, 0, 0, 1, 0, 0, 0, 1],
        [1, 0, 0, 0, 1, 0, 0, 0, 1],
 

In [None]:
model.predict(test)

array([[ 9.93981957e-01, -2.08920203e-04,  4.01371717e-03,
         6.17049634e-03,  9.96633291e-01, -1.66730577e-04,
        -6.78896904e-04, -1.14658615e-04, -2.58818269e-03]], dtype=float32)

# Predictions - Evaluation

## Training Evaluation

We load the (same) model from the saved HDF5 file in our repository. We begin by evaluating the model on the training - augmented data.

In [None]:
from keras.models import load_model

In [None]:
model = load_model('app_aware_LSTM_weights_with_val_loss.best.hdf5')

In [None]:
X_data.shape

(5276, 30, 9)

In [None]:
yhat_training = model.predict(X_data)

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

rms = mean_squared_error(y_data, yhat_training)
print('RMSE training',rms)

mae = mean_absolute_error(y_data, yhat_training)
print('MAE: training',mae)

RMSE training 0.002514107324999888
MAE: training 0.018280894814327965


In our algorithm we use the *round()* function to round the predictions.

In [None]:
print(yhat[100])
print(y_data[100])

[ 6.5811425e-03 -4.0366774e-04  1.0098476e+00  1.0056287e+00
  3.9668232e-03 -1.3133387e-04 -1.2566149e-03 -6.3567958e-04
  9.9916881e-01]
[0. 0. 1. 1. 0. 0. 0. 0. 1.]


In [None]:
for i in yhat[100]:
  print(round(i))

0
0
1
1
0
0
0
0
1


## Testing Evaluation

Now, use the validation X data (X-val) from the basic non-augmented scenario to make predictions. Then evaluate the model utilizing the MAE, and RMSE metrics. 

In [None]:
yhat_testing = model.predict(X_val)

In [None]:
rms = mean_squared_error(y_val, yhat_testing)
print('RMSE testing ',rms)

mae = mean_absolute_error(y_val, yhat_testing)
print('MAE: testing ',mae)

RMSE testing  0.004724671100021649
MAE: testing  0.022024084697091245
