In [None]:
import os
import sys
import numpy as np
import pandas as pd
from tqdm import tqdm
from matplotlib import pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error

import keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional, GRU, Embedding, Dropout
from keras.optimizers import RMSprop
from keras import backend as K

import warnings
warnings.filterwarnings('ignore')

sys.path.insert(0, '../src')

from utils import *
from features import *

In [None]:
# One input directory per resolution; each is listed and read with
# pd.read_csv by the loading cell below.
fp_240, fp_360, fp_480, fp_720, fp_1080 = (
    '../data/240p/',
    '../data/360p/',
    '../data/480p/',
    '../data/720p/',
    '../data/1080p/',
)

In [None]:
def _read_csv_dir(directory):
    """Read every entry of ``directory`` into a list of DataFrames.

    Parameters
    ----------
    directory : str
        Folder whose entries are each parsed with ``pd.read_csv``.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry, ordered by entry name.

    Notes
    -----
    ``os.listdir`` returns names in arbitrary order, so the listing is sorted
    to make the result (and any positional lookup such as ``ms_240[1]``)
    reproducible across machines and runs.
    """
    # Notebook/macOS housekeeping entries that are not data files.
    skipped = {'.ipynb_checkpoints', '.DS_Store'}
    return [
        pd.read_csv(os.path.join(directory, name))
        for name in sorted(os.listdir(directory))
        if name not in skipped
    ]


dfs_240 = _read_csv_dir(fp_240)
dfs_360 = _read_csv_dir(fp_360)
dfs_480 = _read_csv_dir(fp_480)
dfs_720 = _read_csv_dir(fp_720)
dfs_1080 = _read_csv_dir(fp_1080)

In [None]:
# Run every loaded frame through convert_ms_df (brought in by the star
# imports above). NOTE(review): the meaning of the second positional
# argument (True) is not visible in this notebook -- confirm in ../src.
ms_240 = [convert_ms_df(raw, True) for raw in dfs_240]
ms_360 = [convert_ms_df(raw, True) for raw in dfs_360]
ms_480 = [convert_ms_df(raw, True) for raw in dfs_480]
ms_720 = [convert_ms_df(raw, True) for raw in dfs_720]
ms_1080 = [convert_ms_df(raw, True) for raw in dfs_1080]

In [None]:
# Sum each converted frame into 500 ms bins keyed on its 'Time' column.
# NOTE(review): none of the later cells visible in this notebook read the
# resamples_* lists (preprocessing uses ms_* directly) -- confirm whether
# that is intentional.
resamples_240 = [frame.resample('500ms', on='Time').sum() for frame in ms_240]
resamples_360 = [frame.resample('500ms', on='Time').sum() for frame in ms_360]
resamples_480 = [frame.resample('500ms', on='Time').sum() for frame in ms_480]
resamples_720 = [frame.resample('500ms', on='Time').sum() for frame in ms_720]
resamples_1080 = [frame.resample('500ms', on='Time').sum() for frame in ms_1080]

In [None]:
# Display the second converted 240p frame as a quick visual check.
ms_240[1]

## Preprocessing

In [7]:
# Group the converted frames by resolution; the position in this list
# (0..4) is used as the class label.
data = [ms_240, ms_360, ms_480, ms_720, ms_1080]

# Tag every frame in place with the index of the group it belongs to.
for label, frames in enumerate(data):
    for frame in frames:
        frame["resolution"] = label

# One concatenated frame per resolution, in the same order as `data`.
d = [pd.concat(frames) for frames in data]

In [8]:
timesteps = 5

def create_data(d, timesteps=timesteps):
    """Cut each frame's ``pkt_size`` column into fixed-length windows.

    Parameters
    ----------
    d : sequence of pandas.DataFrame
        One frame per class. The position of a frame in ``d`` becomes the
        label of every window taken from it.
    timesteps : int, optional
        Number of consecutive rows per window. Defaults to the value of the
        module-level ``timesteps`` at definition time.

    Returns
    -------
    X : numpy.ndarray, shape (n_windows, timesteps, 1)
        Windows from all frames, stacked in the order of ``d``.
    y : numpy.ndarray, shape (n_windows,), dtype float64
        Label (index into ``d``) of each window.

    Raises
    ------
    ValueError
        If ``timesteps`` is not positive, or if ``d`` is empty (raised by
        ``np.concatenate``).
    """
    if timesteps <= 0:
        raise ValueError("timesteps must be a positive integer")

    windows = []
    labels = []
    for label, df in enumerate(d):
        # Trailing rows that do not fill a complete window are dropped.
        n_windows = df.shape[0] // timesteps
        values = np.asarray(df["pkt_size"])[: n_windows * timesteps]
        # Use `timesteps` (not a literal 5) so non-default window sizes work.
        windows.append(values.reshape(n_windows, timesteps, 1))
        # float64 keeps the label dtype identical to the previous np.append
        # based implementation, without its quadratic copying.
        labels.append(np.full(n_windows, label, dtype=np.float64))

    return np.concatenate(windows), np.concatenate(labels)
           
    
# Build the windowed dataset from the per-resolution frames and print the
# shapes of the feature and label arrays, one per line.
X, y = create_data(d)
print(X.shape, y.shape, sep="\n")

(193481, 5, 1)
(193481,)


In [9]:
# Hold out 20% of the windows for testing. The seed is fixed so the split,
# and every metric computed from it, is the same on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## LSTM

In [13]:
# Stacked LSTM classifier: three recurrent layers (100 -> 50 -> 25 units),
# two dense layers, then a 5-way softmax over the class labels.
lstm = Sequential([
    # Input shape is taken from X: (window length, features per step).
    LSTM(100, activation="relu", return_sequences=True, input_shape=X.shape[1:]),
    LSTM(50, activation="relu", return_sequences=True),
    LSTM(25, activation="relu"),
    Dense(20, activation="relu"),
    Dense(10, activation="relu"),
    Dense(5, activation="softmax"),
])

# Labels are plain class indices, hence the sparse categorical loss.
lstm.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
lstm.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 5, 100)            40800     
_________________________________________________________________
lstm_5 (LSTM)                (None, 5, 50)             30200     
_________________________________________________________________
lstm_6 (LSTM)                (None, 25)                7600      
_________________________________________________________________
dense_4 (Dense)              (None, 20)                520       
_________________________________________________________________
dense_5 (Dense)              (None, 10)                210       
_________________________________________________________________
dense_6 (Dense)              (None, 5)                 55        
Total params: 79,385
Trainable params: 79,385
Non-trainable params: 0
__________________________________________________

In [14]:
# Training hyper-parameters for the stacked LSTM.
EPOCHS, BATCH_SIZE = 25, 64

lstm.fit(x=X_train, y=y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1)

Epoch 1/25
Epoch 2/25

KeyboardInterrupt: 

### Bidirectional (in development)

In [15]:
# Single bidirectional LSTM layer (50 units per direction) feeding a 5-way
# softmax; compiled the same way as the stacked LSTM model.
bidirectional = Sequential([
    Bidirectional(LSTM(50, activation="relu"), input_shape=X.shape[1:]),
    Dense(5, activation="softmax"),
])
bidirectional.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
bidirectional.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 100)               20800     
_________________________________________________________________
dense_7 (Dense)              (None, 5)                 505       
Total params: 21,305
Trainable params: 21,305
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Training hyper-parameters for the bidirectional model, restated here so
# this cell does not depend on the LSTM training cell having run.
EPOCHS, BATCH_SIZE = 25, 64

bidirectional.fit(x=X_train, y=y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1)