In [58]:
from tensorflow.keras import Input, Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import GRU, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanAbsoluteError
import pandas as pd
from ydata_synthetic.preprocessing.timeseries.utils import real_data_loading
import numpy as np
import pickle

In [59]:
def fatNum(n):
    i = n // 9
    j = (n % 9) // 3
    k = ((n % 9) % 3) 
    
    return i,j,k

In [60]:
#First implement a simple RNN model for prediction
def RNN_regression(units):
    opt = Adam(name='AdamOpt')
    loss = MeanAbsoluteError(name='MAE')
    model = Sequential()
    model.add(GRU(units=units,
                  name=f'RNN_1'))
    model.add(Dense(units=14,
                    activation='sigmoid',
                    name='OUT'))
    model.compile(optimizer=opt, loss=loss)
    return model

In [61]:
def sumFeatureMetricsOfModels(models, data_metrics):
    count32 = np.zeros(len(models))
    count64 = np.zeros(len(models))

    for i in range(len(models)): 
            count32[i] = sum(data_metrics[0,i,:])
            count64[i] = sum(data_metrics[1,i,:])
            
    return count32, count64

In [62]:
def getFeaturesBestMetricsOfModels(models, data_metrics):
    count32, count64 = sumFeatureMetricsOfModels(models, data_metrics)
    
    print(count32)
    print(count64)
    
    index = np.where(count32 == count32.min())[0][0]
    i, j, k = fatNum(index)
    model = 'so_seqlen_' + str((50*(i) + 50)) + '_hidim_'+str(20*(j)+20) + '_batch_'+str(28*(k)+100)+'.pkl'
    print('bestmodel_int32:' + model)
    best_32 = models.get(model)[0]
    
    index = np.where(count64 == count64.min())[0][0]
    i, j, k = fatNum(index)
    model = 'so_seqlen_' + str((50*(i) + 50)) + '_hidim_'+str(20*(j)+20) + '_batch_'+str(28*(k)+100)+'.pkl'
    print('bestmodel_int64:' + model)
    best_64 = models.get(model)[0]
    
    
    index = np.where(count32 == count32.max())[0][0]
    i, j, k = fatNum(index)
    model = 'so_seqlen_' + str((50*(i) + 50)) + '_hidim_'+str(20*(j)+20) + '_batch_'+str(28*(k)+100)+'.pkl'
    print('bestmodel_int32:' + model)
    worst_32 = models.get(model)[0]
    
    index = np.where(count64 == count64.max())[0][0]
    i, j, k = fatNum(index)
    model = 'so_seqlen_' + str((50*(i) + 50)) + '_hidim_'+str(20*(j)+20) + '_batch_'+str(28*(k)+100)+'.pkl'
    print('bestmodel_int64:' + model)
    worst_64 = models.get(model)[0]
    
    return best_32, worst_32, best_64, worst_64

In [63]:
sample_size = 2400
seq_len = 50

realdata_config = 'real9_50_3600_norm'
models_config = 'models9_50_3600_norm'
metrics_config= 'metrics9_50_3600_norm'

num_cols = ['enq_qdepth1','deq_timedelta1', 'deq_qdepth1',
            ' enq_qdepth2', ' deq_timedelta2', ' deq_qdepth2',
            'enq_qdepth3', 'deq_timedelta3', 'deq_qdepth3',
            'Buffer', 'ReportedBitrate', 'FPS', 'CalcBitrate',
            'q_size'] 
cat_cols = ['Resolution']

data = np.zeros(2*9*len(num_cols)).reshape(2,9,len(num_cols))

with open('../saved_objects/' + metrics_config + '.pkl', 'rb') as file:
        # Load the object from the file
        data = pickle.load(file)
        
with open('../saved_objects/' + models_config + '.pkl', 'rb') as file:
        # Load the object from the file
        models = pickle.load(file)

with open('../saved_objects/' + realdata_config + '.pkl', 'rb') as file:
        # Load the object from the file
        real_data = pickle.load(file)

In [64]:
real_32 = real_data[0]
real_64 = real_data[1]

In [65]:
best_modelsum_32, worst_modelsum_32, best_modelsum_64, worst_modelsum_64 = getFeaturesBestMetricsOfModels(models, data)

[1.55606701 1.07867633 2.49299797 5.54779726 0.43967091 1.30029685
 1.77256908 3.88010907 3.10767045]
[2.06309326 1.72980215 0.65540989 1.51944118 1.70368999 2.74427812
 1.69580751 1.28739328 0.337264  ]
bestmodel_int32:so_seqlen_50_hidim_40_batch_128.pkl
bestmodel_int64:so_seqlen_50_hidim_60_batch_156.pkl
bestmodel_int32:so_seqlen_50_hidim_40_batch_100.pkl
bestmodel_int64:so_seqlen_50_hidim_40_batch_156.pkl


In [66]:
real_32 = real_data_loading(real_32, seq_len=seq_len)
real_64 = real_data_loading(real_64, seq_len=seq_len)

best_modelsum_32 = real_data_loading(best_modelsum_32, seq_len=seq_len)
worst_modelsum_32 = real_data_loading(worst_modelsum_32, seq_len=seq_len)
best_modelsum_64 = real_data_loading(best_modelsum_64, seq_len=seq_len)
worst_modelsum_64 = real_data_loading(worst_modelsum_64, seq_len=seq_len)

In [67]:
#Prepare the dataset for the regression model
real_data = np.asarray(real_32)
synth_data = np.asarray(worst_modelsum_32)

synth_data = synth_data[:len(real_32)]
n_events = len(real_data)

In [68]:
print(type(real_data))
print(type(synth_data))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [69]:

#Split data on train and test
idx = np.arange(n_events)
n_train = int(.75*n_events)
train_idx = idx[:n_train]
test_idx = idx[n_train:]

#Define the X for synthetic and real data
X_real_train = real_data[train_idx, :seq_len-1, 0:14]
X_synth_train = synth_data[train_idx, :seq_len-1, 0:14]

X_real_test = real_data[test_idx, :seq_len-1, 0:14]


#Define the y for synthetic and real datasets
y_real_test = real_data[test_idx, -1, 0:14]
y_real_train = real_data[train_idx, -1, 0:14]

y_synth_train = synth_data[train_idx, -1, 0:14]

print('Synthetic X train: {}'.format(X_synth_train.shape))
print('Real X train: {}'.format(X_real_train.shape))

print('Synthetic y train: {}'.format(y_synth_train.shape))
print('Real y train: {}'.format(y_real_train.shape))

print('Real X test: {}'.format(X_real_test.shape))
print('Real y test: {}'.format(y_real_test.shape))

Synthetic X train: (1782, 49, 14)
Real X train: (1782, 49, 14)
Synthetic y train: (1782, 14)
Real y train: (1782, 14)
Real X test: (594, 49, 14)
Real y test: (594, 14)


In [70]:
#Training the model with the real train data
ts_real = RNN_regression(12)
early_stopping = EarlyStopping(monitor='val_loss',patience=10,min_delta=0.001)

real_train = ts_real.fit(x=X_real_train,
                          y=y_real_train,
                          validation_data=(X_real_test, y_real_test),
                          epochs=200,
                          batch_size=128,
                          callbacks=[early_stopping])

Epoch 1/200


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 7

In [71]:
#Training the model with the synthetic data
ts_synth = RNN_regression(12)
early_stopping = EarlyStopping(monitor='val_loss',patience=10,min_delta=0.001)
synth_train = ts_synth.fit(x=X_synth_train,
                          y=y_synth_train,
                          validation_data=(X_real_test, y_real_test),
                          epochs=200,
                          batch_size=128,
                          callbacks=[early_stopping])

Epoch 1/200


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 7

In [72]:
#Summarize the metrics here as a pandas dataframe
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_log_error
real_predictions = ts_real.predict(X_real_test)
synth_predictions = ts_synth.predict(X_real_test)

""" metrics_dict = {'r2': [r2_score(y_real_test, real_predictions),
                       r2_score(y_real_test, synth_predictions)],
                'MAE': [mean_absolute_error(y_real_test, real_predictions),
                        mean_absolute_error(y_real_test, synth_predictions)],
                'MRLE': [mean_squared_log_error(y_real_test, real_predictions),
                         mean_squared_log_error(y_real_test, synth_predictions)]} """
metrics_dict = {'r2': [r2_score(real_data[train_idx, -1, 0:14], real_data[train_idx, -2, 0:14]),
                       r2_score(synth_data[train_idx, -1, 0:14], synth_data[train_idx, -2, 0:14])],
                'MAE': [mean_absolute_error(real_data[train_idx, -1, 0:14], real_data[train_idx, -2, 0:14]),
                        mean_absolute_error(synth_data[train_idx, -1, 0:14], synth_data[train_idx, -2, 0:14])]}



results = pd.DataFrame(metrics_dict, index=['Real', 'Synthetic'])

results



Unnamed: 0,r2,MAE
Real,0.365571,0.078726
Synthetic,0.877522,0.009401
