In [1]:
%matplotlib inline

import datetime
import glob
import joblib
import os
import time
import pandas as pd
import numpy as np
from numpy import dot
from numpy.linalg import norm
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import PowerTransformer
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')

import seaborn as sns
# sns.set(context='paper', style='whitegrid', color_codes=True)   
sns.set_palette(sns.color_palette(["#017b92", "#f97306", "#0485d1"]))  # ["jade green", "orange", "blue"] 

import mods_utils

2023-03-24 11:22:35.980933: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-24 11:22:36.429698: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64
2023-03-24 11:22:36.429741: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64


## Constants

In [2]:
# --- Model selection ---------------------------------------------------------
cfg_teacher_forcing = False  # incremental learning: True selects the stateful GRU model and enables the predict-and-update cell
cfg_transformer = True       # True selects the Transformer model (only checked when cfg_teacher_forcing is False)
cfg_remove_peak = False      # default of transform(remove_peak=...): clip values to the 1.5*IQR fences

# --- Windowing (defaults of make_timeseries / transform_invert) ---------------
cfg_sequence_len = 24        # default=24; length of the input window handed to TimeseriesGenerator
cfg_sequence_len_y = 1       # number of consecutive target steps stacked into one output vector
cfg_steps_ahead = 1          # extra shift of the target relative to the window (1 = no extra shift)

# --- Network size --------------------------------------------------------------
cfg_stacks = 2               # NOTE(review): not referenced by any cell visible in this notebook
cfg_rnn_units = 48          # default=48 GRU units
cfg_mlp_units = 128          # passed as head_size to build_model() when the Transformer is created

# --- Training ------------------------------------------------------------------
cfg_dropout_rate = 0.3      # default=0.5; dropout rate used by the GRU and Transformer models
cfg_batch_size = 1           # batch size of the TimeseriesGenerator and of the stateful GRU input
cfg_num_epochs = 100         # epochs of the initial fit
cfg_num_epochs_update = 2    # incremental learning: epochs per update inside the predict-and-update loop
cfg_epochs_patience = 10     # patience of the EarlyStopping callback

# --- Plotting: default figure width / height used by plot_predictions() --------
cfg_fig_size_x = 20
cfg_fig_size_y = 5

# --- Input files (tab-separated, read by read_data()) --------------------------
data_train_filename = 'data/data_train.tsv'
data_test_filename = 'data/data_test.tsv'

# Path prefix for saved artefacts: the model itself and model_name + '.scaler'
model_name = 'models/mods2_model'

## Functions

In [3]:
# Load a tab-separated file and hand its contents back as a numpy array
def read_data(filename):
    """Read a TSV file with pandas and return the table values as a numpy array.

    As a quick sanity check the function prints the file name, the number of
    columns, the dtype of the resulting array and the list of column names.

    :param filename: path of the tab-separated file to read
    :return: ``DataFrame.values`` of the parsed file
    """
    frame = pd.read_csv(filename, sep='\t', skiprows=0, skipfooter=0, engine='python')
    values = frame.values
    column_names = list(frame)
    print('read_data: ', filename, '\t', values.shape[1], values.dtype, '\n', column_names)
    return values


# Clean a numpy array: optional IQR clipping, then negative values -> epsilon
def transform(data, epsilon=1, remove_peak=cfg_remove_peak):
    """Pre-process raw values before scaling.

    :param data: numpy array; percentiles are taken along axis 0
    :param epsilon: replacement written wherever a value is negative
    :param remove_peak: when true, clip every column to
        ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`` before the negative-value replacement
    :return: new array; the input array is not modified
    """
    if remove_peak:
        # Per-column quartiles and the InterQuartile Range (IQR) fences.
        lower_q, upper_q = np.percentile(data, [25, 75], axis=0)
        spread = upper_q - lower_q
        data = np.clip(data, a_min=lower_q - 1.5*spread, a_max=upper_q + 1.5*spread)
    return np.where(data < 0, epsilon, data)


# Scale all metrics but each separately: normalization or standardization
def normalize(data, scaler=None):
    """Scale ``data`` and return the scaled values together with the scaler used.

    :param data: array-like accepted by the scaler's ``fit_transform`` / ``transform``
    :param scaler: an already fitted scaler exposing ``transform``; when ``None``
        a new ``MinMaxScaler(feature_range=(0, 1))`` is fitted on ``data``
    :return: tuple ``(norm_data, scaler)``
    """
    # Compare against None explicitly: scaler objects may define __len__
    # (sklearn's Pipeline does), so a truthiness test could discard a scaler
    # that the caller supplied and silently fit a new one instead.
    if scaler is None:
        # Other scalers (PowerTransformer, QuantileTransformer, or a Pipeline
        # combining them) can be swapped in here.
        scaler = MinMaxScaler(feature_range=(0, 1))
        norm_data = scaler.fit_transform(data)
    else:
        norm_data = scaler.transform(data)
    return norm_data, scaler


def make_timeseries(data,
                    sequence_len=cfg_sequence_len,
                    sequence_len_y=cfg_sequence_len_y,
                    steps_ahead=cfg_steps_ahead
                    ):
    """Wrap ``data`` in a Keras ``TimeseriesGenerator`` of (window, target) batches.

    Inputs and targets both start out as ``data`` itself.

    :param data: 2-D numpy array of (already scaled) observations
    :param sequence_len: window length passed as ``length`` to the generator
    :param sequence_len_y: number of consecutive rows stacked side by side into
        one target row; the inputs are shortened by ``sequence_len_y - 1`` rows
    :param steps_ahead: targets are shifted forward by ``steps_ahead - 1`` rows
        and the inputs shortened by the same amount
    :return: ``TimeseriesGenerator`` using ``cfg_batch_size`` as batch size
    """
    inputs, targets = data, data

    if sequence_len_y > 1:
        # Append the series shifted by 1 .. sequence_len_y-1 rows as extra columns.
        for shift in range(1, sequence_len_y):
            targets = np.column_stack((targets[:-1], data[shift:]))
        inputs = inputs[:-(sequence_len_y-1)]

    if steps_ahead > 1:
        lead = steps_ahead - 1
        inputs = inputs[:-lead]
        targets = targets[lead:]

    return TimeseriesGenerator(inputs, targets, length=sequence_len,
                               sampling_rate=1, stride=1, batch_size=cfg_batch_size)


def transform_invert(data, denorm, sequence_len=cfg_sequence_len, steps_ahead=cfg_steps_ahead):
    """Pair de-normalised predictions with the rows of ``data`` they refer to.

    :param data: original (unscaled) series
    :param denorm: de-normalised predictions, returned unchanged
    :param sequence_len: window length used when the predictions were made
    :param steps_ahead: target shift used when the predictions were made
    :return: ``(denorm, Y)`` where ``Y`` is the slice of ``data`` that starts at
        index ``sequence_len + steps_ahead - 1`` and has ``len(denorm)`` rows
        (fewer if ``data`` ends earlier)
    """
    first = sequence_len + steps_ahead - 1          # 0-based index of the first target row
    return denorm, data[first:first + len(denorm)]  # slice end is exclusive


def fit_model(data_train, data_test, model, epochs, scaler, callbacks_list, teacher_forcing=cfg_teacher_forcing):
    """Train ``model`` on ``data_train`` and return it with the Keras history.

    The training data goes through ``transform`` -> ``normalize`` (with the
    given ``scaler``) -> ``make_timeseries`` before fitting.

    :param data_train: raw training series
    :param data_test: raw test series; only used as validation data when
        ``teacher_forcing`` is false
    :param model: compiled Keras model
    :param epochs: number of epochs to train
    :param scaler: fitted scaler forwarded to ``normalize``
    :param callbacks_list: Keras callbacks forwarded to ``model.fit``
    :param teacher_forcing: when true, fit one epoch at a time without shuffling
        and call ``model.reset_states()`` after each epoch. The default is the
        value ``cfg_teacher_forcing`` had when this function was defined.
    :return: ``(model, history)``; ``history`` is the result of the last
        ``model.fit`` call, or ``None`` if no fit was performed (``epochs == 0``
        in teacher-forcing mode)
    """
    trans_train = transform(data_train)
    norm_train, _ = normalize(trans_train, scaler)
    tsg_train = make_timeseries(norm_train)

    # Defined up front so the return statement cannot hit an unbound name
    # when the teacher-forcing loop runs zero times.
    history = None

    if teacher_forcing:
        for _ in range(epochs):
            # No batch_size here: the generator already yields batches, and the
            # Keras fit() documentation says not to specify it for generator /
            # Sequence inputs.
            history = model.fit(tsg_train, epochs=1, shuffle=False, callbacks=callbacks_list)
            model.reset_states()
    else:
        trans_test = transform(data_test)
        norm_test, _ = normalize(trans_test, scaler)
        tsg_test = make_timeseries(norm_test)
        history = model.fit(tsg_train, epochs=epochs, callbacks=callbacks_list, validation_data=tsg_test)
    return model, history


def predict(data_test, model, scaler):
    """Run ``model`` on ``data_test`` after the same preparation used for training.

    :param data_test: raw series to predict on
    :param model: object exposing ``predict``
    :param scaler: fitted scaler forwarded to ``normalize``
    :return: whatever ``model.predict`` returns for the generated windows
    """
    scaled, _ = normalize(transform(data_test), scaler)
    windows = make_timeseries(scaled)
    return model.predict(windows)

def eval_predictions(pred_test, Y_test, model_type):
    """Score predictions against real values and return a tab-separated report line.

    The line has the form
    ``<model_type>\\tSMAPE\\t<v>...\\tMAPE\\t<v>...\\tRMSE\\t<v>...\\tR2\\t<v>...\\tCOSINE\\t<v>...``
    and is also printed.

    :param pred_test: predicted values
    :param Y_test: real values, passed as first argument to each metric helper
    :param model_type: label written at the start of the line
    :return: the report line (str)

    Each ``mods_utils`` helper is expected to return an iterable of scores
    (presumably one per column - TODO confirm); numbers are formatted with four
    decimals, strings are passed through unchanged.
    """
    print('\nEvaluation with real values - One step')

    # Previously the MAPE entry had no branch of its own and silently repeated
    # the SMAPE numbers. It now calls mods_utils.mape; if the module does not
    # provide that helper the column is reported as 'n/a' instead of showing
    # numbers that belong to another metric.
    metric_helpers = (
        ('SMAPE', mods_utils.smape),
        ('MAPE', getattr(mods_utils, 'mape', None)),
        ('RMSE', mods_utils.rmse),
        ('R2', mods_utils.r2),
        ('COSINE', mods_utils.cosine),
    )

    line = model_type
    for label, helper in metric_helpers:
        scores = ['n/a'] if helper is None else helper(Y_test, pred_test)
        line += '\t' + label + '\t'
        line += '\t'.join(x if isinstance(x, str) else "{0:0.4f}".format(x) for x in scores)
    print(line)
    return line

def plot_predictions(pred_test, Y_test, multivariate,
                     fig_x=cfg_fig_size_x,
                     fig_y=cfg_fig_size_y
                     ):
    """Plot real values and predictions, one panel per column, and save the figure.

    :param pred_test: 2-D array of predictions
    :param Y_test: 2-D array of real values
    :param multivariate: number of columns to plot; values above 1 produce one
        stacked panel per column, otherwise only column 0 is drawn
    :param fig_x: figure width
    :param fig_y: height of a single panel (total height is ``multivariate*fig_y``)
    :return: None. Side effects: sets ``plt.rcParams["figure.figsize"]``,
        writes the figure to ``models/plot_image`` and shows it.
    """
    plt.rcParams["figure.figsize"] = (fig_x, fig_y)
    canvas_size = (fig_x, multivariate*fig_y)

    if multivariate > 1:
        fig, panels = plt.subplots(multivariate, sharex=False, figsize=canvas_size)
        for column, panel in enumerate(panels):
            # Real values first, predictions second.
            panel.plot(Y_test[:, column])
            panel.plot(pred_test[:, column])
    else:
        fig, panel = plt.subplots(figsize=canvas_size)
        panel.plot(Y_test[:, 0])
        panel.plot(pred_test[:, 0])

    fig.tight_layout()
    plt.savefig('models/plot_image', bbox_inches='tight')
    plt.show()



## Train data + scaler, Test data

In [4]:
# Load the training series and fit the scaler on it: normalize() is called
# without a scaler, so it creates and fits a new MinMaxScaler. The resulting
# `scaler` object is the one passed to fit_model() / predict() in later cells.
data_train = read_data(data_train_filename)
trans_train = transform(data_train)
norm_train, scaler = normalize(trans_train)

# save scaler (written next to the model as model_name + '.scaler')
scaler_filename = model_name + '.scaler'
joblib.dump(scaler, scaler_filename)
print('Scaler saved to: ', scaler_filename)

# The test series is only loaded here; fit_model() and predict() apply
# transform() and the fitted scaler to it themselves.
data_test = read_data(data_test_filename)

read_data:  data/data_train.tsv 	 5 int64 
 ['conn_count_uid_in', 'conn_count_uid_out', 'dns_count_uid_out', 'http_count_uid_in', 'ssl_count_uid_in']
Scaler saved to:  models/mods2_model.scaler
read_data:  data/data_test.tsv 	 5 int64 
 ['conn_count_uid_in', 'conn_count_uid_out', 'dns_count_uid_out', 'http_count_uid_in', 'ssl_count_uid_in']


## Transformer blocks

In [5]:
# https://keras.io/examples/timeseries/timeseries_transformer_classification/

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """One Transformer encoder block: self-attention and feed-forward, each with a residual.

    :param inputs: Keras tensor; its last dimension is restored by the final
        ``Conv1D`` so the residual additions line up
    :param head_size: ``key_dim`` of the multi-head attention
    :param num_heads: number of attention heads
    :param ff_dim: number of filters of the first (ReLU) ``Conv1D``
    :param dropout: rate used inside the attention layer and by both ``Dropout`` layers
    :return: Keras tensor produced by the second residual addition
    """
    # Attention part: layer norm -> self-attention -> dropout -> residual add
    normed = layers.LayerNormalization(epsilon=1e-6)(inputs)
    attended = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(normed, normed)
    attended = layers.Dropout(dropout)(attended)
    res = attended + inputs

    # Feed-forward part: layer norm -> Conv1D(ReLU) -> dropout -> Conv1D -> residual add
    hidden = layers.LayerNormalization(epsilon=1e-6)(res)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    projected = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(hidden)
    return projected + res

def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    """Assemble the Transformer forecasting model.

    Stacks ``num_transformer_blocks`` encoder blocks, pools the result, runs it
    through the dense layers listed in ``mlp_units`` and ends with a sigmoid
    ``Dense`` layer.

    :param input_shape: shape of one sample without the batch dimension; its
        last entry is taken as the number of features
    :param head_size: ``key_dim`` of each attention layer
    :param num_heads: attention heads per block
    :param ff_dim: filters of the feed-forward ``Conv1D`` in each block
    :param num_transformer_blocks: number of encoder blocks to stack
    :param mlp_units: iterable with the width of each dense (ReLU) layer
    :param dropout: dropout rate inside the encoder blocks
    :param mlp_dropout: dropout rate after each dense layer
    :return: uncompiled ``keras.Model``
    """
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    # NOTE(review): data_format="channels_first" is kept from the referenced
    # Keras example; confirm that this is the intended pooling axis for
    # inputs laid out as (sequence_len, features).
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)

    # Take the feature count from the input shape instead of the notebook
    # global `multivariate`, which is only assigned in a later cell; the
    # function no longer depends on cell execution order for it.
    n_features = int(input_shape[-1])
    outputs = layers.Dense(units=n_features*cfg_sequence_len_y, activation="sigmoid")(x)
    return keras.Model(inputs, outputs)

## Create + compile model

In [6]:
# Number of input/output features = number of columns of the training data
multivariate = data_train.shape[1]

if cfg_teacher_forcing:
    # Stateful two-layer GRU for incremental learning; stateful layers need a
    # fixed batch size, hence batch_shape instead of shape.
    model_type = 'GRU (stateful)'
    x = layers.Input(batch_shape=(cfg_batch_size, cfg_sequence_len, multivariate))
    # Fixed: the original read `cfg.units`, but no `cfg` object exists in this
    # notebook; the GRU width is configured by cfg_rnn_units.
    h = layers.GRU(units=cfg_rnn_units, stateful=True, return_sequences=True)(x)         # activation='tanh'
    h = layers.Dropout(cfg_dropout_rate)(h)
    h = layers.GRU(units=cfg_rnn_units, stateful=True, return_sequences=False)(h)
    h = layers.Dropout(cfg_dropout_rate)(h)
    y = layers.Dense(units=multivariate*cfg_sequence_len_y, activation='sigmoid')(h)

    # Fixed: this branch never created `model`, so compile() below failed.
    model = Model(inputs=x, outputs=y)
elif cfg_transformer:
    model_type = 'Transformer'
    # Plain shape tuple; no throwaway Input layer is needed just to read a shape.
    input_shape = (cfg_sequence_len, multivariate)

    model = build_model(
        input_shape,
        head_size=cfg_mlp_units,
        num_heads=4,
        ff_dim=4,
        num_transformer_blocks=4,
        mlp_units=[256],
        mlp_dropout=cfg_dropout_rate,
        dropout=cfg_dropout_rate,
    )
else:
    model_type = 'GRU'
    x = layers.Input(shape=(cfg_sequence_len, multivariate))
    # GRU
    h = layers.GRU(units=cfg_rnn_units, return_sequences=True)(x)
    h = layers.Dropout(cfg_dropout_rate)(h)
    h = layers.GRU(units=cfg_rnn_units, return_sequences=False)(h)
    h = layers.Dropout(cfg_dropout_rate)(h)
    # Adding the output layer:
    y = layers.Dense(units=multivariate*cfg_sequence_len_y, activation='sigmoid')(h)

    model = Model(inputs=x, outputs=y)

# Report the architecture that was actually built (was hard-coded to Transformer).
print('Model typ: ' + model_type)

# compile model
loss = 'mean_squared_error'
opt = keras.optimizers.Adam(learning_rate=1e-3)
# Fixed: a trailing comma used to turn this into a 1-tuple wrapping the list.
metrics = ['mse', 'mae']     # 'cosine', 'mape'

model.compile(loss=loss, optimizer=opt, metrics=metrics)
# summary() prints by itself and returns None; wrapping it in print() added a stray "None".
model.summary()

Model typ: Transformer


2023-03-24 11:22:37.210411: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-24 11:22:37.210916: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-24 11:22:37.215619: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-24 11:22:37.216069: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-24 11:22:37.216506: I tensorflow/compiler/xla/stream_executo

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 24, 5)]      0           []                               
                                                                                                  
 layer_normalization (LayerNorm  (None, 24, 5)       10          ['input_2[0][0]']                
 alization)                                                                                       
                                                                                                  
 multi_head_attention (MultiHea  (None, 24, 5)       11781       ['layer_normalization[0][0]',    
 dAttention)                                                      'layer_normalization[0][0]']    
                                                                                              

 mbda)                                                            'tf.__operators__.add_5[0][0]'] 
                                                                                                  
 layer_normalization_7 (LayerNo  (None, 24, 5)       10          ['tf.__operators__.add_6[0][0]'] 
 rmalization)                                                                                     
                                                                                                  
 conv1d_6 (Conv1D)              (None, 24, 4)        24          ['layer_normalization_7[0][0]']  
                                                                                                  
 dropout_7 (Dropout)            (None, 24, 4)        0           ['conv1d_6[0][0]']               
                                                                                                  
 conv1d_7 (Conv1D)              (None, 24, 5)        25          ['dropout_7[0][0]']              
          

## Fit + save model

In [None]:
# Stop training once the training loss ('loss', not 'val_loss') has not
# improved for cfg_epochs_patience epochs, and roll back to the best weights.
earlystops = EarlyStopping(monitor='loss', patience=cfg_epochs_patience,
                           verbose=1, restore_best_weights=True)
callbacks_list = [earlystops]

# Fit the model; fit_model() prepares the train/test series itself.
model, history = fit_model(
    data_train=data_train,
    data_test=data_test,
    model=model,
    epochs=cfg_num_epochs,
    scaler=scaler,
    callbacks_list=callbacks_list,
)

# Persist the trained model under the configured path prefix.
model.save(model_name)
print('\nSave trained model: ', model_name)

Epoch 1/100


2023-03-24 11:22:42.295001: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100
2023-03-24 11:22:42.678763: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7f5928866bf0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-03-24 11:22:42.678776: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): NVIDIA GeForce RTX 2080 Ti, Compute Capability 7.5
2023-03-24 11:22:42.678780: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (1): NVIDIA GeForce RTX 2080 Ti, Compute Capability 7.5
2023-03-24 11:22:42.681599: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-03-24 11:22:42.764121: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
 241/1402 [====>.........................] - ETA: 10s - loss: 0.0102 - mse: 0.0102 - mae: 0.0665

In [None]:
# Plot the loss curves recorded by the last fit.
plt.rcParams["figure.figsize"] = (8, 6)
legend_labels = ['train']
plt.plot(history.history['loss'])
# 'val_loss' is only recorded when fit() received validation data; in
# teacher-forcing mode fit_model() trains without it, so indexing the key
# unconditionally raised a KeyError.
if 'val_loss' in history.history:
    plt.plot(history.history['val_loss'])
    legend_labels.append('test')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(legend_labels, loc='upper right')

## Predict and Update: Incremental or Teacher forcing

In [None]:
if cfg_teacher_forcing:
    data_train_copy = data_train
    data_test_copy = data_test
    # data_test_copy = data_test[0:50,:]

    # Number of test rows handed to predict() per step; presumably sized so
    # that each slice produces exactly one prediction row - TODO confirm.
    padding = cfg_sequence_len + cfg_steps_ahead + cfg_sequence_len_y -1
    window_preds = []

    for i in range(data_test_copy.shape[0] - padding):
        if i > 0:
            # Fixed: the original call omitted the data_test argument, which
            # shifted every positional argument and raised a TypeError.
            # teacher_forcing is passed explicitly so the update does not rely
            # on the default captured when fit_model() was defined.
            fit_model(data_train_copy, data_test_copy, model, cfg_num_epochs_update,
                      scaler, callbacks_list, teacher_forcing=True)
        # Slice from data_test_copy (not data_test) so the loop bound and the
        # window always refer to the same array.
        data = data_test_copy[i:i+padding, :]
        pred = predict(data, model, scaler)
        window_preds.append(pred)

        # TODO: peak detection
        data = transform(data)
        # NOTE(review): consecutive windows overlap by padding-1 rows, so this
        # appends mostly duplicated rows to the training data - confirm whether
        # only the newest row should be added.
        data_train_copy = np.vstack((data_train_copy, data))
        print(i)

    # Stack once at the end instead of re-allocating the array every iteration.
    pred_model = np.vstack(window_preds)

    # Save incremental model. A separate name keeps `model_name` unchanged, so
    # re-running this cell does not keep appending the suffix.
    incremental_model_name = model_name + '_incremental'
    model.save(incremental_model_name)
    print('\nSave trained model: ', incremental_model_name)

    # Evaluation + plot (incremental)
    eval_line = ''
    for i in range(cfg_sequence_len_y):
        # Columns [i*multivariate, (i+1)*multivariate) hold the i-th predicted step.
        one_y_test = pred_model[:, i * multivariate:(i+1) * multivariate]
        denorm_test = scaler.inverse_transform(one_y_test)
        # The i-th output step targets rows shifted by i (see make_timeseries),
        # so the real values are taken from the series shifted by i as well.
        # For i == 0 this is the unshifted series.
        pred_test, Y_test = transform_invert(data_test_copy[i:], denorm_test, cfg_sequence_len, cfg_steps_ahead)

        # Evaluate with real values
        # NOTE(review): the label says 'LSTM' although this branch builds GRU layers.
        eval_line += str(i+1) + '\t' + eval_predictions(pred_test, Y_test, 'LSTM') + '\n'

        # Plot
        plot_predictions(pred_test, Y_test, multivariate)

## Predict + Evaluation + Plot

In [None]:
# Predict on the whole test series with the trained model.
pred_model = predict(data_test, model, scaler)

eval_line = ''
for i in range(cfg_sequence_len_y):
    # Columns [i*multivariate, (i+1)*multivariate) hold the i-th predicted step.
    one_y_test = pred_model[:, i * multivariate:(i+1) * multivariate]
    denorm_test = scaler.inverse_transform(one_y_test)
    # make_timeseries() builds the i-th target block from the series shifted
    # by i rows, so the real values must be shifted by i too. The original
    # passed the unshifted series for every i, which misaligned the comparison
    # for i > 0. For i == 0 (the only step when cfg_sequence_len_y == 1) the
    # result is unchanged.
    pred_test, Y_test = transform_invert(data_test[i:], denorm_test, cfg_sequence_len, cfg_steps_ahead)

    # Evaluate with real values
    eval_line += str(i+1) + '\t' + eval_predictions(pred_test, Y_test, 'NN') + '\n'

# Plot (only for single-step output, where pred_test/Y_test cover the one step)
if cfg_sequence_len_y == 1:
    plot_predictions(pred_test, Y_test, multivariate)