In [None]:
import pandas as pd
import torch
import os
import itertools
import uuid

import sys
sys.path.append('../')


from dataloader.dataloader import *
from training.training import *
from models.rae import *
from utils.utils import *
from visualizations.visualizations import *
from evaluation.evaluation import *

In [None]:
# Pick the compute device once: the GPU when CUDA is usable, the CPU otherwise.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
print(
    "CUDA is available. Training on GPU."
    if use_gpu
    else "CUDA is not available. Training on CPU."
)

In [None]:
# Folder holding the v5 CSV exports, relative to this notebook.
data_folder = "../Data/Data v5"

# Dataset variant to load. The CSV file name is derived from this value:
#   'no_outliers'            -> amari_ue_data_final_v5_no_outliers.csv
#   'no_outliers_scaled'     -> amari_ue_data_final_v5_no_outliers_scaled.csv
#   'no_outliers_scaled_sep' -> amari_ue_data_final_v5_no_outliers_scaled_sep.csv
dataset_used = 'no_outliers_scaled'

dataset_csv = os.path.join(data_folder, f"amari_ue_data_final_v5_{dataset_used}.csv")

# Rows are ordered by `imeisv` first and by `_time` within each `imeisv`.
df = pd.read_csv(dataset_csv).sort_values(["imeisv", "_time"], ascending=True)

In [None]:
# Columns used as model input. Only the uplink bitrate is active; the wider
# feature sets below are kept for reference:
#   - 'dl_bitrate', 'ul_bitrate', 'cell_x_dl_retx', 'cell_x_dl_tx',
#     'cell_x_ul_retx', 'cell_x_ul_tx',
#     'ul_total_bytes_non_incr', 'dl_total_bytes_non_incr'
#   - 'dl_bitrate', 'ul_bitrate',
#     'ul_total_bytes_non_incr', 'dl_total_bytes_non_incr'
feature_columns = ['ul_bitrate']

In [None]:
rolling_avg_label = False # True for applying rolling avg


def rolling_mean_per_group(frame, columns, window=360, group_col='imeisv'):
    """Return `columns` of `frame` smoothed by a trailing rolling mean per group.

    The window is evaluated separately inside every `group_col` group, so the
    average computed for one group never includes rows of another group. A
    plain `frame[columns].rolling(window).mean()` over a frame that stacks
    several groups would mix the last rows of one group into the first
    windows of the next.

    Parameters
    ----------
    frame : pandas.DataFrame
        Input data; it is not modified.
    columns : list of str
        Columns to smooth.
    window : int, default 360
        Number of consecutive rows in each window.
    group_col : str, default 'imeisv'
        Column whose values delimit the independent series.

    Returns
    -------
    pandas.DataFrame
        Smoothed `columns`, indexed like `frame`. The first `window - 1` rows
        of every group are NaN because their window is incomplete.
    """
    return frame.groupby(group_col, sort=False)[columns].transform(
        lambda values: values.rolling(window=window).mean()
    )


if rolling_avg_label:
    # NOTE(review): the leading rows of each group become NaN after smoothing;
    # confirm that the downstream windowing handles or drops them.
    df[feature_columns] = rolling_mean_per_group(df, feature_columns)

In [None]:
# Time window (inclusive bounds, compared against the `_time` column) in which
# only the `imeisv` values listed below are kept.
benign_data_starting_point = "2024-03-20 14:14:50.19"
benign_data_ending_point = "2024-03-23 16:26:19.00"

# `imeisv` values whose rows are kept inside the window.
kept_imeisv = ['8642840401594200', '8642840401612300','8642840401624200','3557821101183501']

# Rows whose timestamp falls inside the window.
filter_1 = (df['_time'].between(benign_data_starting_point, benign_data_ending_point))
# Rows whose `imeisv` is NOT one of the kept values. The column is compared as
# text because `isin` treats integers and strings as different values: if
# `read_csv` parsed `imeisv` as integers, matching against string IDs would
# match nothing and every row inside the window would be dropped.
# NOTE(review): if the column was parsed as float (e.g. because of missing
# values) its text form gains a '.0' suffix and will not match -- confirm dtype.
filter_2 = (~df['imeisv'].astype(str).isin(kept_imeisv))

# Keep everything except in-window rows of non-kept `imeisv` values. Selecting
# with a boolean mask avoids mutating the frame in place.
df = df[~(filter_1 & filter_2)]

# Split by label: 0 goes to the benign set, 1 to the malicious set.
benign_data = df[df['label'] == 0].copy()
benign_data = benign_data.sort_values(['imeisv','_time'])
print(benign_data.shape[0])
malicious_data = df[df['label'] == 1].copy()
malicious_data = malicious_data.sort_values(['imeisv','_time'])
print(malicious_data.shape[0])

In [None]:
# Hyper-parameter grid: every key maps to the list of values to try.
# Loss functions are referenced through `torch.nn` (torch is imported
# explicitly) rather than through a bare `nn` name that this notebook never
# imports itself. The entries are classes, not instances.
search_space_dict = {
    'window_size': [60, 90, 120],
    'batch_size': [32],
    # Each architecture fixes both hidden sizes together.
    'model_arch_config': [
        {'hidden_dim1': 25, 'hidden_dim2': 50},
        {'hidden_dim1': 50, 'hidden_dim2': 100},
        {'hidden_dim1': 25, 'hidden_dim2': 25},
        {'hidden_dim1': 50, 'hidden_dim2': 50},
    ],
    'dropout': [0.2, 0.3],
    'layer_norm_flag': [True, False],
    'loss_function': [torch.nn.L1Loss, torch.nn.MSELoss],
    'lr': [1e-4, 1e-3],
    'num_epochs': [52],
}

In [None]:
# Axes of the grid, listed from slowest- to fastest-varying. A single product
# over them visits the combinations in the same order as nesting one loop per
# axis in this sequence.
grid_axes = (
    search_space_dict['window_size'],
    search_space_dict['batch_size'],
    search_space_dict['model_arch_config'],
    search_space_dict['dropout'],
    search_space_dict['layer_norm_flag'],
    search_space_dict['loss_function'],
    search_space_dict['lr'],
    search_space_dict['num_epochs'],
)

for (window_size, batch_size, config, dropout, layer_norm_flag,
     loss_function, lr, num_epochs) in itertools.product(*grid_axes):
    hidden_dim1, hidden_dim2 = config['hidden_dim1'], config['hidden_dim2']
    n_features = len(feature_columns)

    # Random identifier under which this run is printed and saved.
    experiment_id = str(uuid.uuid4())

    # The step between windows is a third of the window length.
    parameters = dict(
        window_size=window_size,
        step_size=int(window_size/3),
        batch_size=batch_size,
        hidden_dim1=hidden_dim1,
        hidden_dim2=hidden_dim2,
        dropout=dropout,
        layer_norm_flag=layer_norm_flag,
        loss_function=loss_function,
        lr=lr,
        num_epochs=num_epochs,
    )

    print("##########################################################")
    print_parameters(parameters, experiment_id)

    # Loaders are rebuilt for every combination.
    train_data_loader, val_data_loader, mal_data_loader = create_ds_loader(
        benign_data,
        malicious_data,
        window_size,
        parameters['step_size'],
        feature_columns,
        batch_size,
    )

    # Input and output widths both equal the number of selected features.
    rae_model = LSTMAutoencoder(
        input_dim=n_features,
        hidden_dim1=hidden_dim1,
        hidden_dim2=hidden_dim2,
        output_dim=n_features,
        dropout=dropout,
        layer_norm_flag=layer_norm_flag,
    )
    rae_model.to(device)

    # A fresh early-stopping tracker and loss instance for each run.
    early_stopping = EarlyStopping(patience=7, min_delta=0.)
    criterion = loss_function()

    history = rae_model.train_model(
        num_epochs=num_epochs,
        early_stopping=early_stopping,
        train_data_loader=train_data_loader,
        val_data_loader=val_data_loader,
        mal_data_loader=mal_data_loader,
        device=device,
        criterion=criterion,
        lr=lr,
    )

    # Replace the loss class by its name before saving
    # (presumably so the stored parameters are plain text -- confirm).
    parameters['loss_function'] = loss_function.__name__

    additional_metadata = dict(
        rolling_avg=rolling_avg_label,
        feature_columns=feature_columns,
        dataset_used=dataset_used,
    )

    save_experiment_results(history, parameters, additional_metadata, experiment_id, results_dir='../results')

    # Drop the model reference and release cached GPU memory before the next run.
    del rae_model
    torch.cuda.empty_cache()