In [None]:
import pandas as pd


import os

import sys
sys.path.append('../')

import torch.nn as nn
import torch

from dataloader.dataloader import *
from training.training import *
from models.rae import *
from utils.utils import *
from visualizations.visualizations import *
from evaluation.evaluation import *

In [None]:
# Seed torch's random number generators so that weight initialisation and
# dropout are repeatable after "Restart Kernel -> Run All".
# NOTE(review): this only covers torch; if the data-loading helpers draw from
# `random` or NumPy, those need their own seeds — confirm.
SEED = 42
torch.manual_seed(SEED)

# Pick the GPU when one is visible to torch, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
if use_cuda:
    print("CUDA is available. Training on GPU.")
else:
    print("CUDA is not available. Training on CPU.")

In [None]:
# Load the scaled dataset and order it per device (imeisv) and then by
# timestamp, in ascending order.
data_folder = "../Data/Data v5"
csv_path = os.path.join(data_folder, "amari_ue_data_final_v5_no_outliers_scaled.csv")
df = pd.read_csv(csv_path).sort_values(["imeisv", "_time"], ascending=True)

In [None]:
# Alternative feature sets (currently disabled, kept for reference):
#   1) 'dl_bitrate', 'ul_bitrate',
#      'cell_x_dl_retx', 'cell_x_dl_tx',
#      'cell_x_ul_retx', 'cell_x_ul_tx',
#      'ul_total_bytes_non_incr', 'dl_total_bytes_non_incr'
#   2) 'dl_bitrate', 'ul_bitrate',
#      'ul_total_bytes_non_incr', 'dl_total_bytes_non_incr'
#
# Active selection: a single feature.
feature_columns = ['ul_bitrate']

In [None]:
# Time window (inclusive bounds, compared against the `_time` column) in which
# only the devices listed below are kept; rows of every other device that fall
# inside the window are removed.
benign_data_starting_point = "2024-03-20 14:14:50.19"
benign_data_ending_point = "2024-03-23 16:26:19.00"
kept_imeisv = ['8642840401594200', '8642840401612300', '8642840401624200', '3557821101183501']

filter_1 = df['_time'].between(benign_data_starting_point, benign_data_ending_point)
# Compare identifiers as text. pandas.read_csv infers an all-digit column as
# int64, and `isin` never matches integers against string literals, which
# would make this filter all-True and drop the kept devices as well.
# NOTE(review): if the column was parsed as float (e.g. because of missing
# values) the text form gains a ".0" suffix and still will not match — confirm
# the dtype of df['imeisv'].
filter_2 = ~df['imeisv'].astype(str).isin(kept_imeisv)

# Rebind instead of dropping in place so the cell does not silently mutate a
# frame that other cells may still reference.
df = df.drop(index=df.index[filter_1 & filter_2])

# Split by label (0 = benign, 1 = malicious) and keep each split ordered per
# device and timestamp.
benign_data = df[df['label'] == 0].copy()
benign_data = benign_data.sort_values(['imeisv', '_time'])
print(benign_data.shape[0])
malicious_data = df[df['label'] == 1].copy()
malicious_data = malicious_data.sort_values(['imeisv', '_time'])
print(malicious_data.shape[0])

In [None]:
# Build the train / validation / malicious loaders from the two splits.
train_data_loader, val_data_loader, mal_data_loader = create_ds_loader(
    benign_data,
    malicious_data,
    120,  # NOTE(review): presumably the window length — confirm against create_ds_loader
    40,   # NOTE(review): presumably the step between windows — confirm
    feature_columns,
    32,   # NOTE(review): presumably the batch size — confirm
)

## Recurrent Autoencoder

In [None]:
# Input and output widths both follow the number of selected features.
n_features = len(feature_columns)
rae_hparams = dict(
    input_dim=n_features,
    hidden_dim1=25,
    hidden_dim2=50,
    output_dim=n_features,
    dropout=0.2,
    layer_norm_flag=True,
)

rae_model = LSTMAutoencoder(**rae_hparams)
rae_model.to(device)

# L1 (mean absolute error) criterion, handed to both training and evaluation.
criterion = nn.L1Loss()
early_stopping = EarlyStopping(min_delta=0., patience=7)


In [None]:
# Train the autoencoder; the returned object exposes `train_losses` and
# `val_losses`, which are plotted in the next cell.
history = rae_model.train_model(
    # data
    train_data_loader=train_data_loader,
    val_data_loader=val_data_loader,
    mal_data_loader=mal_data_loader,
    # optimisation
    criterion=criterion,
    lr=0.001,
    num_epochs=52,
    early_stopping=early_stopping,
    # hardware
    device=device,
)

In [None]:
# Training vs. validation loss recorded by train_model.
plot_train_val_loss(
    history.train_losses,
    history.val_losses,
)

In [None]:
# NOTE(review): disabled alternative that would build dedicated test loaders
# via create_test_ds_loaders. The evaluation cell in this notebook scores
# val_data_loader / mal_data_loader instead. Either re-enable this cell or
# delete it.
# test_batch_size = 1
# benign_test_data_loader, mal_test_data_loader = create_test_ds_loaders(
#     benign_data, 
#     malicious_data, 
#     250, 
#     30, 
#     features = feature_columns, 
#     batch_size = test_batch_size
# )

In [None]:
# Earlier variant (disabled) scored the dedicated test loaders instead:
#   evaluate(rae_model, criterion, benign_test_data_loader, mal_test_data_loader, device)
# NOTE(review): val_data_loader was also handed to train_model, so the benign
# losses are presumably not from held-out data — confirm.
benign_test_losses, mal_test_losses = evaluate(
    rae_model,
    criterion,
    val_data_loader,
    mal_data_loader,
    device,
)

In [None]:
# Scatter plot of the losses returned by evaluate() for the two loaders.
plot_scatter_plot_rec_loss(
    benign_test_losses,
    mal_test_losses,
)

In [None]:
# ROC statistics and the selected decision threshold, computed from the
# benign and malicious losses.
roc_stats = calculate_threshold(benign_test_losses, mal_test_losses)
fpr, tpr, thresholds, roc_auc, optimal_threshold = roc_stats

In [None]:
# ROC curve for the values returned by calculate_threshold.
plot_roc_curve(fpr, tpr, thresholds, roc_auc)

In [None]:
# Classification metrics at the selected threshold.
# NOTE(review): optimal_threshold was derived from these same losses, so the
# metrics below are in-sample and likely optimistic.
(
    accuracy, precision, recall, f1,
    tp_rate, tn_rate, fp_rate, fn_rate,
) = infer(benign_test_losses, mal_test_losses, optimal_threshold)