In [None]:
import torch
import pandas as pd
import numpy as np

import sys
sys.path.append('../')


from dataloader.dataloader import *
from training.training import *
from models.rae import *
from utils.utils import *
from visualizations.visualizations import *
from evaluation.evaluation import *

In [None]:
# Pick the compute device once: the GPU when PyTorch reports CUDA support,
# otherwise the CPU. Every later `.to(device)` call relies on this choice.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
status_message = (
    "CUDA is available. Training on GPU."
    if use_cuda
    else "CUDA is not available. Training on CPU."
)
print(status_message)

In [None]:
# Relative path of the directory that holds the "Data v5" dataset files.
data_folder = "../Data/Data v5"

In [None]:
# Load the dataset and order it per device (imeisv) and timestamp.
df = pd.read_csv(os.path.join(data_folder, "amari_ue_data_final_v5_no_outliers_scaled.csv"))
df = df.sort_values(["imeisv", "_time"], ascending = True)

# Inside this time window only the devices listed below are kept; rows of
# every other device that fall inside the window are removed.
# NOTE(review): `between` compares against string bounds, so `_time` is
# presumably an ISO-formatted string column — confirm.
benign_data_starting_point = "2024-03-20 14:14:50.19"
benign_data_ending_point = "2024-03-23 16:26:19.00"
devices_kept_in_window = [
    '8642840401594200',
    '8642840401612300',
    '8642840401624200',
    '3557821101183501',
]

filter_1 = df['_time'].between(benign_data_starting_point, benign_data_ending_point)
# Compare IMEISVs on their string form. The ids above are strings, while other
# cells compare `imeisv` against an integer; on a numeric column a plain
# `isin` with strings never matches, which would silently drop the devices
# that are supposed to be kept.
filter_2 = ~df['imeisv'].astype(str).isin(devices_kept_in_window)

# Reassign instead of dropping in place so the cell leaves no half-mutated frame.
df = df.loc[~(filter_1 & filter_2)]

In [None]:
# Split the dataset by the `label` column (0 -> benign, 1 -> malicious), keep
# each part ordered per device and time, and report how many rows each holds.
sort_keys = ['imeisv', '_time']

benign_data = df.loc[df['label'] == 0].copy().sort_values(by=sort_keys)
print(len(benign_data))

malicious_data = df.loc[df['label'] == 1].copy().sort_values(by=sort_keys)
print(len(malicious_data))

In [None]:
# Load the metadata of all recorded experiments. The context manager closes
# the file handle as soon as the JSON has been parsed (it was previously
# left open for the lifetime of the kernel).
with open("../results/experiments_metadata.json") as f:
    exp_metadata = json.load(f)

In [None]:
# Flatten every experiment record into a single row: the hyperparameters under
# 'parameters' come first, followed by selected top-level fields.
# Maps output column name -> key in the experiment record.
summary_fields = {
    'epochs_trained': 'epochs_trained',
    'results_file': 'results_file',
    'timestamp': 'timestamp',
    'min_train_loss': 'min_train_loss',
    'min_val_loss': 'min_val_loss',
    'min_train_val_gap': 'min_train_val_gap',
    'features': 'feature_columns',
    'rolling_avg': 'rolling_avg',
    'dataset_used': 'dataset_used',
}

data_for_df = {
    experiment_id: {
        **record['parameters'],
        **{column: record[key] for column, key in summary_fields.items()},
    }
    for experiment_id, record in exp_metadata.items()
}

# One row per experiment id, one column per flattened field.
exp_df = pd.DataFrame.from_dict(data_for_df, orient='index')

In [None]:
# Show the three experiments with the lowest minimum validation loss.
exp_df.sort_values('min_val_loss', ascending = True).head(3)

In [None]:
# Complete metadata record of the experiment selected for evaluation, looked up
# by its id. Despite the name this is the whole record; the hyperparameters
# are stored under its 'parameters' key.
exp_parameters = exp_metadata['5bfa52f8-e8c6-4899-963d-3ebd80be60f9']

In [None]:
# Load the saved training history; the id in the file name is the same
# experiment id that is looked up in the metadata.
# NOTE(review): the helper name suggests pickle deserialization — only load
# history files that come from a trusted source.
exp_hist = load_history_with_pickle('../results/5bfa52f8-e8c6-4899-963d-3ebd80be60f9_history.pkl') 

### Plot training and validation loss

In [None]:
# Plot the train_losses and val_losses series recorded in the loaded history.
plot_train_val_loss(exp_hist.train_losses, exp_hist.val_losses)

### Plot reconstruction loss as a scatter plot

In [None]:
# Build the benign and malicious test loaders over the full dataset.
# NOTE(review): 120 and 30 are passed positionally — presumably window length
# and step; confirm against create_test_ds_loaders.
# NOTE(review): the feature list is hard-coded here, while the model's input
# size is taken from the experiment metadata — confirm that they agree.
test_batch_size = 1
test_features = ['ul_bitrate']

test_loaders = create_test_ds_loaders(
    benign_data,
    malicious_data,
    120,
    30,
    features=test_features,
    batch_size=test_batch_size,
)
benign_test_data_loader, mal_test_data_loader = test_loaders

In [None]:
# Rebuild the autoencoder with the hyperparameters of the selected experiment;
# input and output width both equal the number of feature columns it used.
model_hyperparams = exp_parameters['parameters']
n_features = len(exp_parameters['feature_columns'])

rae_model = LSTMAutoencoder(
    input_dim = n_features, 
    hidden_dim1 = model_hyperparams['hidden_dim1'], 
    hidden_dim2 = model_hyperparams['hidden_dim2'], 
    output_dim = n_features, 
    dropout = model_hyperparams['dropout'], 
    layer_norm_flag = model_hyperparams['layer_norm_flag']
)

# Restore the trained weights and move the model to the selected device.
rae_model.load_state_dict(exp_hist.model_weights)
rae_model.to(device)
# A freshly constructed module starts in training mode; switch to evaluation
# mode so dropout is disabled and the forward passes used for evaluation and
# plotting are deterministic.
rae_model.eval()

# Use the loss function recorded for the experiment; anything other than
# 'L1Loss' falls back to MSE.
criterion = nn.L1Loss() if model_hyperparams['loss_function'] == 'L1Loss' else nn.MSELoss()

In [None]:
# Run the model over both test loaders and collect the losses for the benign
# and the malicious data separately.
# NOTE(review): presumably one reconstruction loss per batch — confirm in evaluate().
benign_test_losses, mal_test_losses = evaluate(rae_model, criterion, benign_test_data_loader, mal_test_data_loader, device)

In [None]:
# Scatter plot of the benign versus malicious test losses.
plot_scatter_plot_rec_loss(benign_test_losses, mal_test_losses)

### Plot ROC curve

In [None]:
# Compute the ROC quantities and a decision threshold from the two loss sets.
# NOTE(review): how `optimal_threshold` is selected is defined inside
# calculate_threshold — confirm the criterion there.
fpr, tpr, thresholds, roc_auc, optimal_threshold = calculate_threshold(benign_test_losses, mal_test_losses)

In [None]:
# Plot the ROC curve from the values returned by calculate_threshold.
plot_roc_curve(fpr, tpr, thresholds , roc_auc)

### Threshold selection & Inference

In [None]:
# Display the threshold returned by calculate_threshold.
optimal_threshold

In [None]:
# Apply the selected threshold to the benign and malicious losses and unpack
# the resulting classification metrics.
# NOTE(review): none of these values is displayed in this cell; presumably
# infer() reports them itself — confirm.
accuracy, precision, recall, f1, tp_rate, tn_rate, fp_rate, fn_rate = infer(benign_test_losses, mal_test_losses, optimal_threshold)

### Reconstructed Time Series Visualization

In [None]:
# Restrict both datasets to a single device so that its original and
# reconstructed series can be visualised. The id is compared as an integer.
ue_imeisv = 8628490433231157

ue_benign_df = benign_data.loc[benign_data['imeisv'] == ue_imeisv].copy()
ue_malicious_df = malicious_data.loc[malicious_data['imeisv'] == ue_imeisv].copy()

In [None]:
# Build loaders over the single-device data, one item per batch.
# NOTE(review): 120 and 120 are passed positionally — presumably window length
# and step (non-overlapping windows); confirm against create_test_ds_loaders.
ue_features = ['ul_bitrate']

ue_loaders = create_test_ds_loaders(
    ue_benign_df,
    ue_malicious_df,
    120,
    120,
    features=ue_features,
    batch_size=1,
)
ue_benign_data_loader, ue_mal_data_loader = ue_loaders

In [None]:
def _original_and_reconstruction(model, data_loader, device):
    """Return the flattened inputs of a loader and the model's reconstructions.

    The loader is traversed exactly once, so each input batch is paired with
    its own reconstruction even if the loader does not yield batches in a
    fixed order. The model is put in evaluation mode and run without gradient
    tracking, since this is inference only.

    Args:
        model: module called as `model(batch)` to obtain the reconstruction.
        data_loader: iterable yielding `(batch, other)` pairs; only `batch` is used.
        device: device the batch is moved to before the forward pass.

    Returns:
        Tuple `(original, reconstructed)` of 1-D NumPy arrays.
    """
    model.eval()
    original_chunks, reconstructed_chunks = [], []
    with torch.no_grad():
        for batch, _target in data_loader:
            original_chunks.append(batch.view(-1))
            reconstructed_chunks.append(model(batch.to(device)).view(-1))
    original = torch.cat(original_chunks).to('cpu').detach().numpy()
    reconstructed = torch.cat(reconstructed_chunks).to('cpu').detach().numpy()
    return original, reconstructed


# Flattened original series and its reconstruction for the selected device.
ue_benign, ue_benign_rec = _original_and_reconstruction(rae_model, ue_benign_data_loader, device)
ue_mal, ue_mal_rec = _original_and_reconstruction(rae_model, ue_mal_data_loader, device)

In [None]:
# Overlay the device's benign series and its reconstruction.
# NOTE(review): `range` presumably sets the plotted value limits — confirm in plot_ts.
plot_ts(ue_benign, ue_benign_rec, 'True Benign', 'Benign rec', range = [-5, 5])

In [None]:
# Overlay the device's malicious series and its reconstruction.
# NOTE(review): `range = [None, None]` presumably leaves the limits to the
# plotting defaults — confirm in plot_ts.
plot_ts(ue_mal, ue_mal_rec, 'True Malicious', 'Malicious rec', range = [None, None])