In [None]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler

import os
import sys
sys.path.append('../')

from dataloader.dataloader import *
from training.training import *
from utils.utils import *
from visualizations.visualizations import *
from evaluation.evaluation import *

In [None]:
# Folder holding the v5 CSV files, relative to this notebook.
data_folder = "../Data/Data v5"

# Read the v5 CSV and order the rows by imeisv, then by _time within each imeisv.
df = (
    pd.read_csv(os.path.join(data_folder, "amari_ue_data_final_v5.csv"))
    .sort_values(by=["imeisv", "_time"], ascending=True)
)

In [None]:
# Store imeisv as strings so it is handled as an identifier rather than a number.
df = df.assign(imeisv=df['imeisv'].astype(str))

In [None]:
# Numeric columns that the smoothing and scaling cells below operate on.
feature_columns = [
    'dl_bitrate',
    'ul_bitrate',
    'cell_x_dl_retx',
    'cell_x_dl_tx',
    'cell_x_ul_retx',
    'cell_x_ul_tx',
    'ul_total_bytes_non_incr',
    'dl_total_bytes_non_incr',
]

# Timestamp and imeisv columns followed by every feature column.
store_columns = ['_time', 'imeisv', *feature_columns]

### Apply Smoothing

In [None]:
# Smooth every feature column with a 10-row rolling mean, computed separately
# for each imeisv so that a window never mixes rows from two imeisv groups.
# Maps str(imeisv) -> smoothed DataFrame for that imeisv.
imeisv_df_for_ma = {}

for imeisv, imeisv_df in df.groupby('imeisv'):
    # Work on an explicit copy: assigning columns onto the sub-frame yielded by
    # groupby can raise a chained-assignment warning and, depending on the
    # pandas version, risks writing through to `df`.
    smoothed_df = imeisv_df.copy()

    # rolling(window=10) uses min_periods == window by default, so the first
    # 9 rows of every group become NaN in the feature columns.
    smoothed_df[feature_columns] = smoothed_df[feature_columns].rolling(window=10).mean()

    imeisv_df_for_ma[str(imeisv)] = smoothed_df

# Single frame with all smoothed groups stacked in groupby order.
imeisv_ma_df = pd.concat(list(imeisv_df_for_ma.values()))

In [None]:
# Save the smoothed frame into the data folder; the index is not written.
imeisv_ma_df.to_csv(
    path_or_buf=os.path.join(data_folder, "amari_ue_data_final_v5_smoothed.csv"),
    index=False,
)

### Scaling time series (per imeisv)

In [None]:
# Standardise the smoothed feature columns, fitting one StandardScaler per imeisv.
# Maps imeisv (str) -> scaled DataFrame for that imeisv.
#
# The input is `imeisv_df_for_ma` (the smoothed frames), not the raw `df`:
# the result is saved as "..._smoothed_scaled.csv", and scaling `df` would
# silently skip the smoothing step.
imeisv_dfs_scaled = {}

for imeisv, smoothed_df in imeisv_df_for_ma.items():
    scaler = StandardScaler()

    # Copy so the smoothed frames stay intact for the cells that reuse them.
    scaled_df = smoothed_df.copy()

    # StandardScaler disregards NaNs when fitting and keeps them when
    # transforming, so the NaN warm-up rows from the rolling mean stay NaN.
    scaled_df[feature_columns] = scaler.fit_transform(scaled_df[feature_columns])

    imeisv_dfs_scaled[imeisv] = scaled_df

In [None]:
# Stack the per-imeisv scaled frames into a single DataFrame.
final_df = pd.concat([scaled_frame for scaled_frame in imeisv_dfs_scaled.values()])

In [None]:
# Save the scaled frame into the data folder; the index is not written.
final_df.to_csv(
    path_or_buf=os.path.join(data_folder, "amari_ue_data_final_v5_smoothed_scaled.csv"),
    index=False,
)

### Scaling time series (benign and malicious rows scaled separately)

In [None]:
# Standardise the smoothed features per imeisv, fitting separate scalers for
# the benign (label == 0) and malicious (label == 1) rows of each imeisv.
# Maps imeisv (str) -> scaled DataFrame sorted by imeisv and _time.
# Rows whose label is neither 0 nor 1 are not carried over.
# NOTE(review): scaling by label needs the label at preprocessing time --
# confirm this is intended for the downstream use of this file.
imeisv_df_no_outliers_scaled_sep = {}

# The loop variable is deliberately not called `df`: that name would overwrite
# the notebook-level `df` and break any re-run of the earlier cells.
for imeisv, smoothed_df in imeisv_df_for_ma.items():
    benign_df = smoothed_df[smoothed_df['label'] == 0].copy()
    malicious_df = smoothed_df[smoothed_df['label'] == 1].copy()

    benign_scaler = StandardScaler()
    malicious_scaler = StandardScaler()

    # fit_transform raises on zero rows, so each subset is scaled only when it
    # is non-empty (an imeisv may have no benign or no malicious rows).
    if benign_df.shape[0] > 0:
        benign_df[feature_columns] = benign_scaler.fit_transform(benign_df[feature_columns])

    if malicious_df.shape[0] > 0:
        malicious_df[feature_columns] = malicious_scaler.fit_transform(malicious_df[feature_columns])

    # Re-interleave the two subsets in their original time order.
    imeisv_df_no_outliers_scaled_sep[imeisv] = pd.concat(
        [benign_df, malicious_df], axis=0
    ).sort_values(['imeisv', '_time'], ascending=True)

In [None]:
# Stack the per-imeisv, separately scaled frames into a single DataFrame.
final_df = pd.concat([scaled_frame for scaled_frame in imeisv_df_no_outliers_scaled_sep.values()])

In [None]:
# Save the separately scaled frame into the data folder; the index is not written.
final_df.to_csv(
    path_or_buf=os.path.join(data_folder, "amari_ue_data_final_v5_smoothed_scaled_sep.csv"),
    index=False,
)