# USAD

## Environment

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

from utils import *
from usad import *

In [None]:
# IPython shell escape: list the GPUs visible on this machine.
!nvidia-smi -L

# Device handle used further down when the model is moved with to_device().
# get_default_device() is provided by the star imports above — presumably it
# picks CUDA when available and falls back to CPU; TODO confirm in utils.
device = get_default_device()

## EDA - Data Pre-Processing

### Load dataset

In [None]:
# Base name of the capture; it is reused later for the *_train / *_test
# CSV exports.
file = "Station2_IP"

# Only the first 30000 rows of the file are read.
csv_path = f'data/{file}.csv'
dataset = pd.read_csv(csv_path, nrows=30000)

# Quick look at the first two rows.
dataset.head(2)

In [None]:
# Every row starts out with label 0.
# NOTE(review): nothing visible in this notebook ever sets a non-zero label,
# so the evaluation in the Testing section would see a single class — confirm
# where the attack labels are supposed to come from.
dataset["label"] = 0

# Replace the absolute timestamp by the gap to the previous row, expressed in
# milliseconds. Dividing by a 1 ms Timedelta gives milliseconds whatever
# resolution pandas stores the timedeltas in, whereas converting the raw
# integer representation is only correct for nanosecond-resolution data.
dataset["Time"] = pd.to_datetime(dataset["Time"])
dataset["Time"] = dataset["Time"].diff() / pd.Timedelta(milliseconds=1)

# The first row has no predecessor, so its gap is undefined: drop it.
dataset = dataset.drop(index=0)

# Chronological 50/50 split (no shuffling): the first half becomes `normal`,
# the second half `attack`. Both halves are copied explicitly because later
# cells assign to their columns; independent frames avoid pandas'
# chained-assignment warnings and any ambiguity about what gets modified.
normal, attack = (
    part.copy()
    for part in train_test_split(dataset, test_size=0.5, shuffle=False)
)

### Normal period

In [None]:
# Preview the first five rows of the normal split.
normal.head(5)

In [None]:
# Memory footprint of the normal split (deep=True also counts the payload of
# object columns), printed in bytes and scaled by 2**20.
normal_bytes = normal.memory_usage(deep=True).sum()
print(f'{normal_bytes} bytes')
print(f'{normal_bytes / 2**20} MB')

In [None]:
import re

# A plain decimal number: optional minus sign, digits with an optional
# fractional part ("." or "," as the separator) and an optional signed
# exponent — the form str() produces for very small or very large floats.
_PLAIN_NUMBER = re.compile(r'-?(?:\d+(?:[.,]\d*)?|[.,]\d+)(?:[eE][+-]\d+)?')


def _cell_to_float(value):
    """Map one raw cell to a float.

    Text that is a plain decimal number is parsed as that number, with a
    decimal comma accepted. Anything else is encoded by concatenating the
    decimal code points of its characters, e.g. "ab" -> 9798.0.

    Compared with a digits-only check this also parses negative values and
    exponent notation (previously code-point encoded), makes the decimal
    comma handling effective, and encodes multi-dot strings such as "1.2.3"
    instead of letting the float conversion fail on them.
    """
    text = str(value)
    if _PLAIN_NUMBER.fullmatch(text):
        return float(text.replace(",", "."))
    return float(''.join(str(ord(char)) for char in text))


# Transform all columns into float64. A new frame is built rather than
# assigning column by column, so no write goes through a possibly shared
# slice of the original dataset.
# NOTE: the attack split is converted by an equivalent cell further down;
# both splits must use the same encoding.
normal = normal.apply(lambda column: column.map(_cell_to_float)).astype(float)

In [None]:
# Memory footprint of the normal split after the float conversion, printed in
# bytes and scaled by 2**20.
normal_bytes = normal.memory_usage(deep=True).sum()
print(f'{normal_bytes} bytes')
print(f'{normal_bytes / 2**20} MB')

In [None]:
# Preview the first five rows after the float conversion.
normal.head(5)

In [None]:
# Export the normal split without its label column as a bare CSV
# (no header row, no index column).
temp = normal.drop(columns=["label"])
temp.to_csv(f'data/{file}_train.csv', index=False, header=False)

#### Normalization

In [None]:
from sklearn import preprocessing

# The scaler is fitted on the normal split only; the attack split is later
# transformed with this same fitted instance.
# NOTE(review): `normal` still contains the "label" column here (only `temp`
# had it dropped), so the label is scaled and ends up among the model
# inputs — confirm that this is intended.
x = normal.values
min_max_scaler = preprocessing.MinMaxScaler().fit(x)
x_scaled = min_max_scaler.transform(x)

# Rebuilding the frame from the bare array replaces the column names and the
# index with default integer ranges.
normal = pd.DataFrame(data=x_scaled)

In [None]:
# Preview the first two rows of the scaled normal split.
normal.head(2)

### Attack

In [None]:
# Preview the first five rows of the attack split.
attack.head(5)

In [None]:
# Keep the label column of the attack split as a NumPy array; it is turned
# into per-window labels in the Testing section.
labels = attack['label'].values

In [None]:
# Preview the attack split again (extracting the labels did not modify it).
attack.head(5)

In [None]:
import re

# A plain decimal number: optional minus sign, digits with an optional
# fractional part ("." or "," as the separator) and an optional signed
# exponent — the form str() produces for very small or very large floats.
_PLAIN_NUMBER = re.compile(r'-?(?:\d+(?:[.,]\d*)?|[.,]\d+)(?:[eE][+-]\d+)?')


def _cell_to_float(value):
    """Map one raw cell to a float.

    Text that is a plain decimal number is parsed as that number, with a
    decimal comma accepted. Anything else is encoded by concatenating the
    decimal code points of its characters, e.g. "ab" -> 9798.0.

    Compared with a digits-only check this also parses negative values and
    exponent notation (previously code-point encoded), makes the decimal
    comma handling effective, and encodes multi-dot strings such as "1.2.3"
    instead of letting the float conversion fail on them.
    """
    text = str(value)
    if _PLAIN_NUMBER.fullmatch(text):
        return float(text.replace(",", "."))
    return float(''.join(str(ord(char)) for char in text))


# Transform all columns into float64. A new frame is built rather than
# assigning column by column, so no write goes through a possibly shared
# slice of the original dataset.
# NOTE: the normal split is converted by an equivalent cell further up;
# both splits must use the same encoding.
attack = attack.apply(lambda column: column.map(_cell_to_float)).astype(float)

In [None]:
# Preview the first five rows after the float conversion.
attack.head(5)

In [None]:
# Export the attack split without its label column as a bare CSV
# (no header row, no index column).
temp = attack.drop(columns=["label"])
temp.to_csv(f'data/{file}_test.csv', index=False, header=False)

#### Normalization

In [None]:
from sklearn import preprocessing

# Scale the attack split with the scaler that was fitted on the normal split:
# transform only, the scaler is not re-fitted here.
x = attack.values
x_scaled = min_max_scaler.transform(x)

# Rebuilding the frame from the bare array replaces the column names and the
# index with default integer ranges.
attack = pd.DataFrame(data=x_scaled)

In [None]:
# Preview the first two rows of the scaled attack split.
attack.head(2)

### Windows

In [None]:
# Number of consecutive rows grouped into one sliding window.
window_size=12

In [None]:
# Build the sliding windows with one fancy-indexing lookup: row k of the
# index grid is [k, k+1, ..., k+window_size-1].
# NOTE: start positions run from 0 to n_rows - window_size - 1, so the last
# possible window is not generated; the label windows built in the Testing
# section use the same count.
window_offsets = np.arange(window_size)
normal_starts = np.arange(normal.shape[0] - window_size)
normal_index_grid = normal_starts[:, None] + window_offsets[None, :]
windows_normal = normal.values[normal_index_grid]

# Resulting shape: (n_windows, window_size, n_columns).
windows_normal.shape

In [None]:
# Build the sliding windows with one fancy-indexing lookup: row k of the
# index grid is [k, k+1, ..., k+window_size-1].
# NOTE: start positions run from 0 to n_rows - window_size - 1, so the last
# possible window is not generated; the label windows built in the Testing
# section use the same count.
window_offsets = np.arange(window_size)
attack_starts = np.arange(attack.shape[0] - window_size)
attack_index_grid = attack_starts[:, None] + window_offsets[None, :]
windows_attack = attack.values[attack_index_grid]

# Resulting shape: (n_windows, window_size, n_columns).
windows_attack.shape

## Training

In [None]:
import torch.utils.data as data_utils

# Hyper-parameters.
BATCH_SIZE =  7919
N_EPOCHS = 100
hidden_size = 100

# Each window is flattened into one vector of window_size * n_columns values;
# the latent size is window_size * hidden_size.
_, window_len, n_features = windows_normal.shape
w_size = window_len * n_features
z_size = window_len * hidden_size

# Chronological split of the normal windows: first 80 % for training, the
# remainder for validation.
split_at = int(np.floor(.8 * windows_normal.shape[0]))
windows_normal_train = windows_normal[:split_at]
windows_normal_val = windows_normal[split_at:]


def _make_loader(windows):
    """Wrap an (n, window, columns) array as an unshuffled loader of flat float rows."""
    flat = torch.from_numpy(windows).float().view(([windows.shape[0], w_size]))
    return torch.utils.data.DataLoader(
        data_utils.TensorDataset(flat),
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=0,
    )


train_loader = _make_loader(windows_normal_train)
val_loader = _make_loader(windows_normal_val)
test_loader = _make_loader(windows_attack)

# UsadModel and to_device come from the star imports at the top of the
# notebook.
model = UsadModel(w_size, z_size)
model = to_device(model,device)

In [None]:
# Run training for N_EPOCHS epochs on the normal windows. training() comes
# from the star imports; its return value is kept for plot_history() below
# (presumably per-epoch validation metrics — TODO confirm).
history = training(N_EPOCHS,model,train_loader,val_loader)

In [None]:
# Plot what training() returned; plot_history() comes from the star imports.
plot_history(history)

In [None]:
# Persist the weights of the three sub-networks in a single checkpoint file,
# keyed by the attribute name of each sub-network.
checkpoint_state = {
    part: getattr(model, part).state_dict()
    for part in ('encoder', 'decoder1', 'decoder2')
}
torch.save(checkpoint_state, "model.pth")

## Testing

In [None]:
# Restore the three sub-networks from the checkpoint file.
# map_location remaps the stored tensors onto the device selected at the top
# of the notebook, so a checkpoint written on a GPU can also be loaded on a
# CPU-only machine (without it torch.load tries to restore onto the device
# the tensors were saved from).
# Assumes `device` is a torch.device or device string — TODO confirm against
# get_default_device().
checkpoint = torch.load("model.pth", map_location=device)

model.encoder.load_state_dict(checkpoint['encoder'])
model.decoder1.load_state_dict(checkpoint['decoder1'])
model.decoder2.load_state_dict(checkpoint['decoder2'])

In [None]:
# Score the attack windows. testing() comes from the star imports; the code
# below treats its result as a list of per-batch tensors.
results=testing(model,test_loader)

In [None]:
# One list of integer labels per sliding window, using the same start
# positions (0 .. len(labels) - window_size - 1) as the attack windows.
windows_labels = [
    list(np.int_(labels[start:start + window_size]))
    for start in range(len(labels) - window_size)
]

In [None]:
# A window is marked 1.0 when its labels sum to more than zero, else 0.
y_test = []
for window in windows_labels:
    y_test.append(1.0 if np.sum(window) > 0 else 0)

In [None]:
# Flatten the per-batch score tensors into one 1-D NumPy array.
# torch.cat over the flattened batches handles a shorter final batch and also
# the single-batch case; stacking results[:-1] would raise there because the
# list is empty.
y_pred = torch.cat(
    [batch_scores.flatten() for batch_scores in results]
).detach().cpu().numpy()

In [None]:
# Preview the first five anomaly scores.
y_pred[:5]

In [None]:
# Preview the first five window labels.
y_test[:5]

In [None]:
# ROC() comes from the star imports; presumably it evaluates the scores
# against the window labels and returns a decision threshold — TODO confirm.
# NOTE(review): the labels derive from a column that is set to 0 for every
# row earlier in the notebook, so y_test may contain a single class, which
# makes a ROC analysis degenerate — verify the labelling.
threshold=ROC(y_test,y_pred)