In [None]:
import os

# Move the working directory one level up (presumably to the project root) so
# that the `src` and `data` packages imported in the next cell resolve.
# The guard makes the cell idempotent: once `src` is visible from the current
# directory, re-running the cell no longer climbs one directory higher.
if not os.path.isdir('src'):
    os.chdir('..')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch

import tqdm

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import average_precision_score

from torch.utils.data import DataLoader

from src.nids.memae import MemAE, EntropyLoss

from data.utils import load_kddcup99

## Hyperparameters

Hyperparameters were set as reported in the original paper where possible, or otherwise copied from the authors' [repository](https://github.com/donggong1/memae-anomaly-detection). Note, however, that the authors did not release the code for their KDD99 experiments.

In [None]:
# --- Experimental setup ---

random_seed = 42
test_size = 0.5

# --- Hyperparameters ---

# Number of epochs, taken from the authors' code.
num_epochs = 100

# Original paper: "[...] trained using the optimizer Adam [17] with a learning rate of 0.0001".
learning_rate = 0.0001
# Alternative value, left disabled: learning_rate = 1e-3

# The paper does not report a batch size for this dataset. For the video
# dataset the authors used 14, which is small for IDS datasets.
batch_size = 16

mem_dim = 50

# Original paper: "In practice, alpha = 0.0002 leads to desirable results in all our experiments".
alpha = 2e-4

# Shrinkage threshold, taken from the authors' code.
shrink_thres = 2.5e-3

# NOTE: Results should be averaged across 20 runs


In [None]:
# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Seed the PyTorch and NumPy global random generators with the configured seed.
# The NumPy call goes last: it returns None, so the cell displays no output.
torch.manual_seed(random_seed)
np.random.seed(random_seed)

## KDD99

### Subsample selection

In the original paper, the authors report:
"Following the settings in [48], 80% of the samples labeled as “attack” in the original dataset are treated as normal samples."
This statement does not make much sense: treating "attack" samples as normal samples would lead an anomaly detection model to learn the wrong behavior.

The referenced paper is:
[48]: <i>Bo Zong, Qi Song, Martin Renqiang Min, Wei Cheng, Cristian Lumezanu, Daeki Cho, and Haifeng Chen. Deep autoencoding gaussian mixture model for unsupervised anomaly detection. In International Conference on Learning Representations, 2018.</i>

This paper reports: "We keep all the data samples labeled as “normal” and randomly draw samples labeled as “attack” so that the ratio between “normal” and “attack” is 4 : 1. In this way, we obtain a dataset with anomaly ratio 0.2". This seems more sensible, so it is the procedure followed below.




In [None]:
# TODO: Use percent10=False to load the full dataset
X, y = load_kddcup99(percent10=True)

# Positional indices of each class (label 0 is handled as normal, label 1 as anomaly).
idx_normal = np.flatnonzero((y == 0).to_numpy())
idx_anomaly = np.flatnonzero((y == 1).to_numpy())

# Keep every normal sample and draw anomalies without replacement so that
# the normal-to-anomaly ratio is 4:1.
n_anomaly_keep = int(0.25 * idx_normal.shape[0])
idx_anomaly_keep = np.random.choice(idx_anomaly, size=n_anomaly_keep, replace=False)

# Sort the selected positions so the subsample keeps the original row order.
idx = np.sort(np.concatenate((idx_normal, idx_anomaly_keep)))

X = X.iloc[idx].reset_index(drop=True)
y = y.iloc[idx].reset_index(drop=True)

# Display the resulting class balance.
y.value_counts()

In [None]:
# Convert the labels to a NumPy array. np.asarray also accepts an existing
# ndarray, so re-running this cell is harmless, whereas a second call to
# y.to_numpy() would raise AttributeError on the already-converted array.
y = np.asarray(y)

In [None]:
# Columns that are one-hot encoded instead of being used as numeric features.
categorical_cols = [
    'protocol_type',
    'service',
    'flag',
]

In [None]:
## NOTE: The authors may have used this (incorrect) preprocessing procedure, which fits the encoder on the full dataset before the train/test split

# X_cat = X[categorical_cols].to_numpy()
# X_num = X.drop(columns=categorical_cols).to_numpy()

# encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
# X_cat = encoder.fit_transform(X_cat)

# X = np.concatenate((X_cat, X_num), axis=1)

# print(X.shape)

In [None]:
# Split according to the `test_size` configured in the experimental setup.
# The split fraction was previously hard-coded to 0.2, which silently ignored
# that setting.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=random_seed
)

# NOTE: This is how correct preprocessing should be implemented, but old papers often use the wrong one (above)
# The encoder and scaler are fitted on the training split only and then
# applied unchanged to the test split.

encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)

# One-hot encode the categorical columns; categories unseen during fitting are
# ignored at transform time (handle_unknown='ignore').
X_train_cat = X_train[categorical_cols].to_numpy()
X_train_num = X_train.drop(columns=categorical_cols).to_numpy()
X_train_cat = encoder.fit_transform(X_train_cat)

X_test_cat = X_test[categorical_cols].to_numpy()
X_test_num = X_test.drop(columns=categorical_cols).to_numpy()
X_test_cat = encoder.transform(X_test_cat)

# Feature layout: one-hot columns first, then the remaining columns.
X_train = np.concatenate((X_train_cat, X_train_num), axis=1).astype(np.float32)
X_test = np.concatenate((X_test_cat, X_test_num), axis=1).astype(np.float32)

# Standardise every column (including the one-hot ones) with training statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(X_train.shape)

In [None]:
# Mini-batches of training features, reshuffled every epoch. Batches stay on
# the CPU here and are moved to `device` inside the training loop.
x_train_tensor = torch.from_numpy(X_train)
train_loader = DataLoader(x_train_tensor, shuffle=True, batch_size=batch_size)

# The whole test split is placed on `device` once, for per-epoch evaluation.
x_test_tensor = torch.from_numpy(X_test)
x_test_tensor = x_test_tensor.to(device)

In [None]:
# Build the memory-augmented autoencoder for the preprocessed feature width.
model = MemAE(
    num_features=X_train.shape[1],
    mem_dim=mem_dim,
    shrink_thres=shrink_thres
)

model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
mse_loss = torch.nn.MSELoss()
entropy_loss = EntropyLoss()


def criterion(pred, x):
    """Return the training objective: reconstruction MSE plus weighted entropy.

    Args:
        pred: Model output; indexed with the keys 'output' (reconstruction)
            and 'att' (passed to the entropy loss).
        x: Input batch that the reconstruction is compared against.

    Returns:
        mse_loss(pred['output'], x) + alpha * entropy_loss(pred['att'])
    """
    # The entropy term is scaled by `alpha`. It was previously added
    # unweighted, leaving the `alpha` hyperparameter unused.
    # NOTE(review): assumes EntropyLoss does not already apply alpha internally - confirm.
    return mse_loss(pred['output'], x) + alpha * entropy_loss(pred['att'])

In [None]:
for epoch in range(num_epochs):
    # Switch back to training mode: the evaluation at the end of the previous
    # epoch leaves the model in eval mode.
    model.train()

    # Wrap the loader itself (not enumerate(...)) so tqdm can read its length
    # and show a total and ETA.
    pbar = tqdm.tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}")
    running_loss = 0.0
    for i, x_batch in enumerate(pbar):
        x_batch = x_batch.to(device)

        optimizer.zero_grad()
        # `pred` is indexed by key inside `criterion`; it is not a bare reconstruction.
        pred = model(x_batch)
        loss = criterion(pred, x_batch)
        loss.backward()
        optimizer.step()

        # Show the mean loss over the batches seen so far in this epoch.
        running_loss += loss.item()
        pbar.set_postfix(loss=running_loss / (i + 1))

    # Score the held-out split in eval mode and without gradient tracking.
    model.eval()
    with torch.no_grad():
        scores = model.evaluate_errors(x_test_tensor).cpu().numpy()
        average_precision = average_precision_score(y_test, scores)

    print(f"Epoch {epoch + 1}/{num_epochs} - Average precision: {average_precision:.4f}")