In [1]:
import torch
from torch import nn
import data_utils
from training.MADGAN_train import MadGanTrainingPipeline
from models.MADGAN import Generator, Discriminator, AnomalyDetector
from utils import evaluation

# Parameters

In [2]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
model_type = "MAD-GAN"
num_generated_features = 6
seq_length = 30
seq_stride = 10

random_seed = 0
num_epochs = 100
batch_size = 256
lr = 0.001
latent_dim = 15
hidden_dim = 100

# Load data

In [4]:
train_dl, test_dl = data_utils.load_kdd99(seq_length, seq_stride, num_generated_features,batch_size)

load kdd99_train from .npy
load kdd99_test from .npy


# Model

In [5]:
pipeline = MadGanTrainingPipeline()

In [6]:
generator = Generator(
    latent_space_dim=latent_dim,
    hidden_units=hidden_dim,
    output_dim=num_generated_features)
generator.to(DEVICE)

Generator(
  (lstm): LSTM(15, 100, num_layers=2, batch_first=True, dropout=0.1)
  (linear): Linear(in_features=100, out_features=6, bias=True)
)

In [7]:
discriminator = Discriminator(input_dim=num_generated_features,
    hidden_units=hidden_dim,
    add_batch_mean=False)
discriminator.to(DEVICE)

Discriminator(
  (lstm): LSTM(6, 100, num_layers=2, batch_first=True, dropout=0.1)
  (linear): Linear(in_features=100, out_features=1, bias=True)
  (activation): Sigmoid()
)

# Loss and Optimizer

In [8]:
def loss_function(inputs, targets):
    return nn.BCELoss()(inputs, targets)

In [9]:
discriminator_optim = torch.optim.Adam(discriminator.parameters(), lr=lr)
generator_optim = torch.optim.Adam(generator.parameters(), lr=lr)

# Train

In [10]:
pipeline.train(seq_length, latent_dim, train_dl, test_dl, discriminator, generator, discriminator_optim, generator_optim, 
                loss_function, random_seed, num_epochs, DEVICE)

Epoch 0training:
G_loss: 1.7593513584949754, D_loss_real: 0.7358834815465591, D_loss_fake: 0.6745875462889671
Evaluation metrics: {'D_loss': 3.2228572689807478, 'G_acc': 0.7926544252435623, 'D_acc': 0.7041554832010689}
Epoch 1training:
G_loss: 1.166362568329681, D_loss_real: 0.658841934190555, D_loss_fake: 0.6912643522024154
Evaluation metrics: {'D_loss': 2.166107392681695, 'G_acc': 0.7824344720131684, 'D_acc': 0.42945125023472497}
Epoch 2training:
G_loss: 1.8515564753250642, D_loss_real: 0.5888545121658932, D_loss_fake: 0.5103977670723742
Evaluation metrics: {'D_loss': 2.8735473304214874, 'G_acc': 0.6969164020811338, 'D_acc': 0.4478878229856491}
Epoch 3training:
G_loss: 1.3480521487918766, D_loss_real: 0.6355280322107402, D_loss_fake: 0.5964607914740389
Evaluation metrics: {'D_loss': 1.4254059559940675, 'G_acc': 0.2972450924197627, 'D_acc': 1.3267245119717455}
Epoch 4training:
G_loss: 1.851536323265596, D_loss_real: 0.5340758550573479, D_loss_fake: 0.43534032485701823
Evaluation metri

KeyboardInterrupt: 

# Evaluation

In [20]:
import numpy as np

In [11]:
def scoring_function(model, data):
    x = torch.tensor(data, dtype=torch.float32).unsqueeze(dim=0)
    out = model.predict(x).squeeze()
    return out.numpy()

In [12]:
def torch_scoring_function(model, data):
    return model.predict(data)

In [23]:
def torch_emmv_scores(trained_model, x, scoring_func=None, n_generated=10000, alpha_min=0.9, alpha_max=0.999,
                      t_max=0.9):
    # Get limits and volume support.
    lim_inf = torch.min(x.view(-1, 6), dim=0)[0]
    lim_sup = torch.max(x.view(-1, 6), dim=0)[0]
    offset = 1e-60  # to prevent division by 0

    # Volume support
    volume_support = torch.prod(lim_sup - lim_inf).item() + offset

    # Determine EM and MV parameters
    t = np.arange(0, 100 / volume_support, 0.01 / volume_support)
    axis_alpha = np.arange(alpha_min, alpha_max, 0.0001)

    unif = torch.rand(n_generated, x.size(1), x.size(2))
    m = lim_sup - lim_inf
    unif = unif * m
    unif = unif + lim_inf

    # Get anomaly scores
    anomaly_score = scoring_func(trained_model, x).view(-1, 1).detach().numpy()
    s_unif = scoring_func(trained_model, unif).view(-1, 1).detach().numpy()
    print(anomaly_score.shape,s_unif.shape)
    
    # Get EM and MV scores
    AUC_em, em, amax = evaluation.excess_mass(t, t_max, volume_support, s_unif, anomaly_score, n_generated)
    AUC_mv, mv = evaluation.mass_volume(axis_alpha, volume_support, s_unif, anomaly_score, n_generated)

    # Return a dataframe containing EMMV information
    scores = {
        'em': np.mean(em),
        'mv': np.mean(mv),
    }
    return scores

In [None]:
ad = AnomalyDetector(discriminator=discriminator, generator=generator, latent_space_dim=latent_dim, anomaly_threshold=0.5)

In [24]:
total_em = total_mv = total_acc = total_precision = total_recall = 0
for X, Y, P, PL in test_dl:
    prediction = ad.predict(X)
    true_positives = true_negatives = false_positives = false_negatives = 0
    for i in range(Y.size(0)):
        for j in range(Y.size(1)):
            cur_label = Y[i][j].item()
            cur_pred = prediction[i][j].item()
            if cur_label == 1 and cur_pred == 1:
                true_positives += 1
            elif cur_label == 1 and cur_pred == 0:
                false_negatives += 1
            elif cur_label == 0 and cur_pred == 1:
                false_positives += 1
            else:
                true_negatives += 1
    acc = (true_positives+true_negatives) / (Y.size(0)*Y.size(1))
    total_acc += acc
    if true_positives+false_positives > 0:
        precision = true_positives / (true_positives+false_positives)
        total_precision += precision
    if true_positives+false_negatives > 0:
        recall = true_positives / (true_positives+false_negatives)
        total_recall += recall

    scores = torch_emmv_scores(ad,X,torch_scoring_function)
    print(scores)
    
    total_em += scores['em']
    total_mv += scores['mv']
print(total_em/len(test_dl),total_mv/len(test_dl))
print(total_acc/len(test_dl),total_precision/len(test_dl),total_recall/len(test_dl))

abc
(7680, 1) (300000, 1)
{'em': 0.0001, 'mv': 14253.036804199219}
abc
(7680, 1) (300000, 1)
{'em': 0.0001, 'mv': 11815.361022949219}
abc
(7680, 1) (300000, 1)
{'em': 0.0001, 'mv': 8953.750305175781}
abc
(7680, 1) (300000, 1)
{'em': 0.0011373735166666664, 'mv': 6839.769287109375}
abc
(7680, 1) (300000, 1)
{'em': 0.0002725416333333333, 'mv': 7061.930943310547}
abc


KeyboardInterrupt: 