In [1]:
import torch
from torch import nn
import data_utils
from training.MADGAN_train import MadGanTrainingPipeline
from models.MADGAN import Generator, Discriminator, AnomalyDetector
from utils import evaluation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Hyperparameters

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(DEVICE)

cuda


In [3]:
# Experiment configuration. Every value below is read by later cells.
model_type = "MAD-GAN"

# Data windowing.
num_features = 6          # used as generator output_dim and discriminator input_dim
seq_len = 30
seq_stride = 10
gen_seq_len = seq_len     # generated sequence length is tied to the input window length

# Optimisation.
random_seed = 0           # handed to the training pipeline
num_epochs = 100
batch_size = 256
lr = 1e-5                 # Adam learning rate for both networks
wd = 5e-7                 # Adam weight_decay for both networks

# Network sizes.
latent_dim = 250          # generator latent space dimension
hidden_dim = 500          # hidden units of generator and discriminator

# Alternative configuration, previously parked in this cell as a bare string
# literal (which the notebook echoed as the cell output); kept for reference:
#   num_generated_features = 121, seq_length = 30, seq_stride = 10,
#   gen_seq_length = seq_length, batch_size = 256, latent_dim = 32,
#   hidden_dim = 100, num_epochs = 50

# Load data

In [4]:
# Dataset selector; the loading cell branches on "kdd99_small", "kdd99_large" and "apple".
dataset = "kdd99_small"

In [5]:
# Build the data loaders for the selected dataset.
# All branches use `seq_len` (the name defined in the hyperparameter cell);
# the previous `seq_length` was never defined and raised NameError.
if dataset == "kdd99_small":
    train_dl, test_dl = data_utils.kdd99(seq_len, seq_stride, num_features, gen_seq_len, batch_size)
elif dataset == "kdd99_large":
    # NOTE(review): this branch creates four separate loaders and does not
    # define the train_dl/test_dl pair that the training cell passes to the
    # pipeline — confirm how these should be combined before training on it.
    train_dl_normal = data_utils.large_kdd99('data/kdd99/X_train_normal.npy', seq_len, seq_stride, num_features, gen_seq_len, batch_size)
    train_dl_anomaly = data_utils.large_kdd99('data/kdd99/X_train_anomaly.npy', seq_len, seq_stride, num_features, gen_seq_len, batch_size)
    test_dl_normal = data_utils.large_kdd99('data/kdd99/X_test_normal.npy', seq_len, seq_stride, num_features, gen_seq_len, batch_size)
    test_dl_anomaly = data_utils.large_kdd99('data/kdd99/X_test_anomaly.npy', seq_len, seq_stride, num_features, gen_seq_len, batch_size)
elif dataset == "apple":
    # NOTE(review): yields `tscv_dl_list` only; train_dl/test_dl are not set here either.
    file_path = './data/Stocks/aapl.us.txt'
    tscv_dl_list = data_utils.load_stock_as_crossvalidated_timeseries(file_path, seq_len, seq_stride, gen_seq_len, batch_size, normalise=True)
else:
    # Fail loudly instead of leaving the loaders undefined for later cells.
    raise ValueError(
        f"Unknown dataset {dataset!r}; expected 'kdd99_small', 'kdd99_large' or 'apple'"
    )

# Model

In [6]:
# Instantiate the generator and place it on DEVICE in one expression
# (Module.to returns the module itself).
generator = Generator(
    latent_space_dim=latent_dim,
    hidden_units=hidden_dim,
    output_dim=num_features,
).to(device=DEVICE)
# Bare name so the cell displays the module summary.
generator

Generator(
  (lstm): LSTM(250, 500, num_layers=2, batch_first=True, dropout=0.1)
  (linear): Linear(in_features=500, out_features=6, bias=True)
)

In [7]:
# Instantiate the discriminator and place it on DEVICE in one expression
# (Module.to returns the module itself).
discriminator = Discriminator(
    input_dim=num_features,
    hidden_units=hidden_dim,
    add_batch_mean=False,
).to(device=DEVICE)
# Bare name so the cell displays the module summary.
discriminator

Discriminator(
  (lstm): LSTM(6, 500, num_layers=2, batch_first=True, dropout=0.1)
  (linear): Linear(in_features=500, out_features=1, bias=True)
  (activation): Sigmoid()
)

# Loss and Optimizer

In [8]:
def loss_function(inputs, targets):
    """Return the mean binary cross-entropy between ``inputs`` and ``targets``.

    Args:
        inputs: predicted probabilities (binary cross-entropy requires values in [0, 1]).
        targets: ground-truth values with the same shape as ``inputs``.

    Returns:
        A scalar tensor holding the loss averaged over all elements.
    """
    return nn.functional.binary_cross_entropy(inputs, targets)

In [9]:
# One Adam optimiser per network; both use the same learning rate (lr)
# and weight decay (wd).
generator_optim = torch.optim.Adam(
    generator.parameters(),
    lr=lr,
    weight_decay=wd,
)
discriminator_optim = torch.optim.Adam(
    discriminator.parameters(),
    lr=lr,
    weight_decay=wd,
)

# Train

In [10]:
# Pipeline object used by later cells for training (train_kdd99) and for the EM/MV scores (emmv).
pipeline = MadGanTrainingPipeline()

In [None]:
# Train both networks through the pipeline. Arguments are positional, so the
# order below must stay exactly as listed.
pipeline.train_kdd99(
    seq_len,
    latent_dim,
    train_dl,
    test_dl,
    discriminator,
    generator,
    discriminator_optim,
    generator_optim,
    loss_function,
    random_seed,
    num_epochs,
    DEVICE,
)

Epoch 0 training:
G_loss: 0.8755987904288552, D_loss_real: 0.6730850502171299, D_loss_fake: 0.642115441235629
Epoch 1 training:
G_loss: 0.7226960534399206, D_loss_real: 0.6414711528203704, D_loss_fake: 0.7275869418274272
Epoch 2 training:
G_loss: 0.7890111153775995, D_loss_real: 0.8685352173718539, D_loss_fake: 0.6837180996483022
Epoch 3 training:
G_loss: 0.7276195895942774, D_loss_real: 0.5772994385524229, D_loss_fake: 0.7857272851196202
Epoch 4 training:
G_loss: 0.8343150458552621, D_loss_real: 0.8043478930538351, D_loss_fake: 0.6124852528626269
Epoch 5 training:
G_loss: 0.6839621858163314, D_loss_real: 0.6324284875934775, D_loss_fake: 0.7519195212559266
Epoch 6 training:
G_loss: 0.7711682853373614, D_loss_real: 0.7660209235819904, D_loss_fake: 0.6532013196836818
Epoch 7 training:
G_loss: 0.7274317023429003, D_loss_real: 0.6635470749302343, D_loss_fake: 0.7017906961115924
Epoch 8 training:
G_loss: 0.7561856724999168, D_loss_real: 0.7215988039970398, D_loss_fake: 0.6707007857886228
Ep

In [None]:
from models.MADGAN import AnomalyDetector

# Anomaly detector built from the discriminator and generator defined above.
# (AnomalyDetector is also imported in the notebook's first cell.)
ad = AnomalyDetector(
    discriminator=discriminator,
    generator=generator,
    latent_space_dim=latent_dim,
    anomaly_threshold=0.5,
    DEVICE=DEVICE,
)

In [None]:
from utils.evaluation import accuracy, precision, recall, metric_calc
def evaluate(model, test_dl, label, DEVICE):
    """Print batch-averaged detection metrics and EM/MV scores for ``model``.

    Args:
        model: object exposing ``predict(X)``; it is also handed to ``pipeline.emmv``.
        test_dl: sized iterable yielding ``(X, Y)`` batches.
        label: forwarded unchanged to ``metric_calc``
            (presumably the class treated as positive — TODO confirm).
        DEVICE: device each input batch is moved to before prediction.

    Raises:
        ValueError: if ``test_dl`` contains no batches.

    Returns:
        None. The averaged accuracy, precision, recall, EM and MV are printed.
    """
    num_batches = len(test_dl)
    if num_batches == 0:
        # Previously this surfaced as a ZeroDivisionError in the final prints.
        raise ValueError("test_dl is empty; cannot compute average metrics")

    total_em = total_mv = total_acc = total_pre = total_rec = 0
    for X, Y in test_dl:
        # Move the batch to the device once and reuse it for prediction and EM/MV.
        X_dev = X.to(DEVICE)
        prediction = model.predict(X_dev)
        true_positives, true_negatives, false_positives, false_negatives = metric_calc(
            prediction.squeeze(dim=2), Y.squeeze(dim=2), label
        )
        total_acc += accuracy(true_positives, true_negatives, Y)
        # Precision/recall are only computed when their denominator is non-zero.
        # NOTE(review): skipped batches add 0 but still count in the average
        # below, which pulls the reported values down — confirm this is intended.
        if (true_positives + false_positives) > 0:
            total_pre += precision(true_positives, false_positives)
        if (true_positives + false_negatives) > 0:
            total_rec += recall(true_positives, false_negatives)
        em, mv = pipeline.emmv(model, X_dev, DEVICE=DEVICE)
        total_mv += mv
        total_em += em

    print("Acc: {0}, Pre: {1}, Rec: {2}".format(
        total_acc / num_batches, total_pre / num_batches, total_rec / num_batches))
    print("EM: {0}, MV: {1}".format(total_em / num_batches, total_mv / num_batches))

In [None]:
# Score the anomaly detector on the test loader; label=1 is passed through to metric_calc.
evaluate(ad, test_dl, label=1, DEVICE=DEVICE)