In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

from util.TrainRoutine import AutoEncTrainRoutine
from util.DPMERFGenerator import DPMERFGenerator
from util.Evaluator import Evaluator

  from .autonotebook import tqdm as notebook_tqdm


We want to use the DPMERF algorithm on time series data. But we will not work on time series directly, because it does not model the temporal relation correctly. Instead we will train an AE first and use DPMERF on the encoding space, where there is no notion of time and ordering.

In [2]:
emb_dim = 32

In [3]:
ae = AutoEncTrainRoutine(emb_dim=emb_dim)

------------------------------
Initialising Autoencoder with:
RecurrentAutoencoder(
  (encoder): Encoder(
    (rnn1): LSTM(1, 64, batch_first=True)
    (rnn2): LSTM(64, 32, batch_first=True)
  )
  (decoder): Decoder(
    (rnn1): LSTM(32, 32, batch_first=True)
    (rnn2): LSTM(32, 64, batch_first=True)
    (output_layer): Linear(in_features=64, out_features=1, bias=True)
  )
)
Training on cuda
------------------------------


In [None]:
model, history = ae.train_model()

In [None]:
ae.save_model(f"lstmae_180_embed{emb_dim}.pth")

In [4]:
ae.load_model(f"lstmae_180_embed{emb_dim}.pth")

loading AE model from models/lstmae_180_embed32.pth


In [None]:
plt.plot(history["train"])
plt.plot(history["val"])

In [5]:
ae.encode_train_data("data/normal_train_180.csv", f"data/normal_training_encoded_embed{emb_dim}.csv")

saving encoded training data in data/normal_training_encoded_embed32.csv


array([[ 0.0017655 ,  0.0117736 ,  0.00665318, ..., -0.01667845,
        -0.01867547, -0.01862446],
       [ 0.12170709,  0.09850525,  0.10154928, ...,  0.04912138,
         0.05809647,  0.0795287 ],
       [ 0.03858255,  0.04192922,  0.04189727, ...,  0.04460893,
         0.04526051,  0.03903734],
       ...,
       [ 0.18670821,  0.13367577,  0.1391294 , ...,  0.12226385,
         0.11944252,  0.10852771],
       [ 0.04104304,  0.07378402,  0.0562855 , ...,  0.02511665,
         0.01644596,  0.011526  ],
       [ 0.0239025 ,  0.02184634,  0.01873173, ...,  0.0328071 ,
         0.02557669,  0.02134343]], dtype=float32)

In [7]:
dpmerfgen_npriv = DPMERFGenerator(input_size=20)

In [8]:
dpmerfgen_npriv.train_generator(mini_batch_size=0.1, lr=1e-2)

TypeError: train_generator() missing 1 required positional argument: 'data'

In [None]:
dpmerfgen_npriv.generate(fname=f"enc_gen_embed{emb_dim}.csv")

In [None]:
decoded_gen_embed = ae.decode_data(path_encoded_data=f"data/generated/enc_gen_embed{emb_dim}.csv")
pd.DataFrame(decoded_gen_embed).to_csv(f"data/generated/normal_train_180_embed{emb_dim}_dpmerf.csv")

In [None]:
plt.plot(decoded_gen_embed[30])

In [None]:
ae_gen = AutoEncTrainRoutine(training_data_path=f"data/generated/normal_train_180_embed{emb_dim}_dpmerf.csv")

In [None]:
model, history = ae_gen.train_model()

In [None]:
plt.plot(history["train"])
plt.plot(history["val"])

In [None]:
ae_gen.save_model(f"dpmerf_embed{emb_dim}.pth")

In [None]:
evaluator = Evaluator(ae_gen.test_normal_ds, ae_gen.test_anomalie_ds, ae_gen.model)

In [None]:
plt.figure()

plt.subplot(2,1,1)
plt.hist(evaluator.loss_normal, bins=100)
plt.title("Normal")

plt.subplot(2,1,2)
plt.hist(evaluator.loss_anomaly, bins=100)
plt.title("Anomaly")

plt.show()

In [None]:
corr_normal, corr_anomaly = evaluator.find_threshold()
plt.plot(np.linspace(0,5,21), corr_normal, marker="o")
plt.plot(np.linspace(0,5,21), corr_anomaly, marker="x")
plt.xlabel("Threshold")
plt.ylabel("Percentage correct")
plt.legend(["Anomaly", "Normal"])
plt.show()

In [None]:
predictions_normal, predictions_anomaly = evaluator.predict_class(3.75)


In [None]:
evaluator.evaluate(predictions_normal, predictions_anomaly)