In [None]:
import pandas as pd
import numpy as np

from utils import read_data, iterate_batches, apply_adjustment, sliding_window_anomaly_detection, get_precision_recall_f1

# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
window_size = 50  # number of time steps per sliding window
dataset_list = ["MSL", "PSM", "SMAP", "SMD", "SWAT"]
dataset = dataset_list[2]

# Training is a two-stage workflow controlled by the two flags below:
#   * flag_train_composite = False -> train the plain AE / VAE and save it
#     as "<dataset>-AE.pt" / "<dataset>-VAE.pt";
#   * flag_train_composite = True  -> train the FAR composite (AE-FAR / VAE-FAR),
#     which later loads the checkpoint whose path is MODEL_PATH without the
#     trailing "-FAR", so the matching plain model must have been trained first.
flag_train_composite = True
flag_AE = False  # True for AE, False for VAE

n_epochs = 100

# Checkpoint path (without extension): models/<dataset>/<dataset>-<AE|VAE>[-FAR]
model_kind = "AE" if flag_AE else "VAE"
far_suffix = "-FAR" if flag_train_composite else ""
MODEL_PATH = f"models/{dataset}/{dataset}-{model_kind}{far_suffix}"


# Load the splits for the selected dataset; the number of columns of the
# training split is used as the model input dimension.
train_data, test_data, val_data, test_labels = read_data(dataset)
input_dim = train_data.shape[1]

for caption, split in (
    ("# of train: ", train_data),
    ("# of test: ", test_data),
    ("# of labels: ", test_labels),
):
    print(caption, split.shape)

# of train:  (58317, 55)
# of test:  (73729, 55)
# of labels:  (73729,)


In [17]:
# Split the test labels by value to see how imbalanced the test set is
# (presumably 1 marks anomalous points and 0 normal ones -- TODO confirm).
df_test_1 = test_labels[test_labels == 1]
df_test_0 = test_labels[test_labels == 0]

for label_value, selected in ((1, df_test_1), (0, df_test_0)):
    print(f"number of {label_value}s in test: ", len(selected))

number of 1s in test:  7766
number of 0s in test:  65963


In [None]:
import torch
import torch.nn as nn


import torch.optim as optim
import tqdm
import time
import numpy as np


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def loss_function(recon_x, x, mu, log_var):
    """VAE objective: summed squared reconstruction error plus a weighted KL term.

    Relies on the module-level ``window_size`` and ``input_dim``.

    Args:
        recon_x: Model reconstruction; compared against the slice of ``x`` at
            index ``window_size - 1`` along dim 1, reshaped to ``(-1, input_dim)``.
        x: Input batch, indexed as ``x[:, window_size - 1, :]``.
        mu: Latent mean.
        log_var: Latent log-variance.

    Returns:
        Scalar tensor ``sum((recon_x - target) ** 2) + 0.002 * KL`` where
        ``KL = -0.5 * sum(1 + log_var - mu^2 - exp(log_var))``.
    """
    # Only the time step at index window_size - 1 of each window is reconstructed.
    target = x[:, window_size - 1, :].view(-1, input_dim)
    squared_error = nn.functional.mse_loss(recon_x, target, reduction='sum')

    kl_term = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
    return squared_error + 0.002 * kl_term


class AttentionLayer(nn.Module):
    """Scores the input with a small MLP and rescales the input by those scores.

    The scorer is Linear(input_dim, hidden_dim) -> Tanh -> Linear(hidden_dim, 1)
    followed by a softmax over dim 1.

    NOTE(review): the scorer emits one value per position, so for a 2-D
    ``(batch, features)`` input the score tensor is ``(batch, 1)`` and the
    softmax over dim 1 runs over a singleton axis: every weight is exactly 1.0
    and the layer returns its input unchanged. The softmax only normalises
    something when the input has a real axis at dim 1 (e.g. a 3-D input).

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_dim: Width of the hidden scoring layer.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        scoring_stack = [
            nn.Linear(input_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, 1),
            nn.Softmax(dim=1),
        ]
        self.attention = nn.Sequential(*scoring_stack)

    def forward(self, inputs):
        """Return ``(inputs * weights, weights)``; weights broadcast over the last dim."""
        weights = self.attention(inputs)
        return inputs * weights, weights

class MSEFeedbackRNN(nn.Module):
    """Stacked ``nn.RNN`` followed by a linear projection, run on a length-1 sequence.

    Args:
        input_size: Size of the last dimension of the input.
        hidden_size: Hidden state size of the RNN.
        output_size: Size of the projected output.
        num_layers: Number of stacked RNN layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Insert a sequence axis at dim 1, run the RNN from a zero state, project.

        The output keeps the inserted axis, i.e. for a ``(batch, input_size)``
        input the result is ``(batch, 1, output_size)``.
        """
        sequence = x.unsqueeze(1)
        initial_hidden = torch.zeros(
            self.num_layers, sequence.size(0), self.hidden_size
        ).to(sequence.device)
        hidden_states, _ = self.rnn(sequence, initial_hidden)
        # The linear layer is applied to every time step of the RNN output.
        return self.fc(hidden_states)

# CompositeModel: AE-FAR or VAE-FAR according to the autoencoder type
class CompositeModel(nn.Module):
    """Autoencoder whose reconstruction is corrected by an error-driven RNN.

    The autoencoder's reconstruction and its per-sample mean squared error
    against the target are concatenated, passed through an attention layer and
    an RNN, and the RNN output is added to the reconstruction.

    Relies on the module-level ``input_dim`` to size the attention layer
    (``input_dim + 1`` inputs: the reconstruction plus one error column).

    Args:
        autoencoder: Module returning either the reconstruction alone (AE) or
            a tuple/list whose first element is the reconstruction (VAE).
        rnn: Module mapping the attended input to a correction; its output is
            squeezed on dim 1 before being added to the reconstruction.
        attention_dim: Hidden width of the attention scorer.
    """

    def __init__(self, autoencoder, rnn, attention_dim=64):
        super(CompositeModel, self).__init__()
        self.autoencoder = autoencoder
        self.rnn = rnn
        self.attention = AttentionLayer(input_dim=input_dim + 1, hidden_dim=attention_dim)

    def forward(self, x, y):
        """Return ``(adjusted_reconstruction, mse_error, rnn_output)``.

        Args:
            x: Input passed to the autoencoder.
            y: Target the reconstruction is compared with.
        """
        # A VAE returns (reconstruction, mean, log_var) while a plain AE
        # returns only the reconstruction. Accept both so the same class
        # serves AE-FAR and VAE-FAR without editing the code.
        ae_output = self.autoencoder(x)
        if isinstance(ae_output, (tuple, list)):
            reconstructed = ae_output[0]
        else:
            reconstructed = ae_output

        # One error value per sample, kept as a column so it can be concatenated.
        mse_error = ((y - reconstructed) ** 2).mean(dim=1, keepdim=True)

        combined_input = torch.cat((reconstructed, mse_error), dim=1)
        # NOTE(review): if `reconstructed` is (batch, features), as the dim=1
        # concatenation suggests, AttentionLayer's softmax over dim=1 acts on a
        # (batch, 1) tensor and yields all-ones weights, i.e. this step is an
        # identity. Confirm whether feature-wise attention was intended.
        combined_input, attention_weights = self.attention(combined_input)

        rnn_output = self.rnn(combined_input)
        rnn_output = rnn_output.squeeze(1)

        adjusted_reconstructed = reconstructed + rnn_output
        return adjusted_reconstructed, mse_error, rnn_output



def _batch_loss(model, X_batch, y_batch):
    """Return the loss for one batch according to the active training mode.

    Composite and plain-AE modes use ``loss_fn`` (MSE) against ``y_batch``;
    VAE mode uses ``loss_function`` (reconstruction + KL) against ``X_batch``.
    """
    if flag_train_composite:
        reconstructed, _mse_error, _rnn_output = model(X_batch, y_batch)
        return loss_fn(reconstructed, y_batch)
    if flag_AE:
        # ***** Simple AE ********
        return loss_fn(model(X_batch), y_batch)
    # ***** VAE ********
    y_pred, mean, log_var = model(X_batch)
    return loss_function(y_pred, X_batch, mean, log_var)


if flag_train_composite:
    # The pre-trained autoencoder lives at MODEL_PATH without the trailing "-FAR".
    # The checkpoint is a whole pickled module (written with torch.save(model, ...)),
    # so it has to be loaded with weights_only=False.
    # NOTE: unpickling executes code -- only load checkpoints you created yourself.
    autoencoder = torch.load(MODEL_PATH[:-4] + ".pt", map_location=device, weights_only=False)
    # Freeze the autoencoder; only the attention layer and the RNN are trained.
    for param in autoencoder.parameters():
        param.requires_grad = False

    rnn_hidden_size = input_dim//2
    rnn_output_size = input_dim

    rnn = MSEFeedbackRNN(input_size= input_dim + 1, hidden_size=rnn_hidden_size, output_size=rnn_output_size, num_layers=8)

    # AE-FAR or VAE-FAR according to the selected autoencoder
    model = CompositeModel(autoencoder, rnn)

else:
    if flag_AE:
        # ******* Train only the plain AE *******
        from my_models import CustomModel
        model = CustomModel(window_size, input_dim)
    else:
        # ******* Train only the VAE *******
        from my_models import LSTM_VAE
        model = LSTM_VAE(window_size, input_size=input_dim, hidden_size=64, latent_size=32)


model.to(device)


X_train = torch.tensor(train_data, dtype=torch.float32).to(device)
X_valid = torch.tensor(val_data, dtype=torch.float32).to(device)

print(X_train.shape)

# loss function and optimizer
loss_fn = nn.MSELoss()  # mean square error
optimizer = optim.Adam(model.parameters(), lr=0.0001)


# training parameters
batch_size = 128  # size of each batch

# Track the best validation loss; the best model itself is kept on disk.
best_loss_val = np.inf   # init to infinity
best_weights = None      # not used below; kept in case other cells reference it
history_train, history_val = [], []

# Stop once the validation loss has failed to improve for more than
# `early_stop` consecutive epochs.
stop_counter, early_stop = 0, 10
# training loop
for epoch in range(n_epochs):
    model.train()
    loss_train = []
    start_time = time.time()
    with tqdm.tqdm(iterate_batches(X_train, window_size, batch_size), unit="batch", mininterval=0, disable=True) as bar:
        bar.set_description(f"Epoch {epoch}")
        for X_batch, y_batch in bar:
            optimizer.zero_grad()

            loss = _batch_loss(model, X_batch, y_batch)

            loss_train.append(loss.item())
            # backward pass
            loss.backward()
            # update weights
            optimizer.step()
            # print progress
            bar.set_postfix(mse=float(loss))

    # evaluate the validation loss at the end of each epoch
    model.eval()
    loss_valid = []
    # No gradients are needed for validation; skipping them avoids building
    # an autograd graph for every validation batch.
    with torch.no_grad():
        for X_batch, y_batch in iterate_batches(X_valid, window_size, batch_size):
            loss_valid.append(_batch_loss(model, X_batch, y_batch).item())

    loss_valid = np.mean(loss_valid)
    loss_train = np.mean(loss_train)
    history_train.append(loss_train)
    history_val.append(loss_valid)
    print("{0} - {1} - train loss: {2}  val loss: {3}".format(epoch, time.time()-start_time, loss_train, loss_valid))
    if loss_valid < best_loss_val:
        # New best: checkpoint the whole module and reset the patience counter.
        best_loss_val = loss_valid
        torch.save(model, MODEL_PATH + ".pt")
        stop_counter = 0
    else:
        stop_counter +=1

    if stop_counter > early_stop:
        print("early stopping......")
        break

# restore the checkpoint with the lowest validation loss
# (whole pickled module, hence weights_only=False -- trusted files only)
model = torch.load(MODEL_PATH + ".pt", map_location=device, weights_only=False)

import pickle

# Persist the per-epoch [train, validation] loss curves next to the checkpoint.
with open(MODEL_PATH + ".history", 'wb') as f:
    pickle.dump([history_train, history_val], f)

torch.Size([58317, 55])


  autoencoder = torch.load("models/MSL/MSL-model.pt", map_location=device)


0 - 1.2466344833374023 - train loss: 0.5425175858340879  val loss: 0.49428130594686004
1 - 1.2491939067840576 - train loss: 0.5356970922166829  val loss: 0.49044155508886184
2 - 1.2135224342346191 - train loss: 0.5307539100073898  val loss: 0.4839992885475282
3 - 1.1617608070373535 - train loss: 0.5264139387431338  val loss: 0.4795556391114727
4 - 1.1401846408843994 - train loss: 0.5239369118518656  val loss: 0.4768094210021945
5 - 1.1518685817718506 - train loss: 0.521661089011902  val loss: 0.4734702498324543
6 - 1.1960818767547607 - train loss: 0.5182752672813525  val loss: 0.46840256189154766
7 - 1.184812068939209 - train loss: 0.5144706927621564  val loss: 0.46446636297284616
8 - 1.1750593185424805 - train loss: 0.5119602879305858  val loss: 0.4618076448134341
9 - 1.1142182350158691 - train loss: 0.5103117869289954  val loss: 0.4597504752966995
10 - 1.082627534866333 - train loss: 0.5090342382932329  val loss: 0.4578418268254129
11 - 1.177184820175171 - train loss: 0.5079460163862

  model = torch.load(MODEL_NAME + ".pt")


In [None]:
import torch
import numpy as np

# Put the test data on the same device as the model (`device` falls back to the
# CPU when CUDA is unavailable) instead of hard-coding "cuda".
X_test = torch.tensor(test_data, dtype=torch.float32).to(device)

# Inference only: switch to eval mode and skip autograd bookkeeping.
model.eval()
batch_predictions = []
with torch.no_grad():
    for batch, y_batch in iterate_batches(X_test, window_size, batch_size):

        if flag_train_composite:
            y_pred, mse_error, rnn_output = model(batch, y_batch)
        elif flag_AE:
            y_pred = model(batch)
        else:
            y_pred, mean, log_var = model(batch)

        batch_predictions.append(y_pred.cpu().detach().numpy())

if not batch_predictions:
    raise ValueError("iterate_batches produced no batches for the test data")

# Concatenate once at the end; concatenating inside the loop re-copies all
# previous predictions on every iteration.
predictions = np.concatenate(batch_predictions, axis=0)

print("X_test ", len(X_test))
print("predictions ", len(predictions))

# Drop the first `window_size` rows so the data and labels have as many rows
# as there are predictions.
test_data_tmp = test_data[window_size:]
true_labels = test_labels[window_size:]

print("test_data_tmp ", len(test_data_tmp))
print("true_labels ", len(true_labels))

# Per-row reconstruction error, used as the anomaly score.
mse = np.mean(np.power(test_data_tmp - predictions, 2), axis=1)

X_test  73729
predictions  73679


In [None]:
# Threshold factor per dataset, matched against MODEL_PATH in this order.
th_factor_by_dataset = {
    "SWAT": 5.0,
    "SMD": 5.0,
    "MSL": 6.5,
    "PSM": 4.0,
    "SMAP": 6.5,
}

th_factor = next(
    (factor for name, factor in th_factor_by_dataset.items() if name in MODEL_PATH),
    None,
)
if th_factor is None:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError(
        "no threshold factor configured for model path {!r}; known datasets: {}".format(
            MODEL_PATH, ", ".join(th_factor_by_dataset)
        )
    )

print("dataset: ", dataset)
print("model: ", MODEL_PATH)
print("threshold: ", th_factor)


# Flag anomalies from the per-row error with a sliding-window threshold, then
# score the adjusted predictions against the ground-truth labels.
pred_y, dynamic_threshold = sliding_window_anomaly_detection(mse, window_size, threshold_factor=th_factor)
gt, pred_adjusted = apply_adjustment(true_labels, pred_y)
print("adjusted with sliding: ", get_precision_recall_f1(gt, pred_adjusted))