In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import precision_recall_fscore_support

from utils import read_data, iterate_batches, apply_adjustment, sliding_window_anomaly_detection, get_precision_recall_f1

# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
window_size = 50  # size of the window
dataset_list = ["MSL", "PSM", "SMAP", "SMD", "SWAT"]
dataset = dataset_list[2]

# flag_train_composite selects the "-FAR" checkpoint; flag_AE selects the
# "-AE" checkpoint family instead of "-VAE".
flag_train_composite = True
flag_AE = True

# Checkpoint path prefix: models/<dataset>/<dataset>-<AE|VAE>[-FAR]
MODEL_PATH = "models/" + dataset + "/" + dataset + ("-AE" if flag_AE else "-VAE")
if flag_train_composite:
    MODEL_PATH += "-FAR"


# read_data comes from the project's utils module; it returns the four
# arrays unpacked below.
train_data, test_data, val_data, test_labels = read_data(dataset)
input_dim = train_data.shape[1]  # size of axis 1 of the training data

# Each label is followed by the array's shape tuple.
for _label, _array in (
    ("# of train: ", train_data),
    ("# of test: ", test_data),
    ("# of labels: ", test_labels),
):
    print(_label, _array.shape)

# of train:  708405
# of test:  708420
number of 1s in test:  29444
number of 0s in test:  678976


In [None]:
import torch
import torch.nn as nn

window_size = 50  # number of rows per window

# Use the CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


class AttentionLayer(nn.Module):
    """Scale the input by weights produced by a small scoring network.

    The scoring network is Linear(input_dim -> hidden_dim) -> Tanh ->
    Linear(hidden_dim -> 1) -> Softmax over dim=1.

    NOTE(review): when ``inputs`` is 2D ``(batch, input_dim)`` the score tensor
    has shape ``(batch, 1)``, so the softmax over dim=1 normalizes a single
    element and every weight equals 1. Confirm whether that is intended.
    """

    def __init__(self, input_dim, hidden_dim):
        """Build the scoring network.

        Args:
            input_dim: size of the last axis of the tensors passed to forward.
            hidden_dim: width of the hidden projection.
        """
        super().__init__()
        scoring_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, 1),
            nn.Softmax(dim=1),
        ]
        self.attention = nn.Sequential(*scoring_layers)

    def forward(self, inputs):
        """Return ``(inputs * weights, weights)``.

        The weights have a trailing axis of size 1 and are broadcast over the
        last axis of ``inputs``.
        """
        weights = self.attention(inputs)
        return inputs * weights, weights

class MSEFeedbackRNN(nn.Module):
    """An ``nn.RNN`` followed by a linear head, run on a length-1 sequence."""

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        """Create the recurrent layer and the output projection.

        Args:
            input_size: feature size of each input vector.
            hidden_size: hidden state size of the RNN.
            output_size: feature size produced by the linear head.
            num_layers: number of stacked RNN layers.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run one recurrent step per sample from a zero hidden state.

        ``x`` gets a new axis inserted at position 1, which the batch-first
        RNN treats as a sequence of length 1 (assumes ``x`` is
        ``(batch, input_size)`` — TODO confirm against callers). The result
        keeps that axis: ``(batch, 1, output_size)`` under that assumption.
        """
        n_samples = x.size(0)
        sequence = x.unsqueeze(1)
        initial_hidden = torch.zeros(self.num_layers, n_samples, self.hidden_size).to(sequence.device)
        hidden_states, _ = self.rnn(sequence, initial_hidden)
        # The linear head is applied to every time step of the RNN output.
        return self.fc(hidden_states)

# CompositeModel: AE-FAR or VAE-FAR according to the autoencoder type
class CompositeModel(nn.Module):
    """Autoencoder whose reconstruction is corrected by an RNN fed with the error.

    The reconstruction and its per-sample mean squared error against ``y`` are
    concatenated, passed through the attention layer, and given to ``rnn``; the
    RNN output is added to the reconstruction.
    """

    def __init__(self, autoencoder, rnn, attention_dim=64):
        """Store the sub-models and build the attention layer.

        Args:
            autoencoder: module called as ``autoencoder(x)``; it may return the
                reconstruction alone (AE) or a tuple whose first item is the
                reconstruction (VAE).
            rnn: module called on the attended ``[reconstruction, mse]`` tensor.
            attention_dim: hidden width of the attention scoring network.

        Note:
            The attention input width is ``input_dim + 1``, where ``input_dim``
            is read from the notebook's module-level variable of that name, so
            that variable must be defined before constructing the model.
        """
        super(CompositeModel, self).__init__()
        self.autoencoder = autoencoder
        self.rnn = rnn
        self.attention = AttentionLayer(input_dim= input_dim+1, hidden_dim=attention_dim)

    def forward(self, x, y):
        """Return ``(adjusted_reconstructed, mse_error, rnn_output)``.

        Args:
            x: input passed to the autoencoder.
            y: target the reconstruction is compared against.
        """
        autoencoder_output = self.autoencoder(x)
        # A VAE returns (reconstructed, mean, log_var); a plain AE returns only
        # the reconstruction. Accept both so AE-FAR and VAE-FAR share this code.
        if isinstance(autoencoder_output, (tuple, list)):
            reconstructed = autoencoder_output[0]
        else:
            reconstructed = autoencoder_output

        # One error value per sample, kept as a column so it can be concatenated.
        mse_error = ((y - reconstructed) ** 2).mean(dim=1, keepdim=True)

        combined_input = torch.cat((reconstructed, mse_error), dim=1)
        combined_input, attention_weights = self.attention(combined_input)

        # Drop axis 1 of the RNN output (size 1 when the RNN runs a single step).
        rnn_output = self.rnn(combined_input)
        rnn_output = rnn_output.squeeze(1)

        adjusted_reconstructed = reconstructed + rnn_output
        return adjusted_reconstructed, mse_error, rnn_output


# Load the serialized model object for the configured checkpoint.
# NOTE(review): torch.load unpickles Python objects, so only load checkpoints
# from a trusted source. Recent PyTorch releases default to weights_only=True;
# loading a fully pickled model there may require weights_only=False — confirm
# against the installed PyTorch version.
model = torch.load(MODEL_PATH + ".pt", map_location=device)


model.to(device)
model.eval()  # inference only: switch off training-mode behavior (e.g. dropout)
batch_size = 128  # size of each batch


X_test = torch.tensor(test_data, dtype=torch.float32).to(device)

# Collect per-batch outputs and concatenate once at the end; concatenating
# inside the loop re-copies all previous predictions on every iteration.
prediction_chunks = []
with torch.no_grad():  # no gradients are needed for evaluation
    for batch, y_batch in iterate_batches(X_test, window_size, batch_size):

        if flag_train_composite:
            # Composite (-FAR) models also take the target batch.
            y_pred, mse_error, rnn_output = model(batch, y_batch)
        elif flag_AE:
            y_pred = model(batch)
        else:
            y_pred, mean, log_var = model(batch)

        y_pred = y_pred.cpu().detach().numpy()
        prediction_chunks.append(y_pred)

if not prediction_chunks:
    raise ValueError("iterate_batches produced no batches; cannot build predictions")
predictions = np.concatenate(prediction_chunks, axis=0)

print("X_test ", len(X_test))
print("predictions ", len(predictions))

X_test  708420
predictions  708370


In [None]:
# Per-dataset value passed to the detector as threshold_factor. The first name
# found inside MODEL_PATH wins; th_factor stays undefined if none matches.
_TH_FACTORS = (
    ("SWAT", 5.0),
    ("SMD", 5.0),
    ("MSL", 6.5),
    ("PSM", 4.0),
    ("SMAP", 6.5),
)
for _name, _factor in _TH_FACTORS:
    if _name in MODEL_PATH:
        th_factor = _factor
        break


# Skip the first window_size rows of the test data and labels before comparing
# with the predictions (presumably because predictions start after the first
# window — verify against iterate_batches).
test_data_tmp = test_data[window_size:]
true_labels = test_labels[window_size:]

# Mean squared error per row (averaged over axis 1).
mse = np.power(test_data_tmp - predictions, 2).mean(axis=1)

print("dataset: ", dataset)
print("model: ", MODEL_PATH)
print("threshold: ", th_factor)


# Detection, label adjustment and scoring are delegated to the utils helpers.
pred_y, dynamic_threshold = sliding_window_anomaly_detection(
    mse,
    window_size,
    threshold_factor=th_factor,
)
gt, pred_adjusted = apply_adjustment(true_labels, pred_y)
print("adjusted with sliding: ", get_precision_recall_f1(gt, pred_adjusted))

threshold:  0.12
adjusted:  (0.6272, 0.5201, 0.5686)
adjusted with sliding:  (0.9201, 0.882, 0.9006)
