In [1]:
#This is the class for encoder 
#This is the class for encoder 
import torch
import torch.nn as nn 


import time

t = time.localtime()
current_time = time.strftime("%H:%M:%S", t)
print(current_time)
    
class Encoder(nn.Module):
    
    def __init__(self,n_features,embedding_dim=16,device=None):
        super(Encoder,self).__init__()
        self.in_dim=n_features
        self.device=device
        
        self.embedding_dim, self.hidden_dim = embedding_dim, 2 * embedding_dim
        self.lstm1 = nn.LSTM(
                    input_size=n_features,
                    hidden_size=self.hidden_dim,
                    num_layers=1,
                    batch_first=True
                    )
        self.lstm2 = nn.LSTM(
                    input_size=self.hidden_dim,
                    hidden_size=embedding_dim,
                    num_layers=1,
                    batch_first=True
                    )
       
        
    def forward(self,input):
        
        batch_size,seq_len=input.size(0),input.size(1)
        
        #h is the hidden state at time t and c is the cell state at time t
        h_0 = torch.zeros(1, batch_size, 2 * self.embedding_dim).to(self.device)
        c_0 = torch.zeros(1, batch_size, 2 * self.embedding_dim).to(self.device)
        
        recurrent_features,(h_1,c_1) = self.lstm1(input,(h_0,c_0))
        recurrent_features,_ = self.lstm2(recurrent_features)
        
        outputs=recurrent_features.view(batch_size,seq_len,self.embedding_dim)
        
        return outputs,recurrent_features

00:41:50


In [2]:
#This is also the Generator

class Decoder(nn.Module):
    """An LSTM based generator. It expects a sequence of noise vectors as input.
    Args:
        in_dim: Input noise dimensionality
        out_dim: Output dimensionality
        n_layers: number of lstm layers
        hidden_dim: dimensionality of the hidden layer of lstms
    Input: noise of shape (batch_size, seq_len, in_dim)
    Output: sequence of shape (batch_size, seq_len, out_dim)
    """

    def __init__(self, in_dim, out_dim, device=None):
        super().__init__()
        self.out_dim = out_dim
        self.device = device

        self.lstm0 = nn.LSTM(in_dim, hidden_size=32, num_layers=1, batch_first=True)
        self.lstm1 = nn.LSTM(input_size=32, hidden_size=64, num_layers=1, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=64, hidden_size=128, num_layers=1, batch_first=True)
        
        self.linear = nn.Sequential(nn.Linear(in_features=128, out_features=out_dim), nn.Tanh())

    def forward(self, input):
        batch_size, seq_len = input.size(0), input.size(1)
        h_0 = torch.zeros(1, batch_size, 32).to(self.device)
        c_0 = torch.zeros(1, batch_size, 32).to(self.device)

        recurrent_features, (h_1, c_1) = self.lstm0(input, (h_0, c_0))
        recurrent_features, (h_2, c_2) = self.lstm1(recurrent_features)
        recurrent_features, _ = self.lstm2(recurrent_features)
        
        outputs = self.linear(recurrent_features.contiguous().view(batch_size*seq_len, 128))
        outputs = outputs.view(batch_size, seq_len, self.out_dim)
        return outputs, recurrent_features
    

In [3]:
class Critic(nn.Module):

    def __init__(self,in_dim,device=None):
        super().__init__()
        self.device=device

        self.lstm=nn.LSTM(input_size=in_dim,hidden_size=100,num_layers=1,batch_first=True)
        self.linear = nn.Sequential(nn.Linear(100,1))

    def forward(self,input):
        batch_size,seq_len = input.size(0), input.size(1)
        h_0 = torch.zeros(1, batch_size, 100).to(self.device)
        c_0 = torch.zeros(1, batch_size, 100).to(self.device)

        recurrent_features, _ = self.lstm(input, (h_0, c_0))
        outputs = self.linear(recurrent_features.contiguous().view(batch_size*seq_len, 100))
        return outputs

In [4]:
import math
import pandas as pd
import numpy as np
import os
from torch.utils.data import Dataset
from sklearn.metrics import roc_auc_score
from sklearn.decomposition import PCA

def get_chan_name(chan_list_str):
    if len(chan_list_str) > 2:
        chan_list_str = chan_list_str[2:-2]
        chan_list = chan_list_str.split("', '")
        return chan_list
    else:
        return []
    
class Wadi(Dataset,):
    def __init__(self,seed: int,data_settings=None,remove_unique=False, entity=None, verbose=False, one_hot=False):
        super().__init__()
        self._data = None
        self.one_hot=one_hot
        self.verbose=verbose
        self.remove_unique=remove_unique
        self.train=data_settings.train
        self.dataset_training_name =data_settings.dataset_training_name
        self.dataset_test_name =data_settings.dataset_test_name
        self.dataset_anomaly_name =data_settings.dataset_anomaly_name
        self.window_length=data_settings.window_length
        if data_settings.train:
            self.stride=1
        else:
            self.stride=self.window_length
        
        
        
        '''
        x,y=self.load()
        self.n_feature = len(x.columns)
        x,y=self.unroll(x[:1000],y[:1000])
        self.x = torch.from_numpy(x).float()
        self.y=torch.from_numpy(np.array([1 if sum(y_i) > 0 else 0 for y_i in y])).float()
        
        self.data_len = x.shape[0]  
       '''
    def data(self) -> (pd.DataFrame, pd.Series, pd.DataFrame, pd.Series):
        """Return data, load if necessary"""
        if self._data is None:
            self.load()
        return self._data    
         
    
    def __len__(self):
        return self.data_len
    
    def standardize(self,X_train, X_test, remove=False, verbose=False, max_clip=5, min_clip=-4):

        mini = X_train.min()
        maxi = X_train.max()
        for col in X_train.columns:
            if maxi[col] != mini[col]:
                X_train[col] = (X_train[col] - mini[col]) / (maxi[col] - mini[col])
                X_test[col] = (X_test[col] - mini[col]) / (maxi[col] - mini[col])
                X_test[col] = np.clip(X_test[col], a_min=min_clip, a_max=max_clip)
            else:
                assert X_train[col].nunique() == 1
                if remove:
                    if verbose:
                        print("Column {} has the same min and max value in train. Will remove this column".format(col))
                    X_train = X_train.drop(col, axis=1)
                    X_test = X_test.drop(col, axis=1)
                else:
                    if verbose:
                        print("Column {} has the same min and max value in train. Will scale to 1".format(col))
                    if mini[col] != 0:
                        X_train[col] = X_train[col] / mini[col]  # Redundant operation, just for consistency
                        X_test[col] = X_test[col] / mini[col]
                    if verbose:
                        print("After transformation, train unique vals: {}, test unique vals: {}".format(
                        X_train[col].unique(),
                        X_test[col].unique()))
        return X_train, X_test

    def format_data(self, train_df, test_df, OUTLIER_CLASS=1, verbose=False):
        train_only_cols = set(train_df.columns).difference(set(test_df.columns))
        if verbose:
            print("Columns {} present only in the training set, removing them")
        train_df = train_df.drop(train_only_cols, axis=1)

        test_only_cols = set(test_df.columns).difference(set(train_df.columns))
        if verbose:
            print("Columns {} present only in the test set, removing them")
        test_df = test_df.drop(test_only_cols, axis=1)

        train_anomalies = train_df[train_df["y"] == OUTLIER_CLASS]
        test_anomalies: pd.DataFrame = test_df[test_df["y"] == OUTLIER_CLASS]
        print("Total Number of anomalies in train set = {}".format(len(train_anomalies)))
        print("Total Number of anomalies in test set = {}".format(len(test_anomalies)))
        print("% of anomalies in the test set = {}".format(len(test_anomalies) / len(test_df) * 100))
        print("number of anomalous events = {}".format(len(self.get_events(y_test=test_df["y"].values,outlier=1,normal=0))))
        # Remove the labels from the data
        X_train = train_df.drop(["y"], axis=1)
        y_train = train_df["y"]
        X_test = test_df.drop(["y"], axis=1)
        y_test = test_df["y"]
        self.y_test = y_test
        return X_train, y_train, X_test, y_test
    
    def __getitem__(self,idx):
        return self.x[idx],self.y[idx]  
    
    def get_events(self,y_test, outlier=1, normal=0, breaks=[]):
        events = dict()
        label_prev = normal
        event = 0  # corresponds to no event
        event_start = 0
        for tim, label in enumerate(y_test):
            if label == outlier:
                if label_prev == normal:
                    event += 1
                    event_start = tim
                elif tim in breaks:
                    # A break point was hit, end current event and start new one
                    event_end = tim - 1
                    events[event] = (event_start, event_end)
                    event += 1
                    event_start = tim

            else:
                # event_by_time_true[tim] = 0
                if label_prev == outlier:
                    event_end = tim - 1
                    events[event] = (event_start, event_end)
            label_prev = label

        if label_prev == outlier:
            event_end = tim - 1
            events[event] = (event_start, event_end)
        return events
        
    def load(self):
        OUTLIER_CLASS = 1
        df_x_train: pd.DataFrame = pd.read_csv(self.dataset_training_name, header=3)
        df_x_test: pd.DataFrame = pd.read_csv(self.dataset_test_name, header=0)
        # Removing 4 columns who only contain nans (data missing from the csv file)
        nan_columns = [r'\\WIN-25J4RO10SBF\LOG_DATA\SUTD_WADI\LOG_DATA\2_LS_001_AL',
                       r'\\WIN-25J4RO10SBF\LOG_DATA\SUTD_WADI\LOG_DATA\2_LS_002_AL',
                       r'\\WIN-25J4RO10SBF\LOG_DATA\SUTD_WADI\LOG_DATA\2_P_001_STATUS',
                       r'\\WIN-25J4RO10SBF\LOG_DATA\SUTD_WADI\LOG_DATA\2_P_002_STATUS']        
        df_x_train=df_x_train.drop(nan_columns, axis=1)
        df_x_test = df_x_test.drop(nan_columns, axis=1)
        
        df_x_train = df_x_train.rename(columns={col: col.split('\\')[-1] for col in df_x_train.columns})
        df_x_test = df_x_test.rename(columns={col: col.split('\\')[-1] for col in df_x_test.columns})
        
        
        print(df_x_train.shape)
        ano_df = pd.read_csv(self.dataset_anomaly_name, header=0)
        df_x_train["y"] = np.zeros(df_x_train.shape[0])
        df_x_test["y"] = np.zeros(df_x_test.shape[0])
        
        pd.set_option('mode.chained_assignment', None) # This is to prevent error SettingWithCopyWarning:A value is trying to be set on a copy of a slice from a DataFrame
        for i in range(ano_df.shape[0]):
            ano = ano_df.iloc[i, :][["Start_time", "End_time", "Date"]]
            start_row = np.where((df_x_test["Time"].values == ano["Start_time"]) &
                                 (df_x_test["Date"].values == ano["Date"]))[0][0]
            end_row = np.where((df_x_test["Time"].values == ano["End_time"]) &
                               (df_x_test["Date"].values == ano["Date"]))[0][0]
            df_x_test["y"].iloc[start_row:(end_row + 1)] = np.ones(1 + end_row - start_row)
            
        df_x_train = df_x_train.drop(["Time", "Date", "Row"], axis=1)
        df_x_test = df_x_test.drop(["Time", "Date", "Row"], axis=1)
     
        if self.one_hot:
            # actuator colums (categoricals) with < 2 categories (all of these have 3 categories)
            one_hot_cols = ['1_MV_001_STATUS', '1_MV_002_STATUS', '1_MV_003_STATUS', '1_MV_004_STATUS', '2_MV_003_STATUS',
                            '2_MV_006_STATUS', '2_MV_101_STATUS', '2_MV_201_STATUS', '2_MV_301_STATUS', '2_MV_401_STATUS',
                            '2_MV_501_STATUS', '2_MV_601_STATUS']

            # combining before encoding because some categories only seen in test
            one_hot_encoded = Dataset.one_hot_encoding(pd.concat([df_x_train, df_x_test], axis=0, join="inner"),
                                                       col_names=one_hot_cols)
            df_x_train = one_hot_encoded.iloc[:len(df_x_train)]
            df_x_test = one_hot_encoded.iloc[len(df_x_train):]
        
        
        X_train, y_train, X_test, y_test = self.format_data(df_x_train, df_x_test, OUTLIER_CLASS, verbose=self.verbose)
        X_train, X_test = self.standardize(X_train, X_test)
        
        
        #X_train,y_train=self.unroll(X_train,y_train)
        
        #self.train=0
        #self.stride=self.window_length
        
        #X_test,y_test=self.unroll(X_test,y_test)
        self._data = tuple([X_train, y_train, X_test, y_test])
        #return X_train,y_train,X_test,y_test
        
    def unroll(self, data, labels):
        un_data = []
        un_labels = []
        seq_len = int(self.window_length)
        stride = int(self.stride)
        
        idx = 0
        while(idx < len(data) - seq_len):
            
            if self.train==1 and labels.loc[idx]==1.0:
              idx += stride
              continue
            un_data.append(data.iloc[idx:idx+seq_len].values)
            un_labels.append(labels.iloc[idx:idx+seq_len].values)
            idx += stride
        return np.array(un_data), np.array(un_labels)
    
    def get_root_causes(self):
        return self.causes
    
class Datasettings:
    
    def __init__(self):
        self.dataset_training_name="/home/jupyter/data/wadi/WADI_14days.csv"
        self.dataset_test_name="/home/jupyter/data/wadi/WADI_attackdata.csv"
        self.dataset_anomaly_name="/home/jupyter/data/wadi/WADI_anomalies.csv"
        self.train=True
        self.window_length=60

In [5]:
seed = 0
data_settings = Datasettings()
wadi = Wadi(seed,data_settings)

In [6]:
x_train, y_train, x_test, y_test = wadi.data()

(1209601, 126)
Total Number of anomalies in train set = 0
Total Number of anomalies in test set = 9948
% of anomalies in the test set = 5.7569111289865225
number of anomalous events = 14


In [7]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(x_train)
x_train = imputer.transform(x_train)

In [8]:
pca = PCA(n_components=3, svd_solver='full')
pca.fit(x_train)
x_train=pca.transform(x_train)

In [9]:
pca = PCA(n_components=3, svd_solver='full')
pca.fit(x_test)
x_test=pca.transform(x_test)


In [10]:
import torch
def get_sub_seqs(x_arr, y_arr, seq_len, stride=1, start_discont=np.array([])):
    """
    :param start_discont: the start points of each sub-part in case the x_arr is just multiple parts joined together
    :param x_arr: dim 0 is time, dim 1 is channels
    :param seq_len: size of window used to create subsequences from the data
    :param stride: number of time points the window will move between two subsequences
    :return:
    """
    excluded_starts = []
    [excluded_starts.extend(range((start - seq_len + 1), start)) for start in start_discont if start > seq_len]
    seq_starts = np.delete(np.arange(0, x_arr.shape[0] - seq_len + 1, stride), excluded_starts)
    x_seqs = np.array([x_arr[i:i + seq_len] for i in seq_starts])
    y_seqs=  np.array([y_arr[i:i + seq_len] for i in seq_starts])
    y=torch.from_numpy(np.array([1 if sum(y_i) > 0 else 0 for y_i in y_seqs])).float()
    return x_seqs,y

In [11]:
sequences,y_train_per_window = get_sub_seqs(x_train,y_train, seq_len=60, stride=1,
                                 start_discont=np.array([]))

In [12]:
from typing import List, Union
from torch.utils.data import DataLoader
def get_train_data_loaders(x_seqs: np.ndarray, batch_size: int, splits: List, seed: int, shuffle: bool = False,
    usetorch = True):
    """
    Splits the train data between train, val, etc. Creates and returns pytorch data loaders
    :param shuffle: boolean that determines whether samples are shuffled before splitting the data
    :param seed: seed used for the random shuffling (if shuffling there is)
    :param x_seqs: input data where each row is a sample (a sequence) and each column is a channel
    :param batch_size: number of samples per batch
    :param splits: list of split fractions, should sum up to 1.
    :param usetorch: if True returns dataloaders, otherwise return datasets
    :return: a tuple of data loaders as long as splits. If len_splits = 1, only 1 data loader is returned
    """
    if np.sum(splits) != 1:
        scale_factor = np.sum(splits)
        splits = [fraction/scale_factor for fraction in splits]
    if shuffle:
        np.random.seed(seed)
        x_seqs = x_seqs[np.random.permutation(len(x_seqs))]
        np.random.seed()
    split_points = [0]
    for i in range(len(splits)-1):
        split_points.append(split_points[-1] + int(splits[i]*len(x_seqs)))
    split_points.append(len(x_seqs))
    if usetorch:
        loaders = tuple([DataLoader(dataset=x_seqs[split_points[i]:split_points[i+1]], batch_size=batch_size,
            drop_last=False, pin_memory=True, shuffle=False) for i in range(len(splits))])
        return loaders
    else:
        # datasets = tuple([x_seqs[split_points[i]: 
        #     (split_points[i] + (split_points[i+1]-split_points[i])//batch_size*batch_size)] 
        #     for i in range(len(splits))])
        datasets = tuple([x_seqs[split_points[i]:split_points[i+1]]
            for i in range(len(splits))])
        return datasets

In [13]:
train_loader, train_val_loader = get_train_data_loaders(sequences, batch_size=32,
                                                                splits=[1 - 0.5,
                                                                        0.5], seed=0)

In [14]:
import os
import random
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision
import torch.nn.init as init
from torch.autograd import Variable
import datetime

In [15]:
class ArgsTrn:
    workers=4
    batch_size=32
    epochs=5
    lr=0.0002
    cuda = True
    manualSeed=2
    
opt_trn=ArgsTrn()

In [16]:
print("done")

done


In [17]:
import torch
import sys
print('__Python VERSION:', sys.version)
print('__pyTorch VERSION:', torch.__version__)
print('__CUDA VERSION')
from subprocess import call
# call(["nvcc", "--version"]) does not work
! nvcc --version
print('__CUDNN VERSION:', torch.backends.cudnn.version())
print('__Number CUDA Devices:', torch.cuda.device_count())
print('__Devices')
call(["nvidia-smi", "--format=csv", "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"])
print('Active CUDA Device: GPU', torch.cuda.current_device())
print ('Available devices ', torch.cuda.device_count())
print ('Current cuda device ', torch.cuda.current_device())

__Python VERSION: 3.8.10 | packaged by conda-forge | (default, May 11 2021, 07:01:05) 
[GCC 9.3.0]
__pyTorch VERSION: 1.10.0a0+3fd9dcf
__CUDA VERSION
/bin/bash: /libraries/Default_env_55311/lib/libtinfo.so.6: no version information available (required by /bin/bash)
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Wed_Jul_14_19:41:19_PDT_2021
Cuda compilation tools, release 11.4, V11.4.100
Build cuda_11.4.r11.4/compiler.30188945_0
__CUDNN VERSION: 8202
__Number CUDA Devices: 2
__Devices
index, name, driver_version, memory.total [MiB], memory.used [MiB], memory.free [MiB]
0, Tesla T4, 450.172.01, 15109 MiB, 1252 MiB, 13857 MiB
1, Tesla T4, 450.172.01, 15109 MiB, 1242 MiB, 13867 MiB
Active CUDA Device: GPU 0
Available devices  2
Current cuda device  0


In [18]:
import torch.optim as optim
embedding_dim=16
#device = torch.device("cuda:0" if opt_trn.cuda else "cpu") # select the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
seq_len = wadi.window_length # sequence length is equal to the window length
in_dim = 3 # input dimension is same as number of feature
n_features=3
criticX = Critic(in_dim=in_dim,device=device)
criticX=nn.DataParallel(criticX)
criticX=criticX.to(device)
optimizerCriticX = optim.Adam(criticX.parameters() , lr=opt_trn.lr)
decoder=Decoder(in_dim=in_dim, out_dim=in_dim,device=device)
decoder=nn.DataParallel(decoder)
decoder=decoder.to(device)
optimizerDecoder = optim.Adam(decoder.parameters(), lr=opt_trn.lr)

cuda


In [19]:
print("|Discriminator Architecture|\n", criticX)
print("|Generator Architecture|\n", decoder)

|Discriminator Architecture|
 DataParallel(
  (module): Critic(
    (lstm): LSTM(3, 100, batch_first=True)
    (linear): Sequential(
      (0): Linear(in_features=100, out_features=1, bias=True)
    )
  )
)
|Generator Architecture|
 DataParallel(
  (module): Decoder(
    (lstm0): LSTM(3, 32, batch_first=True)
    (lstm1): LSTM(32, 64, batch_first=True)
    (lstm2): LSTM(64, 128, batch_first=True)
    (linear): Sequential(
      (0): Linear(in_features=128, out_features=3, bias=True)
      (1): Tanh()
    )
  )
)


In [20]:
import torch.nn as nn
import torch.nn.functional as F
class Wasserstein(nn.Module):
    def forward(self, pred_real, pred_fake=None):
        if pred_fake is not None:
            loss_real = pred_real.mean()
            loss_fake = pred_fake.mean()
            loss = -loss_real + loss_fake
            return loss, loss_real, loss_fake
        else:
            loss = -pred_real.mean()
            return loss

class HingeLoss(nn.Module):
    def forward(self, pred_real, pred_fake=None):
        if pred_fake is not None:
            loss_real = F.relu(1 - pred_real).mean()
            loss_fake = F.relu(1 + pred_fake).mean()
            loss = loss_real + loss_fake
            return loss, loss_real, loss_fake
        else:
            loss = -pred_real.mean()
            return loss

In [None]:
import torch.autograd as autograd


def normalize_gradient(net_D, x, **kwargs):
    """
                     f
    f_hat = --------------------
            || grad_f || + | f |
    """
    x.requires_grad_(True)
    f = net_D(x, **kwargs)
    grad = torch.autograd.grad(
        f, [x], torch.ones_like(f), create_graph=True, retain_graph=True)[0]
    grad_norm = torch.norm(torch.flatten(grad, start_dim=2), p=2, dim=2)
    grad_norm = grad_norm.view(-1, *[1 for _ in range(len(f.shape) - 1)])
    
    f_hat = (f / (grad_norm + torch.abs(f)))
    return f_hat

history = dict(train=[], val=[])

loss_fn=Wasserstein()
for epoch in range(opt_trn.epochs):
    train_losses = []
    for x in train_loader:
        temp_train_losses = []
        batch_size, seq_len =x.shape[0],x.shape[1] 
        x=x.float().to(device)
        noise = Variable(init.normal(torch.Tensor(batch_size,seq_len,in_dim),mean=0,std=0.1))
        noise=noise.to(device)
        optimizerCriticX.zero_grad()
        for k in range(5):
            with torch.no_grad():
                outputOfDecoder,_ = decoder.forward(noise)
                #outputOfDecoder=outputOfDecoder.detach()
                outputOfDecoder=outputOfDecoder.to(device)
            real_fake = torch.cat([x, outputOfDecoder], dim=0)
            with torch.backends.cudnn.flags(enabled=False):
                pred = normalize_gradient(criticX, real_fake)
                pred_real, pred_fake = torch.split(pred, [x.shape[0]*x.shape[1], outputOfDecoder.shape[0]*outputOfDecoder.shape[1]])
                loss, loss_real, loss_fake = loss_fn(pred_real, pred_fake)
                loss.backward()    
                optimizerCriticX.step()
            temp_train_losses.append(loss.item())
        train_losses.append(np.mean(temp_train_losses))
    train_loss = np.mean(train_losses)
    history['train'].append(train_loss)    
    print(f'Epoch {epoch}: train loss {train_loss}')

  noise = Variable(init.normal(torch.Tensor(batch_size,seq_len,in_dim),mean=0,std=0.1))


Epoch 0: train loss -1.9232605569307448
Epoch 1: train loss -1.9983987408062767
Epoch 2: train loss -1.9993890828152812
Epoch 3: train loss -1.9996577859770055


In [None]:
criticX.save_weights('/home/jupyter/src/saved_models/wadi/criticZ')

In [None]:
print('asd')

In [None]:
criticZ = Critic(in_dim=in_dim,device=device)
criticZ=nn.DataParallel(criticZ)
criticZ=criticZ.to(device)
optimizerCriticZ = optim.Adam(criticZ.parameters() , lr=opt_trn.lr)
encoder=Encoder(n_features=in_dim, embedding_dim=in_dim,device=device)
encoder=nn.DataParallel(encoder)
encoder=encoder.to(device)
optimizerEncoder = optim.Adam(encoder.parameters(), lr=opt_trn.lr)

In [None]:
history = dict(train=[], val=[])

for epoch in range(opt_trn.epochs):
    train_losses = []
    for x in train_loader:
        temp_train_losses = []
        batch_size, seq_len =x.shape[0],x.shape[1] 
        x=x.float().to(device)
        noise = Variable(init.normal(torch.Tensor(batch_size,seq_len,in_dim),mean=0,std=0.1))
        noise=noise.to(device)
        optimizerCriticZ.zero_grad()
        for _ in range(5):
            with torch.no_grad():
                outputOfEncoder,_ = encoder.forward(x)
                outputOfEncoder=outputOfEncoder.detach()
                outputOfEncoder=outputOfEncoder.to(device)
            real_fake = torch.cat([noise, outputOfEncoder], dim=0)
            with torch.backends.cudnn.flags(enabled=False):
                pred = normalize_gradient(criticZ, real_fake)
                pred_real, pred_fake = torch.split(pred, [x.shape[0]*x.shape[1], outputOfEncoder.shape[0]*outputOfEncoder.shape[1]])
                loss, loss_real, loss_fake = loss_fn(pred_real, pred_fake)
                loss.backward()    
                optimizerCriticZ.step()
            temp_train_losses.append(loss.item())
        train_losses.append(np.mean(temp_train_losses))
    train_loss = np.mean(train_losses)
    history['train'].append(train_loss)    
    print(f'Epoch {epoch}: train loss {train_loss}')

In [None]:
criticZ.save_weights('/home/jupyter/src/saved_models/wadi/criticZ')

In [None]:
mse_loss = torch.nn.MSELoss()


In [None]:
def consistency_normalize_gradient(net_D, x, **kwargs):
    """
                     f
    f_hat = --------------------
            || grad_f || + | f |
    """
    x.requires_grad_(True)
    f = net_D(x, **kwargs)
    grad = torch.autograd.grad(
        f, [x], torch.ones_like(f), create_graph=True, retain_graph=True)[0]
    grad_norm = torch.norm(torch.flatten(grad, start_dim=2), p=2, dim=2)
    grad_norm = grad_norm.view(-1, *[1 for _ in range(len(f.shape) - 1)])
    
    f_hat = (f / (grad_norm + torch.abs(f)))
    return f_hat

history = dict(train=[], val=[])
for epoch in range(opt_trn.epochs):
#for epoch in range(2):
    train_losses = []
    for x in train_loader:
        batch_size, seq_len =x.shape[0],x.shape[1]
        x=x.float().to(device)
        optimizerDecoder.zero_grad()
        noise = Variable(init.normal(torch.Tensor(batch_size,seq_len,in_dim),mean=0,std=0.1))
        noise=noise.to(device)
        outputOfDecoder,_ = decoder.forward(noise)
        outputOfDecoder=outputOfDecoder.detach()
        outputOfDecoder=outputOfDecoder.to(device)
        with torch.backends.cudnn.flags(enabled=False):
            pred1 = normalize_gradient(criticX, outputOfDecoder)
            enc_z,_=encoder.forward(x)
            dec_x,_=decoder.forward(enc_z)

        mse1=mse_loss(x,dec_x)
        loss1=loss_fn(pred1)+mse1
        optimizerEncoder.zero_grad()
        
        outputOfEncoder,_ = encoder.forward(x)
        outputOfEncoder=outputOfEncoder.detach()
        outputOfEncoder=outputOfEncoder.to(device)
        with torch.backends.cudnn.flags(enabled=False):
            pred2 = normalize_gradient(criticZ, outputOfEncoder)
            dec_x,_=decoder.forward(noise)
            enc_z,_=encoder.forward(dec_x)
        
        mse2=mse_loss(noise,enc_z)
        loss2=loss_fn(pred2)+mse2
        err=loss1+loss2
        err.backward()
        optimizerDecoder.step()
        optimizerEncoder.step()
        
        train_losses.append(err.item())
    train_loss = np.mean(train_losses)
    history['train'].append(train_loss)
    print(f'Epoch {epoch}: train loss {train_loss}')

In [None]:
encoder.save_weights('/home/jupyter/src/saved_models/wadi/encoder')
decoder.save_weights('/home/jupyter/src/saved_models/wadi/decoder')

In [None]:
sequences,y_test_per_window = get_sub_seqs(x_test,y_test, seq_len=60, stride=60, start_discont=np.array([]))
test_loader = DataLoader(dataset=sequences, batch_size=1,  num_workers=4,
                                 shuffle=False)

In [None]:
from src.utils.tf_dtw import SoftDTW
criterion_dtw = SoftDTW(gamma=1.0, normalize=True) # just like nn.MSELoss()

losses = []
criterion =  nn.L1Loss(reduction='sum')
for x in test_loader:
    x=x.float().to(device)
    enc_z,_=encoder.forward(x)
    enc_z=enc_z.to(device)
    dec_x,_=decoder.forward(enc_z)
    dec_x=dec_x.to(device)
    err1=criterion(x,dec_x)
    
    distance1=criterion_dtw(x, dec_x)
    pred1=criticX.forward(x)
    criticx_loss=loss_fn(pred1)
    
    enc_x,_=decoder.forward(enc_z)
    enc_x=enc_x.to(device)
    dec_enc_z,_=encoder.forward(enc_x)
    dec_enc_z=dec_enc_z.to(device)
    
    pred2=criticZ.forward(enc_z)
    criticz_loss=loss_fn(pred2)
    
    err2=criterion(enc_z,dec_enc_z)
    distance2=criterion_dtw(enc_z, dec_enc_z)
    err=err1+err2+criticz_loss+criticx_loss
    losses.append(err.item())

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
plt.figure(figsize=(16,9), dpi=80)
plt.title('Loss Distribution', fontsize=16)
sns.histplot(losses, bins = 20, kde= False, color = 'blue');
#sns.distplot(losses, bins = 20, kde= True, color = 'blue');

In [None]:
THRESHOLD =6.59

test_score_df = pd.DataFrame(index=range(len(losses)))
test_score_df['loss'] = [loss/60 for loss in losses]
test_score_df['y'] = y_test_per_window
test_score_df['threshold'] = THRESHOLD
test_score_df['anomaly'] = test_score_df.loss > test_score_df.threshold
#test_score_df['t'] = [x[59].item() for x in sequences]

plt.plot( test_score_df.loss, label='loss')
plt.plot( test_score_df.threshold, label='threshold')
plt.plot( test_score_df.y, label='y')
plt.xticks(rotation=25)
plt.legend();

In [None]:
'''
import seaborn as sns

anomalies = test_score_df[test_score_df.anomaly == True]

plt.plot(
  test_score_df['t'], 
  label='value'
);

sns.scatterplot(
 
  anomalies.t,
  color=sns.color_palette()[3],
  s=52,
  label='anomaly'
)

plt.plot(
  test_score_df['y'],
  label='y'
)

plt.xticks(rotation=25)
plt.legend();
'''

In [None]:
import numpy as np
start_end = []
state = 0
for idx in test_score_df.index:
    if state==0 and test_score_df.loc[idx, 'y']==1:
        state=1
        start = idx
    if state==1 and test_score_df.loc[idx, 'y']==0:
        state = 0
        end = idx
        start_end.append((start, end))

for s_e in start_end:
    if sum(test_score_df[s_e[0]:s_e[1]+1]['anomaly'])>0:
        for i in range(s_e[0], s_e[1]+1):
            test_score_df.loc[i, 'anomaly'] = 1
            
actual = np.array(test_score_df['y'])
predicted = np.array([int(a) for a in test_score_df['anomaly']])

In [None]:
import numpy as np
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_curve, auc, roc_auc_score

predicted = np.array(predicted)
actual = np.array(actual)

tp = np.count_nonzero(predicted * actual)
tn = np.count_nonzero((predicted - 1) * (actual - 1))
fp = np.count_nonzero(predicted * (actual - 1))
fn = np.count_nonzero((predicted - 1) * actual)

print('True Positive\t', tp)
print('True Negative\t', tn)
print('False Positive\t', fp)
print('False Negative\t', fn)

accuracy = (tp + tn) / (tp + fp + fn + tn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
fmeasure = (2 * precision * recall) / (precision + recall)
cohen_kappa_score = cohen_kappa_score(predicted, actual)
false_positive_rate, true_positive_rate, thresholds = roc_curve(actual, predicted)
auc_val = auc(false_positive_rate, true_positive_rate)
roc_auc_val = roc_auc_score(actual, predicted)

print('Accuracy\t', accuracy)
print('Precision\t', precision)
print('Recall\t', recall)
print('f-measure\t', fmeasure)
print('cohen_kappa_score\t', cohen_kappa_score)
print('auc\t', auc_val)
print('roc_auc\t', roc_auc_val)

In [None]:
t = time.localtime()
current_time = time.strftime("%H:%M:%S", t)
print(current_time)