# Model Definition

In [10]:

#%%
import torch.nn as nn
import torch.nn.functional as F
from models.NF import MAF, RealNVP
import torch

class GNN(nn.Module):
    """
    Graph layer used inside GANF.

    Each node receives a linear projection of the adjacency-weighted sum of
    all node states at the same time step, plus a bias-free linear projection
    of its own state at the previous time step. A ReLU and a final linear
    layer follow.

    Args:
        input_size: size of the last dimension of the incoming states.
        hidden_size: size of the last dimension of the output.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        # Creation order is kept so parameter initialisation is unchanged.
        self.lin_n = nn.Linear(input_size, hidden_size)              # neighbour aggregate
        self.lin_r = nn.Linear(input_size, hidden_size, bias=False)  # own previous step
        self.lin_2 = nn.Linear(hidden_size, hidden_size)             # output projection

    def forward(self, h, A):
        """
        Args:
            h: node states, N x K x L x D.
            A: adjacency matrix, K x K.

        Returns:
            Tensor shaped like h, with the last dimension equal to hidden_size.
        """
        # mixed[n, j, l, d] = sum_k h[n, k, l, d] * A[k, j]
        mixed = torch.einsum('nkld,kj->njld', h, A)
        out = self.lin_n(mixed)

        # Steps 1..L-1 also see the node's own state at steps 0..L-2;
        # step 0 has no predecessor and keeps only the neighbour term.
        out[:, :, 1:] += self.lin_r(h[:, :, :-1])

        return self.lin_2(F.relu(out))


class GANF(nn.Module):
    """
    Graph-augmented normalizing flow.

    An LSTM encodes every sensor's window, the GNN mixes those encodings
    across sensors through the adjacency matrix A, and a conditional
    normalizing flow (MAF or RealNVP) scores every time step given its mixed
    encoding.

    Args:
        n_blocks, input_size, hidden_size, n_hidden: forwarded positionally to
            the flow constructor; input_size and hidden_size also size the LSTM.
        dropout: forwarded to nn.LSTM. The LSTM here has a single layer, and
            PyTorch applies this dropout only between stacked layers, so a
            non-zero value has no effect and makes PyTorch emit a warning.
        model: "MAF" selects MAF; any other value selects RealNVP.
        batch_norm: forwarded to the flow constructor.
        verbose: when True (the default, which keeps the previous behaviour),
            `test` -- and therefore `forward` -- prints the tensor shape after
            every stage. Pass False for training or evaluation loops.
    """

    def __init__(self, n_blocks, input_size, hidden_size, n_hidden, dropout=0.1, model="MAF", batch_norm=True, verbose=True):
        super(GANF, self).__init__()

        self.verbose = verbose

        self.rnn = nn.LSTM(input_size=input_size,hidden_size=hidden_size,batch_first=True, dropout=dropout)
        self.gcn = GNN(input_size=hidden_size, hidden_size=hidden_size)
        if model=="MAF":
            self.nf = MAF(n_blocks, input_size, hidden_size, n_hidden, cond_label_size=hidden_size, batch_norm=batch_norm,activation='tanh')
        else:
            self.nf = RealNVP(n_blocks, input_size, hidden_size, n_hidden, cond_label_size=hidden_size, batch_norm=batch_norm)

    def forward(self, x, A):
        """Return the scalar mean over the batch of `test(x, A)`."""
        return self.test(x, A).mean()

    def _flat_log_prob(self, x, A, trace=False):
        """
        Shared pipeline behind `test` and `locate`.

        Args:
            x: N x K x L x D input, for example [512, 44, 60, 1].
            A: K x K adjacency matrix.
            trace: print the shape after each stage when True.

        Returns:
            Whatever `self.nf.log_prob` returns for the flattened (N*K*L, D)
            inputs conditioned on the flattened (N*K*L, H) encodings; callers
            reshape it per window or per sensor.
        """
        # NOTE: the whitespace inside the triple-quoted strings below is part
        # of the printed text, which is why those lines are not re-indented.
        full_shape = x.shape
        if trace:
            print(f"> x Input: {x.shape}")

        # reshape: N*K, L, D -- every sensor window becomes its own LSTM sequence
        x = x.reshape((full_shape[0]*full_shape[1], full_shape[2], full_shape[3]))
        h,_ = self.rnn(x)
        if trace:
            print(f'''> h,_ = self.rnn(x)
        input x reshaped: x = x.reshape((x.shape[0]*x.shape[1], x.shape[2], x.shape[3]))
        x.shape: {x.shape}
        h.shape: {h.shape}
        ''')

        # reshape: N, K, L, H
        h = h.reshape((full_shape[0], full_shape[1], h.shape[1], h.shape[2]))
        if trace:
            print(f'''> h = h.reshape((full_shape[0], full_shape[1], h.shape[1], h.shape[2]))
        h.shape:{h.shape}
        ''')

        # Remember the input shape so the trace reports it rather than the
        # shape of the tensor that replaces it.
        gcn_in_shape = h.shape
        h = self.gcn(h, A)
        if trace:
            print(f'''> h = self.gcn(h, A)
        input h:{gcn_in_shape}, A {A.shape}
        output h{h.shape}
        ''')

        # reshape: N*K*L, H (encodings) and N*K*L, D (observations)
        h = h.reshape((-1,h.shape[3]))
        x = x.reshape((-1,full_shape[3]))
        if trace:
            print(f'''
        > h = h.reshape((-1,h.shape[3]))
        > x = x.reshape((-1,full_shape[3]))
        h.shape: {h.shape}
        x.shape: {x.shape}
        ''')

        return self.nf.log_prob(x,h)

    def test(self, x, A):
        """
        Mean log-probability per window.

        Args:
            x: N x K x L x D input.
            A: K x K adjacency matrix.

        Returns:
            Tensor of shape (N,): the flow's log-probabilities averaged over
            all sensors and time steps of each window.
        """
        # getattr keeps instances created before `verbose` existed working
        # (for example modules restored from an older pickle).
        trace = getattr(self, "verbose", True)

        flat = self._flat_log_prob(x, A, trace=trace)
        log_prob = flat.reshape([x.shape[0],-1])
        if trace:
            print(f'''
        > log_prob = self.nf.log_prob(x,h).reshape([full_shape[0],-1])
            self.nf.log_prob(x,h) = {flat.shape}
          log_prob = {log_prob.shape}
        ''')

        log_prob = log_prob.mean(dim=1)
        if trace:
            print(f'''
        > log_prob = log_prob.mean(dim=1)
        log_prob = {log_prob.shape}
        ''')

        return log_prob

    def locate(self, x, A):
        """
        Mean log-probability per window and per sensor.

        Args:
            x: N x K x L x D input.
            A: K x K adjacency matrix.

        Returns:
            Tensor of shape (N, K): the flow's log-probabilities averaged over
            the time steps of each sensor's window. Never prints.
        """
        flat = self._flat_log_prob(x, A, trace=False)
        log_prob = flat.reshape([x.shape[0],x.shape[1],-1])
        return log_prob.mean(dim=2)


# Test GANF

In [11]:
# Small GANF instance for a shape walk-through.
# batch_norm defaults to True in GANF.__init__, so it is switched off explicitly here.
ganf = GANF(
    n_blocks=1,
    input_size=1,
    hidden_size=32,
    n_hidden=1,
    dropout=0.0,
    batch_norm=False,
)

In [12]:
# Print the module tree to check the layer sizes produced by the arguments above.
print(ganf)

GANF(
  (rnn): LSTM(1, 32, batch_first=True)
  (gcn): GNN(
    (lin_n): Linear(in_features=32, out_features=32, bias=True)
    (lin_r): Linear(in_features=32, out_features=32, bias=False)
    (lin_2): Linear(in_features=32, out_features=32, bias=True)
  )
  (nf): MAF(
    (net): FlowSequential(
      (0): MADE(
        (net_input): MaskedLinear(in_features=1, out_features=32, bias=True, cond_features=32)
        (net): Sequential(
          (0): Tanh()
          (1): MaskedLinear(in_features=32, out_features=32, bias=True)
          (2): Tanh()
          (3): MaskedLinear(in_features=32, out_features=2, bias=True)
        )
      )
    )
  )
)


In [13]:
# All-zero stand-ins with the shapes documented in GANF.test:
# x is N x K x L x D = 512 x 44 x 60 x 1, A is K x K = 44 x 44.
x = torch.zeros(512, 44, 60, 1)
A = torch.zeros(44, 44)

# One forward pass; the result is a scalar tensor.
loss = ganf(x, A)

> x Input: torch.Size([512, 44, 60, 1])
> h,_ = self.rnn(x)
        input x reshaped: x = x.reshape((x.shape[0]*x.shape[1], x.shape[2], x.shape[3]))
        x.shape: torch.Size([22528, 60, 1])
        h.shape: torch.Size([22528, 60, 32])
        
> h = h.reshape((full_shape[0], full_shape[1], h.shape[1], h.shape[2]))
        h.shape:torch.Size([512, 44, 60, 32])
        
> h = self.gcn(h, A)
        input h:torch.Size([512, 44, 60, 32]), A torch.Size([44, 44])
        output htorch.Size([512, 44, 60, 32])
        

        > h = h.reshape((-1,h.shape[3]))
        > x = x.reshape((-1,full_shape[3]))
        h.shape: torch.Size([1351680, 32])
        x.shape: torch.Size([1351680, 1])
        

        > log_prob = self.nf.log_prob(x,h).reshape([full_shape[0],-1])
            self.nf.log_prob(x,h) = torch.Size([1351680])
          log_prob = torch.Size([512, 2640])
        

        > log_prob = log_prob.mean(dim=1)
        log_prob = torch.Size([512])
        


In [14]:
# Scalar returned by GANF.forward: the batch mean of the per-window mean log-probabilities.
# NOTE(review): nothing in the visible code negates this value before it is named `loss` --
# confirm the sign convention expected by the training loop.
loss

tensor(-0.9350, grad_fn=<MeanBackward0>)

In [20]:
from dataset import load_water

# Machine-specific absolute path to the SWaT attack CSV; adjust it for your environment.
swat_csv = '/root/zengzihui/ISST/GANF/data/SWaT_Dataset_Attack_v0.csv'
# NOTE(review): 512 is presumably the batch size (it matches N in the dummy batch above) --
# confirm against dataset.load_water.
train_loader, val_loader, test_loader, n_sensor = load_water(swat_csv, 512)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.label[data.label!="Normal"]=1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.label[data.label=="Normal"]=0


In [26]:
from sklearn.metrics import roc_auc_score
import numpy as np

# Intended ROC-AUC evaluation, kept for reference. loss_val / loss_test are not
# computed in any of the visible cells, so these lines cannot run as-is.
# roc_val = roc_auc_score(np.asarray(val_loader.dataset.label.values,dtype=int),loss_val)
# roc_test = roc_auc_score(np.asarray(test_loader.dataset.label.values,dtype=int),loss_test)

# Validation labels as an integer array. Judging by the assignments echoed in the
# pandas warnings above, 0 marks "Normal" rows and 1 marks everything else.
tmp = np.asarray(val_loader.dataset.label.values,dtype=int)
# Alternative way to get the number of labels:
# len(val_loader.dataset.label.to_list())

In [29]:
# Count the validation entries labelled 1: the shape of the filtered array is that count.
tmp[tmp == 1].shape

(225,)