In [None]:
from utils.tools import dotdict
from exp.exp_informer import Exp_Informer
import torch
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [None]:
# here is the detailed code of function predict

def predict(exp, setting, load=False):
    """Run the model over the test split and save the raw forecasts.

    Parameters
    ----------
    exp : experiment object
        Must expose ``_get_data(flag=...)``, ``model``, ``device`` and ``args``
        (``checkpoints``, ``padding``, ``pred_len``, ``label_len``, ``use_amp``,
        ``output_attention``).
    setting : str
        Experiment identifier; names the sub-directory under
        ``exp.args.checkpoints`` and under ``./results/``.
    load : bool, default False
        When True, ``checkpoint.pth`` for ``setting`` is loaded into
        ``exp.model`` before predicting.

    Returns
    -------
    numpy.ndarray
        Model outputs of every batch stacked along the first axis, reshaped to
        ``(-1, outputs.shape[-2], outputs.shape[-1])``. The same array is
        written to ``./results/<setting>/real_prediction.npy``.
    """
    _, pred_loader = exp._get_data(flag='test')

    if load:
        best_model_path = os.path.join(exp.args.checkpoints, setting, 'checkpoint.pth')
        # map_location lets a checkpoint saved on a GPU be restored on whatever
        # device this experiment actually runs on (e.g. a CPU-only machine).
        exp.model.load_state_dict(torch.load(best_model_path, map_location=exp.device))

    exp.model.eval()

    preds = []

    # Inference only: do not record an autograd graph for every batch.
    with torch.no_grad():
        for batch_x, batch_y, batch_x_mark, batch_y_mark in pred_loader:
            batch_x = batch_x.float().to(exp.device)
            batch_y = batch_y.float()
            batch_x_mark = batch_x_mark.float().to(exp.device)
            batch_y_mark = batch_y_mark.float().to(exp.device)

            # decoder input: the first label_len steps of batch_y followed by
            # pred_len placeholder steps (ones when padding == 1, zeros otherwise)
            pad_shape = [batch_y.shape[0], exp.args.pred_len, batch_y.shape[-1]]
            if exp.args.padding == 1:
                placeholder = torch.ones(pad_shape).float()
            else:
                placeholder = torch.zeros(pad_shape).float()
            dec_inp = torch.cat(
                [batch_y[:, :exp.args.label_len, :], placeholder], dim=1
            ).float().to(exp.device)

            # encoder - decoder
            if exp.args.use_amp:
                with torch.cuda.amp.autocast():
                    outputs = exp.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
            else:
                outputs = exp.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
            # With output_attention the model returns a sequence whose first
            # item is the forecast.
            if exp.args.output_attention:
                outputs = outputs[0]

            preds.append(outputs.detach().cpu().numpy())

    # Concatenate along the batch axis so a smaller final batch does not break
    # the stacking (np.array on unequal batch shapes fails).
    preds = np.concatenate(preds, axis=0)
    preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])

    # result save
    folder_path = './results/' + setting + '/'
    os.makedirs(folder_path, exist_ok=True)

    np.save(folder_path + 'real_prediction.npy', preds)

    return preds


In [None]:
# Experiment configuration: every option read by Exp_Informer and by predict().
args = dotdict()

args.model = 'informer' # model of experiment, options: [informer, informerstack, informerlight(TBD)]

args.data = 'custom' # data
args.root_path = './Dataset/SWAT/' # root path of data file
args.data_path = 'Normal.csv' # data file
args.features = 'M' # forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate
args.target = 'OT' # target feature in S or MS task
args.freq = 's' # freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h
args.checkpoints = './checkpoints' # location of model checkpoints

# args.data = 'ETTh1' # data
# args.root_path = './ETDataset/ETT-small/' # root path of data file
# args.data_path = 'ETTh1.csv' # data file
# args.features = 'M' # forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate
# args.target = 'OT' # target feature in S or MS task
# args.freq = 's' # freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h
# args.checkpoints = './informer_checkpoints' # location of model checkpoints

args.seq_len = 100 # input sequence length of Informer encoder
args.label_len = 50 # start token length of Informer decoder
args.pred_len = 1 # prediction sequence length
# Informer decoder input: concat[start token series(label_len), zero padding series(pred_len)]

# NOTE: enc_in / dec_in / c_out are overwritten by the data_parser cell below
# whenever args.data has an entry there.
args.enc_in = 7 # encoder input size
args.dec_in = 7 # decoder input size
args.c_out = 7 # output size
args.factor = 5 # probsparse attn factor
args.d_model = 512 # dimension of model
args.n_heads = 8 # num of heads
args.e_layers = 2 # num of encoder layers
args.d_layers = 1 # num of decoder layers
args.d_ff = 2048 # dimension of fcn in model
args.dropout = 0.05 # dropout
args.attn = 'prob' # attention used in encoder, options:[prob, full]
args.embed = 'timeF' # time features encoding, options:[timeF, fixed, learned]
args.activation = 'gelu' # activation
args.distil = True # whether to use distilling in encoder
args.output_attention = False # whether to output attention in encoder
args.mix = True # NOTE(review): consumed inside the model code; semantics not visible in this notebook
args.padding = 0 # decoder placeholder fill used by predict(): 1 -> ones, anything else -> zeros

args.batch_size = 128
args.learning_rate = 0.0001
args.loss = 'mse'
args.lradj = 'type1'
args.use_amp = False # whether to use automatic mixed precision training

args.num_workers = 0
args.itr = 1 # number of train/test repetitions run by the experiment loop
args.train_epochs = 1
args.patience = 3
args.des = 'exp' # free-text tag appended to the experiment identifier

args.use_gpu = torch.cuda.is_available()
args.gpu = 0

args.use_multi_gpu = False
args.devices = '0,1' # comma-separated device ids, only parsed when use_multi_gpu is True


In [None]:
# Sanity check: does PyTorch see a CUDA device in this environment?
torch.cuda.is_available()

In [None]:
# Use the GPU only when one is present AND it was requested in the config.
args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)

# Multi-GPU: parse the comma-separated device string into integer ids and
# make the first listed id the primary device.
if args.use_gpu and args.use_multi_gpu:
    args.devices = args.devices.replace(' ', '')
    device_ids = args.devices.split(',')
    args.device_ids = list(map(int, device_ids))
    args.gpu = args.device_ids[0]

In [None]:
# Per-dataset defaults: file name ('data'), target column ('T') and, keyed by
# forecasting mode, the [enc_in, dec_in, c_out] sizes.
# Only mode 'M' is listed for 'custom'; any other args.features raises KeyError.
data_parser = {
    'custom': {
        'data': 'Normal.csv',
        'T': 'FIT101',
        'M': [37, 37, 37],
    },
}

# Overwrite the generic config values with the dataset-specific ones.
if args.data in data_parser:
    data_info = data_parser[args.data]
    args.data_path = data_info['data']
    args.target = data_info['T']
    args.enc_in, args.dec_in, args.c_out = data_info[args.features]

In [None]:
# Show the dataset defaults that were applied (only defined when args.data is in data_parser).
data_info

In [None]:
# Show the forecasting mode in use.
args.features

In [None]:
# Keep the full frequency string in detail_freq and reduce freq to its last character.
# NOTE(review): not idempotent for multi-character frequencies such as '15min' —
# running this cell twice would copy the already-truncated value into detail_freq.
args.detail_freq = args.freq
args.freq = args.freq[-1:]

In [None]:
# Alias for the experiment class instantiated in the cells below.
Exp = Exp_Informer

In [None]:
# Train and evaluate the model args.itr times; each run gets its own identifier.
for ii in range(args.itr):
    # The identifier encodes the main hyper-parameters plus the run index; it is
    # used below to locate this run's files under ./results/.
    setting = '{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_at{}_fc{}_eb{}_dt{}_mx{}_{}_{}'.format(
        args.model,
        args.data,
        args.features,
        args.seq_len,
        args.label_len,
        args.pred_len,
        args.d_model,
        args.n_heads,
        args.e_layers,
        args.d_layers,
        args.d_ff,
        args.attn,
        args.factor,
        args.embed,
        args.distil,
        args.mix,
        args.des,
        ii,
    )

    # Build a new experiment for this run.
    exp = Exp(args)

    # Fit the model.
    print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
    exp.train(setting)

    # Evaluate on the test split.
    print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
    exp.test(setting)

    # Release cached GPU memory before the next run.
    torch.cuda.empty_cache()

In [None]:
# Load the forecasts stored for the last run.
# NOTE(review): pred.npy is presumably written by exp.test(setting) — confirm in Exp_Informer.
prediction = np.load('./results/'+setting+'/pred.npy')

prediction.shape

In [None]:
# Fresh experiment object; predict(..., load=True) restores the weights from
# <args.checkpoints>/<setting>/checkpoint.pth before running inference.
exp = Exp(args)
# Same identifier format as the training loop, with the run index fixed to 0.
setting = '{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_at{}_fc{}_eb{}_dt{}_mx{}_{}_{}'.format(args.model, args.data, args.features, 
                args.seq_len, args.label_len, args.pred_len,
                args.d_model, args.n_heads, args.e_layers, args.d_layers, args.d_ff, args.attn, args.factor, args.embed, args.distil, args.mix, args.des, 0)
# Replaces the `prediction` array loaded from pred.npy with the output of predict(),
# which is also saved as real_prediction.npy.
prediction = predict(exp, setting, True)

In [None]:
# Load the stored forecasts and the matching ground truth for this setting.
preds = np.load('./results/'+setting+'/pred.npy')
trues = np.load('./results/'+setting+'/true.npy')




In [None]:
# Shape of the ground-truth array.
trues.shape

In [None]:
# Shape of the forecast array; it must match trues for the error computation below.
preds.shape

## Prediction

In [None]:
# Same two loads as the earlier cell, repeated at the start of this section
# (still requires `setting` to be defined).
preds = np.load('./results/'+setting+'/pred.npy')
trues = np.load('./results/'+setting+'/true.npy')

In [None]:
# Per-sample anomaly score: squared error at index 0 of the second axis, averaged over axis 1 of the resulting slice.
# assumes preds/trues are 3-D (samples, pred_len, features) — TODO confirm
det = np.mean((preds[:,0,:]-trues[:,0,:])**2,1)

In [None]:
# Visualise the raw per-sample error.
plt.plot(det)

In [None]:
# for i in range(0,len(det),100):
#     if np.mean(det[i:i+100]) < 1:
#         det[i:i+100] = 0

In [None]:
# Attack recording; the first line of the file is skipped when parsing.
df_a = pd.read_csv('./Attack.csv', skiprows=1)
labels = np.load('labels.npy')
# Row positions (as a NumPy array) of every sample labelled 'Attack'.
GT2 = df_a.loc[df_a['Normal/Attack'] == 'Attack'].index.values

In [None]:
def score_to_stat(score, d_alpha):
    """Accumulate scores into a running statistic that is floored at zero.

    Starting from 0, every step adds ``score[i] - d_alpha`` to the previous
    value and clips the result at 0, so the statistic only grows while the
    score exceeds ``d_alpha``.

    Parameters
    ----------
    score : sequence of numbers
        Per-sample scores, indexed from 0 to ``len(score) - 1``.
    d_alpha : number
        Amount subtracted from every score before accumulation.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, len(score) + 1)``; column 0 is the initial 0 and
        column ``i + 1`` is the statistic after consuming ``score[i]``.
    """
    n_steps = len(score)
    stat = np.zeros((1, n_steps + 1))
    row = stat[0]  # view on the single row, so writes land in `stat`
    for pos in range(n_steps):
        candidate = row[pos] + score[pos] - d_alpha
        row[pos + 1] = np.max((0, candidate))

    return stat
        
# Accumulate the per-sample error; 0.4 is subtracted from every score before it is added.
# Result has shape (1, len(det) + 1), so column i + 1 corresponds to det[i].
accumulated_score = score_to_stat(det, 0.4)

In [None]:
# Visualise the accumulated statistic over time.
plt.plot(accumulated_score[0,:])

In [None]:
# Contiguous [start, end] sample-index segments covering the recording.
# The evaluation loop walks them in pairs (ranges[event], ranges[event+1]);
# presumably even entries are normal periods and odd entries attacks — verify against the labels.
# Every segment starts one sample after the previous one ends. Entry 68 previously started
# at 438621, overlapping the preceding segment; it now starts at 438921.
ranges = [[0,1756],[1757,2696],[2697,3070],[3071,3513],[3514,4922],[4923,5305],[5306,6461],[6462,6851],[6852,7257],[7258,7453],[7454,7707],[7708,8136],[8137,11412],[11413,12376],[12377,15382],[15383,16103],[16104,73802],[73803,74523],[74524,90687],[90688,90920],[90921,92142],[92143,92573],[92574,93447],[93448,93723],[93724,103094],[103095,103811],[103812,115845],[115846,116104],[116105,116145],[116146,116540],[116541,117002],[117003,117723],[117724,132920],[132921,133383],[133384,142956],[142957,143653],[143654,172270],[172271,172591],[172592,172912],[172913,173524],[173525,198298],[198299,199743],[199744,227830],[227831,263730],[263731,279122],[279123,279243],[279244,280062],[280063,281233],[281234,302655],[302656,303022],[303023,347681],[347682,348282],[348283,361193],[361194,361637],[361638,371481],[371482,371582],[371583,371857],[371858,372338],[372339,389682],[389683,390222],[390223,436543],[436544,437012],[437013,437419],[437420,437700],[437701,438149],[438150,438550],[438551,438623],[438624,438920],[438921,443503],[443504,445193],[445194,449921]]

In [None]:
# Sweep the alarm threshold. For every threshold that produces at least one
# true or false positive, record the mean detection delay (add_f) and the
# event-level precision tp / (tp + fp) (precision_f).
add_f = list()
precision_f = list()
max_delay = 5000   # cap on a counted delay; also the delay charged to a missed event
max_window = 5000  # stride and width of the windows used to count false alarms

for thr in range(0,20000,100):
    # Per-threshold accumulators, one entry appended per segment pair.
    # NOTE: `precision` is only filled in some branches and is not used for the final result.
    add = list()
    precision = list()
    tp = list()
    fp = list()
    # Segments are consumed in pairs: ranges[event] followed by ranges[event+1]
    # (presumably a normal period and the attack after it — verify).
    for event in range(0,len(ranges)-1,2):

        # stat spans both segments of the pair; anomaly_stat only the second one.
        stat = accumulated_score[0,ranges[event][0]:ranges[event+1][1]]
        anomaly_stat = accumulated_score[0,ranges[event+1][0]:ranges[event+1][1]]
        
        # Absolute sample indices at which the statistic reaches the threshold.
        alarm = np.where(np.array(stat) >= thr)[0] + ranges[event][0]
        
        # Taken when there is an alarm and the latest one lies at or after the start of the second segment.
        if len(alarm) and np.max(alarm) >= ranges[event+1][0] > 0:
            if alarm[0] < ranges[event][1]:
                # The first alarm fired inside the first segment: count how many
                # max_window-sized windows of that segment contain an alarm.
                # When ranges[event][1]-ranges[event][0] <= max_window the range below is empty and fpc stays 0.
                fpc = 0
                for ev in range(0,ranges[event][1]-max_window-ranges[event][0],max_window):
                    if len(np.where(np.array(stat)[ev:ev+max_window] >= thr)[0]) > 0:
                        fpc+=1
                fp.append(fpc)
                # The condition repeats the enclosing `if`, so this always appends 1.
                tp.append(1) if np.max(alarm) >= ranges[event+1][0] else tp.append(0)
                # Delay = offset of the first threshold crossing inside the second segment.
                add.append(np.where(np.array(anomaly_stat) >= thr)[0][0])
#                 precision.append(tp[0]/(tp[0]+fpc))
            else:
#                 print(alarm[0],thr)
                # First alarm at or after the end of the first segment: delay is measured from ranges[event][1].
                delay = alarm[0]-ranges[event][1]
                if delay < max_delay:
                    add.append(delay)
                    precision.append(1)
                    tp.append(1)
                    fp.append(0)
                else:
                    # Too late to count as a detection: charged as a capped delay and a false positive.
                    add.append(max_delay)
                    precision.append(0)
                    tp.append(0)
                    fp.append(1)
                
        else:
            # No alarm reaches the second segment: missed event with the maximum delay.
            # This branch is also taken when alarms exist only before ranges[event+1][0];
            # no false positive is recorded for them.
            add.append(max_delay)
            fp.append(0)
            tp.append(0)
            
    # Skip thresholds with neither true nor false positives (precision would be 0/0).
    if np.mean(tp) + np.mean(fp) != 0:
        add_f.append(np.mean(add))
        precision_f.append(np.sum(tp)/(np.sum(tp)+np.sum(fp)))
#         precision_f.append(np.mean(precision))

In [None]:
# Area under the precision vs. normalised-delay curve (delay divided by max_delay).
# NOTE(review): metrics.auc requires x to be monotonic; confirm that add_f is
# monotonic over the threshold sweep, otherwise this raises ValueError.
from sklearn import metrics
metrics.auc(np.array(add_f)/max_delay, precision_f)

In [None]:
# Point-wise precision / recall / F1 of the alarms at one fixed threshold.
thr = 50
df_n = pd.read_csv('Normal.csv',skiprows=1)
df_a = pd.read_csv('Attack.csv',skiprows=1)

# Sample indices where the accumulated statistic is at/above vs. below the threshold.
Alarms2 = np.where(accumulated_score[0,:] >= thr)[0]
N_Alarms2 = np.where(accumulated_score[0,:] < thr)[0]

# Row positions labelled as attack / normal in the attack recording.
# NOTE(review): this compares accumulated_score positions directly with df_a row
# positions — confirm the two are aligned.
GT2 = df_a.loc[df_a['Normal/Attack'] == 'Attack'].index.values
NT2 = df_a.loc[df_a['Normal/Attack'] == 'Normal'].index.values

# Build each index set once instead of once per intersection.
alarm_idx = set(Alarms2.tolist())
quiet_idx = set(N_Alarms2.tolist())
attack_idx = set(GT2.tolist())
normal_idx = set(NT2.tolist())

TP2 = list(alarm_idx & attack_idx)
TN2 = list(quiet_idx & normal_idx)
FP2 = list(alarm_idx & normal_idx)
FN2 = list(quiet_idx & attack_idx)

# Guard every ratio: with no alarms (or no hits) the denominators are zero and
# the unguarded divisions raised ZeroDivisionError; such metrics are reported as 0.0.
n_tp, n_fp, n_fn = len(TP2), len(FP2), len(FN2)
PRE2 = n_tp/(n_tp+n_fp) if (n_tp+n_fp) else 0.0
REC2 = n_tp/(n_tp+n_fn) if (n_tp+n_fn) else 0.0
F12 = 2*PRE2*REC2/(PRE2+REC2) if (PRE2+REC2) else 0.0

print(PRE2,REC2,F12)