In [1]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, random_split, Subset
from sklearn.preprocessing import MinMaxScaler
from util.env import get_device, set_device
from util.preprocess import build_loc_net, construct_data
from util.net_struct import get_feature_map, get_fc_graph_struc
from util.iostream import printsep
from datasets.TimeDataset import TimeDataset
from models.GDN import GDN
from train_model import train_model
from test_model  import test_model
from evaluate import get_err_scores, get_best_performance_data, get_val_performance_data, get_full_err_scores
import sys
from datetime import datetime
import os
import argparse
from pathlib import Path
import matplotlib.pyplot as plt
import json
import random

class Args:
    """Notebook stand-in for the command-line arguments of the training script.

    Every option is stored as a plain instance attribute. ``Args()`` yields the
    defaults listed below; individual options can be overridden by keyword,
    e.g. ``Args(dataset='wadi', device='cpu')``.

    Raises:
        TypeError: if a keyword does not name one of the known options, so a
            misspelled option is reported instead of being silently ignored.
    """

    def __init__(self, **overrides):
        self.batch = 32                  # batch size handed to the DataLoaders
        self.epoch = 50
        self.slide_win = 5               # window length; also the model's input_dim
        self.dim = 64
        self.slide_stride = 5
        self.save_path_pattern = ''      # sub-directory used under ./pretrained and ./results
        self.dataset = 'swat'            # selects ./data/<dataset>/{train,test}.csv
        self.device = 'cuda'
        self.random_seed = 0
        self.comment = ''
        self.out_layer_num = 1
        self.out_layer_inter_dim = 64
        self.decay = 0.0
        self.val_ratio = 0.1             # fraction of the training set held out for validation
        self.topk = 15
        self.report = 'best'             # 'best' or 'val': which metrics get_score prints
        self.load_model_path = ''        # non-empty: load this checkpoint instead of training

        # Additional options
        self.npy_save_path = './eval_loss_acc.npy'  # Path to save evaluation results
        self.pool_save_path = './prompt_pool.npy'  # Path to save prompt pool
        self.dataset_root = './Dataset'  # Root directory for datasets

        # Apply caller overrides last, accepting only names that already exist.
        unknown = [name for name in overrides if name not in vars(self)]
        if unknown:
            raise TypeError(f'unknown option(s) for Args: {", ".join(sorted(unknown))}')
        vars(self).update(overrides)

args = Args()
print(args.device)

# Reproducibility: feed the same seed to every random number generator in use
# (Python, NumPy, PyTorch CPU and CUDA) and pin cuDNN to deterministic kernels.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes — confirm.
os.environ['PYTHONHASHSEED'] = str(args.random_seed)
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(args.random_seed)
del _seed_fn  # keep the notebook namespace free of the loop helper

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

cuda


In [2]:
# Hyper-parameters passed to the model, the datasets and the training loop.
train_config = dict(
    batch=args.batch,
    epoch=args.epoch,
    slide_win=args.slide_win,
    dim=args.dim,
    slide_stride=args.slide_stride,
    comment=args.comment,
    seed=args.random_seed,
    out_layer_num=args.out_layer_num,
    out_layer_inter_dim=args.out_layer_inter_dim,
    decay=args.decay,
    val_ratio=args.val_ratio,
    topk=args.topk,
)

# Run environment: dataset choice, device, reporting mode and checkpoint paths.
env_config = dict(
    save_path=args.save_path_pattern,
    dataset=args.dataset,
    report=args.report,
    device=args.device,
    load_model_path=args.load_model_path,
)

In [None]:
datestr = None

# Read the train/test CSVs of the configured dataset; the first column is the index.
dataset = env_config['dataset']
train_orig = pd.read_csv(f'./data/{dataset}/train.csv', sep=',', index_col=0)
test_orig = pd.read_csv(f'./data/{dataset}/test.csv', sep=',', index_col=0)

# The training frame is used without an 'attack' column; the test frame keeps
# its 'attack' column, which is read later as the label list.
train = (
    train_orig.drop(columns=['attack'])
    if 'attack' in train_orig.columns
    else train_orig
)
test = test_orig

feature_map = get_feature_map(dataset)
# Fully-connected graph structure for the dataset
fc_struc = get_fc_graph_struc(dataset)

# Select the compute device once and keep a handle to it for later cells.
set_device(env_config['device'])
device = get_device()

In [None]:
# Edge connections of the fully-connected structure, stored as a long tensor
fc_edge_index = torch.tensor(
    build_loc_net(fc_struc, list(train.columns), feature_map=feature_map),
    dtype=torch.long,
)

# Data lists with their labels: a constant 0 for training,
# the per-row 'attack' column for testing.
train_dataset_indata = construct_data(train, feature_map, labels=0)
test_dataset_indata = construct_data(test, feature_map, labels=test.attack.tolist())

# Sliding-window settings shared by both datasets
cfg = {key: train_config[key] for key in ('slide_win', 'slide_stride')}

train_dataset = TimeDataset(train_dataset_indata, fc_edge_index, mode='train', config=cfg)
test_dataset = TimeDataset(test_dataset_indata, fc_edge_index, mode='test', config=cfg)

In [28]:
# Inspect the shapes of the training dataset's x, y and labels tensors.
train_dataset.x.shape, train_dataset.y.shape, train_dataset.labels.shape

(torch.Size([9467, 51, 5]), torch.Size([9467, 51]), torch.Size([9467]))

In [29]:
def get_loaders(train_dataset, seed, batch, val_ratio=0.1):
    """Split ``train_dataset`` into a training and a validation ``DataLoader``.

    A contiguous block of ``int(len(train_dataset) * val_ratio)`` samples,
    starting at a randomly drawn index, is held out for validation; the samples
    before and after that block form the training subset.

    Args:
        train_dataset: indexable dataset supporting ``len()``.
        seed: seed for choosing where the validation block starts. The same
            seed always yields the same split.
        batch: batch size used by both loaders.
        val_ratio: fraction of the dataset to hold out for validation.

    Returns:
        ``(train_dataloader, val_dataloader)``. The training loader shuffles
        its subset; the validation loader keeps the original order.

    Raises:
        ValueError: if no sample would remain for training (empty dataset or
            ``val_ratio`` too close to 1).
    """
    dataset_len = len(train_dataset)
    train_use_len = int(dataset_len * (1 - val_ratio))
    val_use_len = int(dataset_len * val_ratio)
    if train_use_len <= 0:
        raise ValueError(
            f'cannot split {dataset_len} samples with val_ratio={val_ratio}: '
            'no samples left for training'
        )

    # Draw from a private generator seeded with `seed` so the split is
    # reproducible regardless of how much of the global `random` state has
    # already been consumed (e.g. when this cell is re-run).
    val_start_index = random.Random(seed).randrange(train_use_len)
    val_end_index = val_start_index + val_use_len
    indices = torch.arange(dataset_len)

    val_sub_indices = indices[val_start_index:val_end_index]
    train_sub_indices = torch.cat([indices[:val_start_index], indices[val_end_index:]])

    train_subset = Subset(train_dataset, train_sub_indices)
    val_subset = Subset(train_dataset, val_sub_indices)

    train_dataloader = DataLoader(train_subset, batch_size=batch, shuffle=True)
    val_dataloader = DataLoader(val_subset, batch_size=batch, shuffle=False)

    return train_dataloader, val_dataloader

# Train/validation loaders come from one split of the training dataset.
train_dataloader, val_dataloader = get_loaders(
    train_dataset,
    train_config['seed'],
    train_config['batch'],
    val_ratio=train_config['val_ratio'],
)

# The test loader iterates the test dataset in order, in the main process.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=train_config['batch'],
    shuffle=False,
    num_workers=0,
)

# The model receives a list of edge-index sets; only the fully-connected one is used here.
edge_index_sets = [fc_edge_index]

model = GDN(
    edge_index_sets,
    len(feature_map),
    dim=train_config['dim'],
    input_dim=train_config['slide_win'],
    out_layer_num=train_config['out_layer_num'],
    out_layer_inter_dim=train_config['out_layer_inter_dim'],
    topk=train_config['topk'],
).to(device)


In [6]:
def get_save_path(feature_name=''):
    """Build timestamped output paths and make sure their directories exist.

    The sub-directory comes from ``env_config['save_path']`` and the timestamp
    from the current local time.

    Args:
        feature_name: accepted but not used.

    Returns:
        A two-element list: the checkpoint path under ``./pretrained`` and the
        results CSV path under ``./results``, both carrying the same timestamp.
    """
    # NOTE(review): the timestamp contains '|' and ':', which some file
    # systems do not allow in file names — confirm for the target platform.
    datestr = datetime.now().strftime('%m|%d-%H:%M:%S')
    dir_path = env_config['save_path']

    paths = [
        f'./pretrained/{dir_path}/best_{datestr}.pt',
        f'./results/{dir_path}/{datestr}.csv',
    ]

    # Create each parent directory up front so later writes cannot fail on it.
    for parent in map(os.path.dirname, paths):
        Path(parent).mkdir(parents=True, exist_ok=True)

    return paths

def get_score(test_result, val_result):
    """Print F1, precision and recall for the test set.

    ``env_config['report']`` selects how the metrics are obtained:
    ``'best'`` uses ``get_best_performance_data`` and ``'val'`` uses
    ``get_val_performance_data``, which additionally receives the scores
    computed from ``val_result``.

    Args:
        test_result: test-set output; index 2 along its first axis holds the
            labels, whose first column is used as ground truth.
        val_result: validation-set output, passed to ``get_full_err_scores``.

    Raises:
        ValueError: if ``env_config['report']`` is neither ``'best'`` nor
            ``'val'``.
    """
    # Reject an unsupported mode before doing any work, rather than failing
    # later with a TypeError on an unset result.
    report = env_config['report']
    if report not in ('best', 'val'):
        raise ValueError(
            f"env_config['report'] must be 'best' or 'val', got {report!r}"
        )

    test_labels = np.array(test_result)[2, :, 0].tolist()

    test_scores, normal_scores = get_full_err_scores(test_result, val_result)

    # Only evaluate the variant that is actually reported.
    if report == 'best':
        info = get_best_performance_data(test_scores, test_labels, topk=1)
    else:
        info = get_val_performance_data(test_scores, normal_scores, test_labels, topk=1)

    print('=========================** Result **============================\n')

    print(f'F1 score: {info[0]}')
    print(f'precision: {info[1]}')
    print(f'recall: {info[2]}\n')

In [7]:
# Use the configured checkpoint if there is one; otherwise train a new model
# and save it to a fresh timestamped path.
if len(env_config['load_model_path']) > 0:
    model_save_path = env_config['load_model_path']
else:
    model_save_path = get_save_path()[0]

    train_log = train_model(model, model_save_path, 
        config = train_config,
        train_dataloader=train_dataloader,
        val_dataloader=val_dataloader, 
        feature_map=feature_map,
        test_dataloader=test_dataloader,
        test_dataset=test_dataset,
        train_dataset=train_dataset,
        dataset_name=env_config['dataset']
    )

# test
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on one device (e.g. CUDA) can be restored on another (e.g. CPU).
model.load_state_dict(torch.load(model_save_path, map_location=device))
best_model = model.to(device)

# In this notebook `test` is the test DataFrame, so calling it would fail;
# evaluation goes through the imported `test_model`.
# NOTE(review): assumes test_model(model, dataloader) returns a
# (loss, result) pair — confirm against test_model.py.
_, test_result = test_model(best_model, test_dataloader)
_, val_result = test_model(best_model, val_dataloader)

get_score(test_result, val_result)

  gated_i = torch.arange(0, node_num).T.unsqueeze(1).repeat(1, topk_num).flatten().to(device).unsqueeze(0)


epoch (0 / 50) (Loss:0.07006755, ACU_loss:18.70803578)
epoch (1 / 50) (Loss:0.02286051, ACU_loss:6.10375535)
epoch (2 / 50) (Loss:0.01824087, ACU_loss:4.87031206)
epoch (3 / 50) (Loss:0.01361006, ACU_loss:3.63388505)
epoch (4 / 50) (Loss:0.01092744, ACU_loss:2.91762747)
epoch (5 / 50) (Loss:0.01150898, ACU_loss:3.07289832)
epoch (6 / 50) (Loss:0.01075816, ACU_loss:2.87242931)
epoch (7 / 50) (Loss:0.01040180, ACU_loss:2.77728179)
epoch (8 / 50) (Loss:0.00943377, ACU_loss:2.51881528)
epoch (9 / 50) (Loss:0.00917554, ACU_loss:2.44987046)
epoch (10 / 50) (Loss:0.00874509, ACU_loss:2.33494018)
epoch (11 / 50) (Loss:0.00830604, ACU_loss:2.21771378)
epoch (12 / 50) (Loss:0.00981769, ACU_loss:2.62132348)
epoch (13 / 50) (Loss:0.00902593, ACU_loss:2.40992302)
epoch (14 / 50) (Loss:0.00883357, ACU_loss:2.35856256)
epoch (15 / 50) (Loss:0.00845646, ACU_loss:2.25787515)
epoch (16 / 50) (Loss:0.00816758, ACU_loss:2.18074380)
epoch (17 / 50) (Loss:0.00779426, ACU_loss:2.08106692)
epoch (18 / 50) (Lo

KeyboardInterrupt: 