In [1]:
import os
from datetime import datetime
from pathlib import Path

In [2]:
import pandas as pd
import numpy as np
import torch
import dgl

dgl.__version__

Using backend: pytorch


'0.6.1'

In [3]:
import random

from torch.utils.data import DataLoader, Subset
import torch.nn.functional as F
import torch.nn as nn
from Dataset import TimeDataset
from config import configs
from model import GDN

In [4]:
train_dataset = TimeDataset(is_train=True, config=configs)
test_dataset = TimeDataset(is_train=False, config=configs)

dataset_len = int(len(train_dataset))
train_len = int(dataset_len * (1 - configs["val_ratio"]))
val_len = dataset_len - train_len
val_start_idx = random.randrange(train_len)
indices = torch.arange(dataset_len)
train_sub_indices = torch.cat(
    [indices[:val_start_idx], indices[val_start_idx + val_len :]]
)
train_subset = Subset(train_dataset, train_sub_indices)

val_sub_indices = indices[val_start_idx : val_start_idx + val_len]
val_subset = Subset(train_dataset, val_sub_indices)

train_dataloader = DataLoader(
    train_subset, batch_size=configs["batch_size"], shuffle=True, drop_last=True
)

val_dataloader = DataLoader(
    val_subset, batch_size=configs["batch_size"], shuffle=False, drop_last=True
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=configs["batch_size"],
    shuffle=False,
    num_workers=0,
    drop_last=True,
)

In [5]:
graph = train_dataset.getGraph()
graph

Graph(num_nodes=27, num_edges=702,
      ndata_schemes={}
      edata_schemes={})

In [6]:
model = GDN(
    graph,
    embed_dim=configs["embed_dim"],
    input_dim=configs["slide_win"],
    hidden_dim=configs["hidden_dim"],
    outlayer_hidden_dim=configs["outlayer_hidden_dim"],
    num_outlayers=configs["num_outlayers"],
    topk=configs["topk"],
    dropout=configs["dropout"],
    device=configs["device"]
)

In [7]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=configs['decay'])
train_loss_list = []

In [8]:
acu_loss = 0
min_loss = 1e+8
min_f1 = 0
min_pre = 0
best_prec = 0

In [9]:
def get_save_path( feature_name=""):
    
    dir_path = ""

#     if datestr is None:
    now = datetime.now()
    datestr = now.strftime("%m|%d-%H:%M:%S")
 

    paths = [
        f"./pretrained/{dir_path}/best_{datestr}.pt",
        f"./results/{dir_path}/{datestr}.csv",
    ]

    for path in paths:
        dirname = os.path.dirname(path)
        Path(dirname).mkdir(parents=True, exist_ok=True)

    return paths

In [10]:
def test(model, dataloader):
    # test
    loss_func = nn.MSELoss(reduction='mean')
    device = "cpu"

    test_loss_list = []
#     now = time.time()

    test_predicted_list = []
    test_ground_list = []
    test_labels_list = []

    t_test_predicted_list = []
    t_test_ground_list = []
    t_test_labels_list = []

    test_len = len(dataloader)

    model.eval()

    i = 0
    acu_loss = 0
    for x, y, labels  in dataloader:
        x, y, labels = [item.to(device).float() for item in [x, y, labels]]
        
        with torch.no_grad():
            predicted = model(x).float().to(device)
            print (predicted.shape,y.shape,x.shape)
            
            loss = loss_func(predicted, y)
            

            labels = labels.unsqueeze(1).repeat(1, predicted.shape[1])

            if len(t_test_predicted_list) <= 0:
                t_test_predicted_list = predicted
                t_test_ground_list = y
                t_test_labels_list = labels
            else:
                t_test_predicted_list = torch.cat((t_test_predicted_list, predicted), dim=0)
                t_test_ground_list = torch.cat((t_test_ground_list, y), dim=0)
                t_test_labels_list = torch.cat((t_test_labels_list, labels), dim=0)
        
        test_loss_list.append(loss.item())
        acu_loss += loss.item()
        
        i += 1

#         if i % 10000 == 1 and i > 1:
#             print(timeSincePlus(now, i / test_len))


    test_predicted_list = t_test_predicted_list.tolist()        
    test_ground_list = t_test_ground_list.tolist()        
    test_labels_list = t_test_labels_list.tolist()      
    
    avg_loss = sum(test_loss_list)/len(test_loss_list)

    return avg_loss, [test_predicted_list, test_ground_list, test_labels_list]



In [11]:
from evaluate import (
    get_err_scores,
    get_best_performance_data,
    get_val_performance_data,
    get_full_err_scores,
)
def get_score(test_result, val_result):

    feature_num = len(test_result[0][0])
    np_test_result = np.array(test_result)
    np_val_result = np.array(val_result)

    test_labels = np_test_result[2, :, 0].tolist()

    test_scores, normal_scores = get_full_err_scores(test_result, val_result)

    top1_best_info = get_best_performance_data(test_scores, test_labels, topk=1)
    top1_val_info = get_val_performance_data(
        test_scores, normal_scores, test_labels, topk=1
    )

    print("=========================** Result **============================\n")

    info = None
    #if self.env_config["report"] == "best":
    info = top1_best_info
#     elif self.env_config["report"] == "val":
#         info = top1_val_info

    print(f"F1 score: {info[0]}")
    print(f"precision: {info[1]}")
    print(f"recall: {info[2]}\n")


In [12]:
model.train()
train_loss = []
model_save_path = get_save_path()[0]
for i_epoch in range(configs["epoch"]):
    acu_loss = 0
    for x, labels, attack_labels in train_dataloader:
        x, labels = [item.float().to(configs['device']) for item in [x, labels]]
        optimizer.zero_grad()
        out = model(x).float().to(configs['device'])
        loss = F.mse_loss(out, labels, reduction="mean")
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
        acu_loss += loss.item()
        print(
            "epoch ({} / {}) (Loss:{:.8f}, ACU_loss:{:.8f})".format(
                i_epoch, configs["epoch"], acu_loss / len(train_dataloader), acu_loss
            ),
            flush=True,
        )

        # use val dataset to judge
        if val_dataloader is not None:

            val_loss, val_result = test(model, val_dataloader)

            if val_loss < min_loss:
                torch.save(model.state_dict(), model_save_path)

                min_loss = val_loss
                stop_improve_count = 0
            else:
                stop_improve_count += 1

            if stop_improve_count >= 15:
                break

        else:
            if acu_loss < min_loss:
                torch.save(model.state_dict(), save_path)
                min_loss = acu_loss
    model.load_state_dict(torch.load(model_save_path))
    best_model =model.to("cpu")

    _, test_result = test(best_model, test_dataloader)
    _, val_result = test(best_model, val_dataloader)

    get_score(test_result, val_result)

epoch (0 / 10) (Loss:0.24308949, ACU_loss:2.43089485)
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
epoch (0 / 10) (Loss:0.32112331, ACU_loss:3.21123314)
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
epoch (0 / 10) (Loss:0.40043521, ACU_loss:4.00435209)
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
epoch (0 / 10) (Loss:0.48008639, ACU_loss:4.80086386)
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
epoch (0 / 10) (Loss:0.55989443, ACU_loss:5.59894431)
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
epoch (0 / 10) (Loss:0.63919410, ACU_loss:6.39194101)
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
epoch (0 / 10) (Loss:0.71543733, ACU_loss:7.15437335)
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
epoch (0 / 10) (Loss:0.79193571, ACU_loss:7.91935712)
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
epoch (0 / 10) (

epoch (3 / 10) (Loss:0.49805835, ACU_loss:4.98058355)
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
epoch (3 / 10) (Loss:0.56756577, ACU_loss:5.67565769)
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
epoch (3 / 10) (Loss:0.63958453, ACU_loss:6.39584529)
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
epoch (3 / 10) (Loss:0.71106036, ACU_loss:7.11060357)
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) tor

torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])
torch.Size([128, 27]) torch.Size([128, 27]) torch.Size([128, 27, 3])

F1 score: 0.8690265486725663
precision: 0.768668407310705
recall: 0.9986431478968792

epoch (7 / 10) (Loss:0.06626404, ACU_loss:0.66264039)
torch.Size([128, 27]) torch.Siz


F1 score: 0.8690265486725663
precision: 0.768668407310705
recall: 0.9986431478968792

