In [1]:
import math
import os
import time

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
from utils import read_test_data
from utils import gen_graph
from utils import prepare_synthetic
from utils import shuffle_graph
from utils import preprocessing_data
from utils import get_pairwise_ids

from utils import prepare_test
from utils import top_n_acc
from utils import validate

In [3]:
# --- Experiment configuration -------------------------------------------------
RANDOM_STATE = 11

# Number of synthetic graphs passed to prepare_synthetic (alternative setting: 16).
SYNTHETIC_NUM = 1000

# Node-count bounds passed to the graph generator (alternative setting: 4000 / 4001).
NUM_MIN = 500
NUM_MAX = 501

# Parallel flag handed to prepare_synthetic; switched on only for graphs of >= 1000 nodes.
IS_PARALLEL = NUM_MIN >= 1000

# --- Optimisation / model hyper-parameters ------------------------------------
MAX_EPOCHS = 10000
LEARNING_RATE = 1e-4
EMBEDDING_SIZE = 128
DEPTH = 5
BATCH_SIZE = 16

# Argument passed to prepare_test when building the validation data.
TEST1_NUM = 30

# Directory prefix (with trailing slash) for saved weights and metric CSVs.
MODEL_SAVED_PATH = "saved_model/"

## Read Graph

In [4]:
# Load test instance 0 through utils.read_test_data. Judging by the target names it yields
# the graph, its betweenness-centrality values and an edge index — TODO confirm against utils.
test1_g, test1_bc, test1_edgeindex = read_test_data(0)

## Generate Synthetic Graph

In [5]:
# Generate one sample graph with the same node-count bounds used for training
# (NUM_MIN / NUM_MAX from the config cell) instead of repeating the literals here,
# so this preview cannot drift out of sync with the training configuration.
train_g = gen_graph(NUM_MIN, NUM_MAX)
print(len(train_g.edges()))

1983


In [6]:
# [train_g.degree(i) for i in range(train_g.number_of_nodes())]

In [7]:
# nx.betweenness_centrality(train_g)

In [8]:
# (np.array(list(train_g.edges())) + 100)[:10]

In [9]:
# nx.betweenness_centrality(train_g)

## DrBC

In [10]:
# from model1 import DrBC
from model import DrBC
import torch
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [11]:
# Instantiate the network and move it to the selected device.
model = DrBC()
model = model.to(device)

# Adam over every model parameter, stepping with the configured learning rate.
optim = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Binary cross-entropy on raw logits, averaged over all elements; train() applies it
# to pairwise score differences.
loss_fn = torch.nn.BCEWithLogitsLoss(reduction='mean')

In [12]:
# Note: there are no call parentheses, so this displays the bound method's repr
# (which embeds the module summary) rather than the parameter tensors themselves.
model.parameters

<bound method Module.parameters of DrBC(
  (linear0): Linear(in_features=3, out_features=128, bias=True)
  (gcn): GCNConv()
  (gru): GRUCell(128, 128)
  (mlp): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)>

In [13]:
# list(model.parameters())[9].grad.data

In [14]:
# Materialise the model's parameters once, then list each one's shape by position.
pm = [*model.parameters()]

for i, p in enumerate(pm):
    print(f"pm{i} shape: {p.shape}")

pm0 shape: torch.Size([128, 3])
pm1 shape: torch.Size([128])
pm2 shape: torch.Size([384, 128])
pm3 shape: torch.Size([384, 128])
pm4 shape: torch.Size([384])
pm5 shape: torch.Size([384])
pm6 shape: torch.Size([64, 128])
pm7 shape: torch.Size([64])
pm8 shape: torch.Size([1, 64])
pm9 shape: torch.Size([1])


In [15]:
def _validate_and_log(model, v_data, step, ls_metric):
    """Validate ``model`` once, record the metrics and print a one-line summary.

    Appends ``[step, acc1, acc5, acc10, kendall, time_spent]`` to ``ls_metric`` in place.
    """
    val_acc1, val_acc5, val_acc10, val_kendall, time_spent = validate(model, v_data)
    ls_metric.append([step, val_acc1, val_acc5, val_acc10, val_kendall, time_spent])
    print(f"[{step}] Val Acc1: {val_acc1 * 100:.2f} % | Acc5: {val_acc5 * 100:.2f} % | Acc10: {val_acc10 * 100:.2f} % | KendallTau: {val_kendall:.4f} | spend: {time_spent} secs")
    print('-'*50)


def train(model, optim, loss_fn, epochs:int, batch_size=None, regen_every:int=5000, val_every:int=500):
    """Train ``model`` on synthetic graphs with a pairwise ranking loss.

    Each epoch shuffles the synthetic pool, takes the first ``batch_size`` graphs,
    runs one forward/backward pass and one optimiser step. Validation runs every
    ``val_every`` epochs and once more after the final epoch.

    Args:
        model: network called as ``model(X, edge_index)``.
        optim: optimiser bound to ``model``'s parameters.
        loss_fn: callable ``loss_fn(prediction_diff, target)`` returning a scalar loss.
        epochs: number of training iterations.
        batch_size: graphs per iteration; ``None`` (default) uses ``BATCH_SIZE``
            from the config cell instead of a hard-coded literal.
        regen_every: regenerate the synthetic graph pool every this many epochs
            (never at epoch 0, where the pool has just been created).
        val_every: run validation every this many epochs.

    Returns:
        list of ``[epoch, acc1, acc5, acc10, kendall, time_spent]`` rows, one per
        validation run; the last row is tagged with ``epochs``.

    Raises:
        ValueError: if ``batch_size``, ``regen_every`` or ``val_every`` is not positive.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE
    if min(batch_size, regen_every, val_every) <= 0:
        raise ValueError("batch_size, regen_every and val_every must be positive")

    g_list, dg_list, bc_list = prepare_synthetic(SYNTHETIC_NUM, (NUM_MIN, NUM_MAX), IS_PARALLEL)
    v_data = prepare_test(TEST1_NUM)
    ls_metric = []
    epoch_bar = tqdm(range(epochs), desc="Epochs")
    for e in epoch_bar:
        if (e % regen_every == 0) and (e != 0):
            # Periodically replace the whole synthetic pool with freshly generated graphs.
            g_list, dg_list, bc_list = prepare_synthetic(SYNTHETIC_NUM, (NUM_MIN, NUM_MAX), IS_PARALLEL)

        model.train()
        g_list, dg_list, bc_list = shuffle_graph(g_list, dg_list, bc_list)
        # After shuffling, the head of the pool serves as this iteration's mini-batch.
        train_g, train_dg, train_bc = g_list[:batch_size], dg_list[:batch_size], bc_list[:batch_size]
        X, y, edge_index = preprocessing_data(train_g, train_dg, train_bc)
        X, y, edge_index = X.to(device), y.to(device), edge_index.to(device)
        out = model(X, edge_index)

        # Pairwise loss: compare predicted score differences against the sigmoid of the
        # ground-truth differences for the (source, target) index pairs.
        s_ids, t_ids = get_pairwise_ids(train_g)
        out_diff = out[s_ids] - out[t_ids]
        y_diff = y[s_ids] - y[t_ids]
        loss = loss_fn(out_diff, torch.sigmoid(y_diff))

        # Standard optimisation step.
        optim.zero_grad()
        loss.backward()
        optim.step()

        epoch_bar.set_postfix(loss=loss.item())
        if e % val_every == 0:
            _validate_and_log(model, v_data, e, ls_metric)

    # Final validation after the last update, tagged with the total epoch count.
    _validate_and_log(model, v_data, epochs, ls_metric)

    return ls_metric

# Run the full training loop; train_metric holds the validation rows returned by train().
train_metric = train(model, optim, loss_fn, MAX_EPOCHS)

[Generating new training graph]:   0%|          | 0/1000 [00:00<?, ?it/s]

[Reading test1 graph]:   0%|          | 0/30 [00:00<?, ?it/s]

Epochs:   0%|          | 0/10000 [00:00<?, ?it/s]

[0] Val Acc1: 94.67 % | Acc5: 91.64 % | Acc10: 88.81 % | KendallTau: 0.6560 | spend: 0.19 secs
--------------------------------------------------
[500] Val Acc1: 95.80 % | Acc5: 93.07 % | Acc10: 91.15 % | KendallTau: 0.7683 | spend: 0.19 secs
--------------------------------------------------
[1000] Val Acc1: 95.87 % | Acc5: 92.93 % | Acc10: 90.95 % | KendallTau: 0.7616 | spend: 0.19 secs
--------------------------------------------------
[1500] Val Acc1: 96.00 % | Acc5: 92.84 % | Acc10: 90.85 % | KendallTau: 0.7671 | spend: 0.19 secs
--------------------------------------------------
[2000] Val Acc1: 96.00 % | Acc5: 92.91 % | Acc10: 91.11 % | KendallTau: 0.7761 | spend: 0.19 secs
--------------------------------------------------
[2500] Val Acc1: 95.87 % | Acc5: 92.85 % | Acc10: 91.00 % | KendallTau: 0.7766 | spend: 0.19 secs
--------------------------------------------------
[3000] Val Acc1: 95.93 % | Acc5: 92.59 % | Acc10: 90.63 % | KendallTau: 0.7706 | spend: 0.19 secs
------------

[Generating new training graph]:   0%|          | 0/1000 [00:00<?, ?it/s]

[5000] Val Acc1: 96.27 % | Acc5: 92.23 % | Acc10: 90.65 % | KendallTau: 0.7830 | spend: 0.19 secs
--------------------------------------------------
[5500] Val Acc1: 96.20 % | Acc5: 92.32 % | Acc10: 91.07 % | KendallTau: 0.7897 | spend: 0.19 secs
--------------------------------------------------
[6000] Val Acc1: 96.13 % | Acc5: 92.36 % | Acc10: 91.09 % | KendallTau: 0.7862 | spend: 0.19 secs
--------------------------------------------------
[6500] Val Acc1: 96.20 % | Acc5: 92.51 % | Acc10: 91.33 % | KendallTau: 0.7943 | spend: 0.19 secs
--------------------------------------------------
[7000] Val Acc1: 96.47 % | Acc5: 92.39 % | Acc10: 91.22 % | KendallTau: 0.7935 | spend: 0.19 secs
--------------------------------------------------
[7500] Val Acc1: 96.33 % | Acc5: 92.53 % | Acc10: 91.26 % | KendallTau: 0.7848 | spend: 0.19 secs
--------------------------------------------------
[8000] Val Acc1: 96.40 % | Acc5: 92.39 % | Acc10: 91.31 % | KendallTau: 0.7846 | spend: 0.19 secs
--------

### Save model / training metrics

In [16]:
# Create the output directory up front: neither torch.save nor DataFrame.to_csv creates
# missing parent directories, and failing here would discard a finished training run.
if MODEL_SAVED_PATH:
    os.makedirs(MODEL_SAVED_PATH, exist_ok=True)

# Shared suffix so the weights file and the metrics CSV are always named consistently.
run_tag = f'G{SYNTHETIC_NUM}_N{NUM_MIN}_E{MAX_EPOCHS}'

model_saved_name = f'{MODEL_SAVED_PATH}DrBC_{run_tag}.pth'
torch.save(model.state_dict(), model_saved_name)

# One row per validation run, in the column order produced by train().
df = pd.DataFrame(train_metric, columns=['epochs', 'val_acc1', 'val_acc5', 'val_acc10', 'val_kendall', 'time'])
df.to_csv(f"{MODEL_SAVED_PATH}train_metrics_{run_tag}.csv", index=False)
df

Unnamed: 0,epochs,val_acc1,val_acc5,val_acc10,val_kendall,time
0,0,0.946667,0.9164,0.888067,0.655963,0.19
1,500,0.958,0.930667,0.911467,0.768302,0.19
2,1000,0.958667,0.929333,0.909533,0.761626,0.19
3,1500,0.96,0.9284,0.908467,0.767122,0.19
4,2000,0.96,0.929067,0.911133,0.776095,0.19
5,2500,0.958667,0.928533,0.91,0.77662,0.19
6,3000,0.959333,0.925867,0.906333,0.770584,0.19
7,3500,0.96,0.927067,0.9108,0.782054,0.19
8,4000,0.958667,0.927467,0.910933,0.78285,0.19
9,4500,0.959333,0.9268,0.910667,0.790206,0.19


### Check loading model

In [17]:
# Rebuild a fresh model and restore the saved weights as a round-trip check.
# map_location remaps the stored tensors onto the current device, so a checkpoint
# written on a CUDA machine can still be loaded on a CPU-only kernel.
model = DrBC().to(device)
model.load_state_dict(torch.load(model_saved_name, map_location=device))

<All keys matched successfully>

In [18]:
# G1000_N200_E10000
# acc: 
# 0.600546
# 0.601509
# 0.636026
# kendall:  0.527324

# G1000_N5000_E5000
# acc: 
# 0.61491
# 0.633318
# 0.66736
# kendall:  0.513669

In [19]:
# synthetic graph num: 100
# synthetic node num: 200
# epoch: 200


# with L2 norm
# acc: 
# 0.613588
# 0.495506
# 0.302029
# kendall:  -0.435382

# without L2 norm + bc apply log
# 0.615791
# 0.618709
# 0.643578
# kendall:  0.288244


## To-Do List
* (done) loss_fn 再加上 sigmoid
* (done) pairwise 目前跨圖了
* (done) h 要 normalized
* (done) aggregate 改成 MessagePassing
* (done) synthetic graph 後，shuffle graph 的順序
* (done) 加入 Epochs
* (done) change to leaky relu
* (done) Metric: top1, 5, 10
* (done) Metric: kendall tau distance
* (done) wall-clock running time
* (done) test step
