In [1]:
import math
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
from tqdm import tqdm

In [2]:
from utils import read_test_data
from utils import gen_graph
from utils import prepare_synthetic
from utils import shuffle_graph
from utils import preprocessing_data
from utils import get_pairwise_ids

from utils import prepare_test
from utils import top_n_acc

In [3]:
# Notebook-wide configuration constants.

# NOTE(review): RANDOM_STATE is not referenced by any visible cell and no RNG
# is seeded with it here — confirm whether utils/model read it.
RANDOM_STATE = 11
# Passed to prepare_synthetic() inside train() as the first argument.
SYNTHETIC_NUM = 100
# SYNTHETIC_NUM = 1000

# number of gen nodes
# (NUM_MIN, NUM_MAX) is passed as a tuple to prepare_synthetic() inside train().
# NUM_MIN = 4000
# NUM_MAX = 4001
NUM_MIN = 200
NUM_MAX = 201


# NOTE(review): MAX_EPOCHS is not referenced by any visible cell; train() is
# invoked with a literal epoch count instead.
MAX_EPOCHS = 10000
# Learning rate handed to torch.optim.Adam when the optimizer is built.
LEARNING_RATE = 1e-4
# NOTE(review): EMBEDDING_SIZE and DEPTH are not referenced by any visible
# cell (DrBC() is constructed without arguments) — presumably mirrored inside
# model.py; verify.
EMBEDDING_SIZE = 128
DEPTH = 5
# Number of graphs sliced out of the training lists per optimisation step in train().
BATCH_SIZE = 16

# Passed to prepare_test() inside train() to build the validation data.
TEST1_NUM = 30

## Read Graph

In [4]:
# Load one test graph through utils.read_test_data. None of the three results
# is used by the later visible cells (validation data comes from prepare_test).
# presumably the argument 0 selects which test file to read — verify in utils.
test1_g, test1_bc, test1_edgeindex = read_test_data(0)

## Generate Synthetic Graph

In [5]:
# Exploratory: build a single graph and print how many edges it has.
# The literals 500/501 are independent of NUM_MIN/NUM_MAX (presumably a
# node-count range — verify in utils.gen_graph).
# This global `train_g` is only inspected in the following cells; train()
# assigns its own local variable of the same name.
train_g = gen_graph(500, 501)
print(len(train_g.edges()))

1983


In [6]:
# [train_g.degree(i) for i in range(train_g.number_of_nodes())]

In [7]:
# nx.betweenness_centrality(train_g)

In [8]:
# Exploratory: show the first 10 edges as an array with 100 added to every node id.
(np.array(list(train_g.edges())) + 100)[:10]

array([[100, 104],
       [100, 105],
       [100, 106],
       [100, 107],
       [100, 109],
       [100, 110],
       [100, 111],
       [100, 112],
       [100, 122],
       [100, 130]])

In [9]:
# nx.betweenness_centrality(train_g)

## DrBC

In [10]:
from scipy import stats
# from model1 import DrBC
from model import DrBC
import torch
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is read as a global by validate() and train().
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [11]:
# Build the DrBC model with its default constructor arguments and move it to `device`.
model = DrBC().to(device)
# Adam over all model parameters, using the configured learning rate.
optim = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Mean-reduced binary cross-entropy on logits; train() feeds it pairwise
# differences of model outputs as logits and sigmoid(pairwise label
# differences) as targets.
loss_fn = torch.nn.BCEWithLogitsLoss(reduction='mean')

In [12]:
# The attribute is not called here, so the cell displays the bound-method
# repr, which embeds the module's layer structure.
model.parameters

<bound method Module.parameters of DrBC(
  (linear0): Linear(in_features=3, out_features=128, bias=True)
  (gcn): GCNConv()
  (gru): GRUCell(128, 128)
  (mlp): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)>

In [13]:
# list(model.parameters())[9].grad.data

In [14]:
# Materialise the parameter tensors in a list and print each one's shape,
# numbered in the order model.parameters() yields them.
pm = list(model.parameters())

for i, p in enumerate(pm):
    print(f"pm{i} shape: {p.shape}")

pm0 shape: torch.Size([128, 3])
pm1 shape: torch.Size([128])
pm2 shape: torch.Size([384, 128])
pm3 shape: torch.Size([384, 128])
pm4 shape: torch.Size([384])
pm5 shape: torch.Size([384])
pm6 shape: torch.Size([64, 128])
pm7 shape: torch.Size([64])
pm8 shape: torch.Size([1, 64])
pm9 shape: torch.Size([1])


In [15]:
def validate(model, v_data):
    """Evaluate `model` on a collection of graphs and average the ranking metrics.

    For every graph the predicted scores are compared with the ground-truth
    scores through ``top_n_acc`` (n = 1, 5 and 10, on the descending argsort
    of both score arrays) and ``scipy.stats.kendalltau``.

    Args:
        model: Module called as ``model(X, edge_index)``. It is switched to
            eval mode and is left in that mode when the function returns.
        v_data: Iterable of ``(X, y, edge_index)`` triples. ``X`` and
            ``edge_index`` are moved to the global ``device``; ``y`` is
            converted with ``.numpy()`` directly, so it must already live on
            the CPU.

    Returns:
        Tuple ``(acc1, acc5, acc10, kendall, time_spent)``. The four metrics
        are means over all graphs rounded to 6 decimals; ``time_spent`` is the
        wall-clock duration in seconds rounded to 1 decimal.

    Raises:
        ValueError: If ``v_data`` yields no graphs, because the means are
            undefined in that case.
    """
    model.eval()
    total_acc1 = 0.
    total_acc5 = 0.
    total_acc10 = 0.
    total_kendall = 0.
    # Counted while iterating so that v_data does not have to support len().
    graph_cnt = 0
    start_time = time.time()
    for val_X, val_y, val_edge_index in v_data:
        val_X, val_edge_index = val_X.to(device), val_edge_index.to(device)

        # Inference only: no autograd bookkeeping is needed for the metrics.
        with torch.no_grad():
            val_y_pred = model(val_X, val_edge_index)

        val_y_pred = val_y_pred.cpu().detach().numpy().astype('float64')
        val_y = val_y.detach().numpy().astype('float64')

        # Indices ordered from highest to lowest score.
        pred_index = val_y_pred.argsort()[::-1]
        true_index = val_y.argsort()[::-1]

        acc1 = top_n_acc(pred_index, true_index, n=1)
        acc5 = top_n_acc(pred_index, true_index, n=5)
        acc10 = top_n_acc(pred_index, true_index, n=10)
        kendall_t, _ = stats.kendalltau(val_y_pred, val_y)

        total_acc1 += acc1
        total_acc5 += acc5
        total_acc10 += acc10
        total_kendall += kendall_t
        graph_cnt += 1

    if graph_cnt == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("validate() received no graphs in v_data")

    total_acc1 /= graph_cnt
    total_acc5 /= graph_cnt
    total_acc10 /= graph_cnt
    total_kendall /= graph_cnt
    time_spent = round(time.time() - start_time, 1)
    print(f'val spend: {time_spent} secs')
    return round(total_acc1, 6), \
        round(total_acc5, 6), \
        round(total_acc10, 6), \
        round(total_kendall, 6), \
        time_spent
    

def train(model, optim, loss_fn, epochs:int, val_every:int=50):
    """Train `model` on synthetic graphs with a pairwise loss and validate periodically.

    Training data comes from ``prepare_synthetic(SYNTHETIC_NUM, (NUM_MIN,
    NUM_MAX))`` and validation data from ``prepare_test(TEST1_NUM)``; both are
    built once at the start of the call. For every batch of ``BATCH_SIZE``
    graphs the model output is compared pairwise: for the node-index pairs
    returned by ``get_pairwise_ids`` the logits are ``out[s] - out[t]`` and
    the targets are ``sigmoid(y[s] - y[t])``.

    Args:
        model: Module called as ``model(X, edge_index)``.
        optim: Optimizer stepping the model's parameters.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar loss.
        epochs: Index of the last epoch; epochs ``0 .. epochs`` inclusive are run.
        val_every: Validate whenever the epoch index is a multiple of this value.

    Returns:
        List with one entry ``[epoch, acc1, acc5, acc10, kendall, time_spent]``
        per validation run, in chronological order.

    Raises:
        ValueError: If ``val_every`` is smaller than 1.
    """
    if val_every < 1:
        raise ValueError("val_every must be a positive integer")

    g_list, dg_list, bc_list  = prepare_synthetic(SYNTHETIC_NUM, (NUM_MIN, NUM_MAX))
    v_data = prepare_test(TEST1_NUM)

    ls_metric = []
    # Integer division: graphs beyond the last full batch are not visited in an epoch.
    batch_cnt = len(g_list) // BATCH_SIZE
    # epochs + 1 iterations so that the epoch with index `epochs` is reached too.
    for e in range(epochs + 1):
        # validate() leaves the model in eval mode, so re-enable train mode every epoch.
        model.train()
        g_list, dg_list, bc_list = shuffle_graph(g_list, dg_list, bc_list)
        batch_bar = tqdm(range(batch_cnt))
        batch_bar.set_description(f'Epochs {e:<5}')
        train_loss = 0
        pair_cnt = 0

        for i in batch_bar:
            # batch
            s_index, e_index = i*BATCH_SIZE, (i+1)*BATCH_SIZE
            train_g, train_dg, train_bc = g_list[s_index: e_index], dg_list[s_index: e_index], bc_list[s_index: e_index]
            X, y, edge_index = preprocessing_data(train_g, train_dg, train_bc)
            X, y, edge_index = X.to(device), y.to(device), edge_index.to(device)
            out = model(X, edge_index)

            # pairwise-loss
            s_ids, t_ids = get_pairwise_ids(train_g)
            out_diff = out[s_ids] - out[t_ids]
            y_diff = y[s_ids] - y[t_ids]
            loss = loss_fn(out_diff, torch.sigmoid(y_diff))

            # optim
            optim.zero_grad()
            loss.backward()
            optim.step()

            # Weight each batch loss by its number of pairs so the epoch
            # figure is a per-pair average.
            pair_cnt += s_ids.shape[0]
            train_loss += (loss.item() * s_ids.shape[0])
            if i == (batch_cnt-1):
                # last batch
                train_loss /= pair_cnt
                batch_bar.set_postfix(loss=train_loss)
        if e % val_every == 0:
            # validate
            val_acc1, val_acc5, val_acc10, val_kendall, time_spent = validate(model, v_data)
            ls_metric.append([e, val_acc1, val_acc5, val_acc10, val_kendall, time_spent])
            print(f"Val Acc1: {val_acc1 * 100:.2f} % | Acc5: {val_acc5 * 100:.2f} % | Acc10: {val_acc10 * 100:.2f} % | Val KendallTau: {val_kendall:.4f}")

    # Hand the collected validation history back instead of discarding it.
    return ls_metric

# Keep the validation history so it can be inspected or plotted afterwards.
val_history = train(model, optim, loss_fn, 200)

[Generating new training graph]: 100%|██████████| 100/100 [00:11<00:00,  8.71it/s]
[Reading test1 graph]: 100%|██████████| 30/30 [00:05<00:00,  5.18it/s]
Epochs 0    : 100%|██████████| 6/6 [00:01<00:00,  4.73it/s, loss=0.678]


val spend: 0.2 secs
Val Acc1: 89.40 % | Acc5: 79.45 % | Acc10: 75.87 % | Val KendallTau: 0.1127


Epochs 1    : 100%|██████████| 6/6 [00:00<00:00, 47.78it/s, loss=0.631]
Epochs 2    : 100%|██████████| 6/6 [00:00<00:00, 47.12it/s, loss=0.601]
Epochs 3    : 100%|██████████| 6/6 [00:00<00:00, 47.84it/s, loss=0.58]
Epochs 4    : 100%|██████████| 6/6 [00:00<00:00, 47.70it/s, loss=0.564]
Epochs 5    : 100%|██████████| 6/6 [00:00<00:00, 40.97it/s, loss=0.553]
Epochs 6    : 100%|██████████| 6/6 [00:00<00:00, 51.45it/s, loss=0.545]
Epochs 7    : 100%|██████████| 6/6 [00:00<00:00, 44.56it/s, loss=0.538]
Epochs 8    : 100%|██████████| 6/6 [00:00<00:00, 45.51it/s, loss=0.533]
Epochs 9    : 100%|██████████| 6/6 [00:00<00:00, 45.56it/s, loss=0.529]
Epochs 10   : 100%|██████████| 6/6 [00:00<00:00, 47.60it/s, loss=0.525]
Epochs 11   : 100%|██████████| 6/6 [00:00<00:00, 48.63it/s, loss=0.523]
Epochs 12   : 100%|██████████| 6/6 [00:00<00:00, 44.69it/s, loss=0.521]
Epochs 13   : 100%|██████████| 6/6 [00:00<00:00, 50.43it/s, loss=0.519]
Epochs 14   : 100%|██████████| 6/6 [00:00<00:00, 47.61it/s, loss=

val spend: 0.2 secs
Val Acc1: 95.80 % | Acc5: 92.93 % | Acc10: 89.57 % | Val KendallTau: 0.6204


Epochs 51   : 100%|██████████| 6/6 [00:00<00:00, 49.03it/s, loss=0.503]
Epochs 52   : 100%|██████████| 6/6 [00:00<00:00, 49.74it/s, loss=0.502]
Epochs 53   : 100%|██████████| 6/6 [00:00<00:00, 46.50it/s, loss=0.503]
Epochs 54   : 100%|██████████| 6/6 [00:00<00:00, 46.39it/s, loss=0.503]
Epochs 55   : 100%|██████████| 6/6 [00:00<00:00, 46.92it/s, loss=0.502]
Epochs 56   : 100%|██████████| 6/6 [00:00<00:00, 44.63it/s, loss=0.501]
Epochs 57   : 100%|██████████| 6/6 [00:00<00:00, 46.63it/s, loss=0.502]
Epochs 58   : 100%|██████████| 6/6 [00:00<00:00, 46.95it/s, loss=0.501]
Epochs 59   : 100%|██████████| 6/6 [00:00<00:00, 45.14it/s, loss=0.502]
Epochs 60   : 100%|██████████| 6/6 [00:00<00:00, 46.29it/s, loss=0.501]
Epochs 61   : 100%|██████████| 6/6 [00:00<00:00, 47.89it/s, loss=0.502]
Epochs 62   : 100%|██████████| 6/6 [00:00<00:00, 47.59it/s, loss=0.502]
Epochs 63   : 100%|██████████| 6/6 [00:00<00:00, 45.83it/s, loss=0.501]
Epochs 64   : 100%|██████████| 6/6 [00:00<00:00, 48.65it/s, loss

val spend: 0.2 secs
Val Acc1: 95.80 % | Acc5: 91.95 % | Acc10: 88.62 % | Val KendallTau: 0.6652


Epochs 101  : 100%|██████████| 6/6 [00:00<00:00, 47.95it/s, loss=0.498]
Epochs 102  : 100%|██████████| 6/6 [00:00<00:00, 48.27it/s, loss=0.499]
Epochs 103  : 100%|██████████| 6/6 [00:00<00:00, 49.94it/s, loss=0.499]
Epochs 104  : 100%|██████████| 6/6 [00:00<00:00, 50.25it/s, loss=0.498]
Epochs 105  : 100%|██████████| 6/6 [00:00<00:00, 49.97it/s, loss=0.498]
Epochs 106  : 100%|██████████| 6/6 [00:00<00:00, 48.11it/s, loss=0.499]
Epochs 107  : 100%|██████████| 6/6 [00:00<00:00, 47.43it/s, loss=0.499]
Epochs 108  : 100%|██████████| 6/6 [00:00<00:00, 49.37it/s, loss=0.499]
Epochs 109  : 100%|██████████| 6/6 [00:00<00:00, 47.54it/s, loss=0.499]
Epochs 110  : 100%|██████████| 6/6 [00:00<00:00, 46.37it/s, loss=0.498]
Epochs 111  : 100%|██████████| 6/6 [00:00<00:00, 48.81it/s, loss=0.498]
Epochs 112  : 100%|██████████| 6/6 [00:00<00:00, 47.17it/s, loss=0.499]
Epochs 113  : 100%|██████████| 6/6 [00:00<00:00, 45.96it/s, loss=0.499]
Epochs 114  : 100%|██████████| 6/6 [00:00<00:00, 46.54it/s, loss

val spend: 0.2 secs
Val Acc1: 96.13 % | Acc5: 91.55 % | Acc10: 88.85 % | Val KendallTau: 0.6768


Epochs 151  : 100%|██████████| 6/6 [00:00<00:00, 45.09it/s, loss=0.498]
Epochs 152  : 100%|██████████| 6/6 [00:00<00:00, 46.66it/s, loss=0.498]
Epochs 153  : 100%|██████████| 6/6 [00:00<00:00, 49.32it/s, loss=0.499]
Epochs 154  : 100%|██████████| 6/6 [00:00<00:00, 50.47it/s, loss=0.499]
Epochs 155  : 100%|██████████| 6/6 [00:00<00:00, 47.48it/s, loss=0.498]
Epochs 156  : 100%|██████████| 6/6 [00:00<00:00, 47.57it/s, loss=0.498]
Epochs 157  : 100%|██████████| 6/6 [00:00<00:00, 47.29it/s, loss=0.499]
Epochs 158  : 100%|██████████| 6/6 [00:00<00:00, 49.46it/s, loss=0.498]
Epochs 159  : 100%|██████████| 6/6 [00:00<00:00, 47.39it/s, loss=0.499]
Epochs 160  : 100%|██████████| 6/6 [00:00<00:00, 45.49it/s, loss=0.499]
Epochs 161  : 100%|██████████| 6/6 [00:00<00:00, 46.46it/s, loss=0.499]
Epochs 162  : 100%|██████████| 6/6 [00:00<00:00, 47.23it/s, loss=0.499]
Epochs 163  : 100%|██████████| 6/6 [00:00<00:00, 47.47it/s, loss=0.498]
Epochs 164  : 100%|██████████| 6/6 [00:00<00:00, 50.59it/s, loss

val spend: 0.2 secs
Val Acc1: 96.20 % | Acc5: 91.52 % | Acc10: 88.95 % | Val KendallTau: 0.6845


In [16]:
# Build the final test data and score the trained model with validate().
# NOTE(review): prepare_test receives the string 'y' here but an integer
# (TEST1_NUM) inside train() — presumably 'y' selects a different test set;
# verify in utils.prepare_test.
t_data = prepare_test('y')
test_acc1, test_acc5, test_acc10, test_kendall, test_spend = validate(model, t_data) 

val spend: 0.7 secs


  (2 * xtie * ytie) / m + x0 * y0 / (9 * m * (size - 2)))


In [17]:
# Print the three accuracy values one per line (sep='\n'), then Kendall's tau.
print('acc: ', test_acc1, test_acc5, test_acc10, sep='\n')
print('kendall: ', test_kendall)

acc: 
0.615791
0.618709
0.643578
kendall:  0.288244


In [18]:
# synthetic graph num: 100
# synthetic node num: 200
# epoch: 200


# with L2 norm
# acc: 
# 0.613588
# 0.495506
# 0.302029
# kendall:  -0.435382

# without L2 norm + bc apply log
# acc: 
# 0.615791
# 0.618709
# 0.643578
# kendall:  0.288244


In [19]:
# Display the mean Kendall's tau returned by validate() for the test data.
test_kendall

0.288244

## To-Do List
* (done) loss_fn 再加上 sigmoid
* (done) pairwise 目前跨圖了
* (done) h 要 normalized
* (done) aggregate 改成 MessagePassing
* (done) synthetic graph 後，shuffle graph 的順序
* (done) 加入 Epochs
* (done) change to leaky relu
* Metric: top1, 5, 10
* Metric: kendall tau distance
* wall-clock running time
* test step
