In [1]:
import math
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
from tqdm import tqdm

In [2]:
from utils import read_test1_data
from utils import gen_graph
from utils import prepare_synthetic
from utils import shuffle_graph
from utils import preprocessing_data
from utils import get_pairwise_ids

from utils import prepare_test1
from utils import top_n_acc

In [3]:
RANDOM_STATE = 11
SYNTHETIC_NUM = 50
# SYNTHETIC_NUM = 1000

# number of gen nodes
# NUM_MIN = 4000
# NUM_MAX = 4001
NUM_MIN = 200
NUM_MAX = 201


MAX_EPOCHS = 10000
LEARNING_RATE = 1e-4
EMBEDDING_SIZE = 128
DEPTH = 5
BATCH_SIZE = 16
# BATCH_SIZE = 1

TEST1_NUM = 1

## Read Graph

In [4]:
test1_X, test1_bc = read_test1_data(0)

## Generate Synthetic Graph

In [5]:
train_g = gen_graph(500, 501)
print(len(train_g.edges()))

1983


In [6]:
# nx.betweenness_centrality(train_g)

In [7]:
(np.array(list(train_g.edges())) + 100)[:10]

array([[100, 104],
       [100, 105],
       [100, 107],
       [100, 108],
       [100, 109],
       [100, 111],
       [100, 113],
       [100, 115],
       [100, 116],
       [100, 120]])

In [8]:
# nx.betweenness_centrality(train_g)

## DrBC

In [9]:
from scipy import stats
# from model1 import DrBC
from model import DrBC
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [10]:
model = DrBC().to(device)
optim = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = torch.nn.BCEWithLogitsLoss(reduction='sum')

In [11]:
model.parameters

<bound method Module.parameters of DrBC(
  (linear0): Linear(in_features=3, out_features=128, bias=True)
  (gcn): GCNConv()
  (gru): GRUCell(128, 128)
  (mlp): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)>

In [12]:
# list(model.parameters())[9].grad.data

In [13]:
pm = list(model.parameters())

for i, p in enumerate(pm):
    print(f"pm{i} shape: {p.shape}")

pm0 shape: torch.Size([128, 3])
pm1 shape: torch.Size([128])
pm2 shape: torch.Size([384, 128])
pm3 shape: torch.Size([384, 128])
pm4 shape: torch.Size([384])
pm5 shape: torch.Size([384])
pm6 shape: torch.Size([64, 128])
pm7 shape: torch.Size([64])
pm8 shape: torch.Size([1, 64])
pm9 shape: torch.Size([1])


In [14]:
# list(dict(nx.degree(train_g)).values())
# list(dict(nx.degree(train_g)).values())
# list(dict(nx.betweenness_centrality(train_g)).values())

In [15]:
def validate(model, v_data):
    # model.eval()
    total_acc = 0.
    total_kendall = 0.
    for val_X, val_y, val_edge_index in v_data:
        val_X, val_edge_index = val_X.to(device), val_edge_index.to(device)
        with torch.no_grad():
            val_y_pred = model(val_X, val_edge_index)

        print('val_X: ', val_X[:5])
        print('pred_y: ', val_y_pred[:5])
        print('val_y: ', val_y[:5])
        # return
        pred_index = val_y_pred.cpu().detach().numpy().argsort()[::-1]
        true_index = val_y.detach().numpy().argsort()[::-1]
        

        
        
        acc = top_n_acc(pred_index, true_index)
        kendall_t, _ = stats.kendalltau(pred_index, true_index)

        total_acc += acc
        total_kendall += kendall_t

    total_acc /= len(v_data)
    total_kendall /= len(v_data)
    return total_acc, total_kendall
    

def train(model, optim, loss_fn, epochs:int):
    g_list, dg_list, bc_list  = prepare_synthetic(SYNTHETIC_NUM, (NUM_MIN, NUM_MAX))
    v_g_list, v_dg_list, v_bc_list = prepare_test1(TEST1_NUM)
    v_data = []
    for i in range(TEST1_NUM):
        val_X, val_y, val_edge_index = preprocessing_data([v_g_list[i]], [v_dg_list[i]], [v_bc_list[i]])
        v_data.append([val_X, val_y, val_edge_index])

    ls_metric = []
    batch_cnt = len(g_list) // BATCH_SIZE
    for e in range(epochs + 1):
        model.train()
        g_list, dg_list, bc_list = shuffle_graph(g_list, dg_list, bc_list)
        batch_bar = tqdm(range(batch_cnt))
        batch_bar.set_description(f'Epochs {e:<5}')
        train_loss = 0
        pair_cnt = 0
        for i in batch_bar:
            # batch
            s_index, e_index = i*BATCH_SIZE, (i+1)*BATCH_SIZE
            train_g, train_dg, train_bc = g_list[s_index: e_index], dg_list[s_index: e_index], bc_list[s_index: e_index]
            X, y, edge_index = preprocessing_data(train_g, train_dg, train_bc)
            X, y, edge_index = X.to(device), y.to(device), edge_index.to(device)
            out = model(X, edge_index)

            # pairwise-loss
            s_ids, t_ids = get_pairwise_ids(train_g)
            out_diff = out[s_ids] - out[t_ids]
            y_diff = y[s_ids] - y[t_ids]
            loss = loss_fn(out_diff, torch.sigmoid(y_diff))

            # optim
            optim.zero_grad()
            loss.backward()
            optim.step()

            pair_cnt += s_ids.shape[0]
            train_loss += (loss.item() * s_ids.shape[0])
            if i == (batch_cnt - 1):
                # last batch
                train_loss /= pair_cnt
                batch_bar.set_postfix(loss=round(train_loss, 6)) 

        if e % 50 == 0:
            # print('out: ', out[:10])
            val_acc, val_kendall = validate(model, v_data)
            ls_metric.append([e, val_acc, val_kendall])
            print(f"Val Acc: {val_acc * 100:.4f} % | Val KendallTau: {val_kendall:.4f}")
        

_ = train(model, optim, loss_fn, 200)

[Generating new training graph]: 100%|██████████| 50/50 [00:05<00:00,  8.83it/s]
[Reading test1 graph]: 100%|██████████| 1/1 [00:00<00:00, 17.33it/s]
Epochs 0    : 100%|██████████| 3/3 [00:01<00:00,  2.42it/s, loss=1.11e+4]


tensor([[239.,   1.,   1.],
        [196.,   1.,   1.],
        [220.,   1.,   1.],
        [ 76.,   1.,   1.],
        [102.,   1.,   1.]], device='cuda:0')
tensor([0.0478, 0.0470, 0.0450, 0.0417, 0.0477], device='cuda:0')
Val Acc: 32.0000 % | Val KendallTau: 0.1492


Epochs 1    : 100%|██████████| 3/3 [00:00<00:00, 50.48it/s, loss=1.11e+4]
Epochs 2    : 100%|██████████| 3/3 [00:00<00:00, 46.86it/s, loss=1.11e+4]
Epochs 3    : 100%|██████████| 3/3 [00:00<00:00, 49.01it/s, loss=1.11e+4]
Epochs 4    : 100%|██████████| 3/3 [00:00<00:00, 45.84it/s, loss=1.11e+4]
Epochs 5    : 100%|██████████| 3/3 [00:00<00:00, 48.39it/s, loss=1.11e+4]
Epochs 6    : 100%|██████████| 3/3 [00:00<00:00, 42.79it/s, loss=1.11e+4]
Epochs 7    : 100%|██████████| 3/3 [00:00<00:00, 51.31it/s, loss=1.11e+4]
Epochs 8    : 100%|██████████| 3/3 [00:00<00:00, 52.05it/s, loss=1.11e+4]
Epochs 9    : 100%|██████████| 3/3 [00:00<00:00, 52.14it/s, loss=1.11e+4]
Epochs 10   : 100%|██████████| 3/3 [00:00<00:00, 46.95it/s, loss=1.11e+4]
Epochs 11   : 100%|██████████| 3/3 [00:00<00:00, 45.44it/s, loss=1.11e+4]
Epochs 12   : 100%|██████████| 3/3 [00:00<00:00, 47.10it/s, loss=1.11e+4]
Epochs 13   : 100%|██████████| 3/3 [00:00<00:00, 48.67it/s, loss=1.11e+4]
Epochs 14   : 100%|██████████| 3/3 [00

tensor([[239.,   1.,   1.],
        [196.,   1.,   1.],
        [220.,   1.,   1.],
        [ 76.,   1.,   1.],
        [102.,   1.,   1.]], device='cuda:0')
tensor([0.0894, 0.0665, 0.0238, 0.0083, 0.0873], device='cuda:0')
Val Acc: 32.0000 % | Val KendallTau: 0.1250


Epochs 51   : 100%|██████████| 3/3 [00:00<00:00, 51.18it/s, loss=1.11e+4]
Epochs 52   : 100%|██████████| 3/3 [00:00<00:00, 50.07it/s, loss=1.11e+4]
Epochs 53   : 100%|██████████| 3/3 [00:00<00:00, 48.04it/s, loss=1.11e+4]
Epochs 54   : 100%|██████████| 3/3 [00:00<00:00, 43.48it/s, loss=1.11e+4]
Epochs 55   : 100%|██████████| 3/3 [00:00<00:00, 50.86it/s, loss=1.11e+4]
Epochs 56   : 100%|██████████| 3/3 [00:00<00:00, 47.04it/s, loss=1.11e+4]
Epochs 57   : 100%|██████████| 3/3 [00:00<00:00, 43.32it/s, loss=1.11e+4]
Epochs 58   : 100%|██████████| 3/3 [00:00<00:00, 47.92it/s, loss=1.11e+4]
Epochs 59   : 100%|██████████| 3/3 [00:00<00:00, 51.14it/s, loss=1.11e+4]
Epochs 60   : 100%|██████████| 3/3 [00:00<00:00, 50.26it/s, loss=1.11e+4]
Epochs 61   : 100%|██████████| 3/3 [00:00<00:00, 45.43it/s, loss=1.11e+4]
Epochs 62   : 100%|██████████| 3/3 [00:00<00:00, 43.17it/s, loss=1.11e+4]
Epochs 63   : 100%|██████████| 3/3 [00:00<00:00, 50.94it/s, loss=1.11e+4]
Epochs 64   : 100%|██████████| 3/3 [00

tensor([[239.,   1.,   1.],
        [196.,   1.,   1.],
        [220.,   1.,   1.],
        [ 76.,   1.,   1.],
        [102.,   1.,   1.]], device='cuda:0')
tensor([0.1346, 0.1594, 0.0733, 0.0183, 0.1245], device='cuda:0')
Val Acc: 34.0000 % | Val KendallTau: 0.1452


Epochs 101  : 100%|██████████| 3/3 [00:00<00:00, 41.33it/s, loss=1.11e+4]
Epochs 102  : 100%|██████████| 3/3 [00:00<00:00, 47.87it/s, loss=1.11e+4]
Epochs 103  : 100%|██████████| 3/3 [00:00<00:00, 51.87it/s, loss=1.11e+4]
Epochs 104  : 100%|██████████| 3/3 [00:00<00:00, 45.08it/s, loss=1.11e+4]
Epochs 105  : 100%|██████████| 3/3 [00:00<00:00, 45.90it/s, loss=1.11e+4]
Epochs 106  : 100%|██████████| 3/3 [00:00<00:00, 48.33it/s, loss=1.11e+4]
Epochs 107  : 100%|██████████| 3/3 [00:00<00:00, 47.35it/s, loss=1.11e+4]
Epochs 108  : 100%|██████████| 3/3 [00:00<00:00, 41.61it/s, loss=1.11e+4]
Epochs 109  : 100%|██████████| 3/3 [00:00<00:00, 50.37it/s, loss=1.11e+4]
Epochs 110  : 100%|██████████| 3/3 [00:00<00:00, 47.39it/s, loss=1.11e+4]
Epochs 111  : 100%|██████████| 3/3 [00:00<00:00, 49.74it/s, loss=1.11e+4]
Epochs 112  : 100%|██████████| 3/3 [00:00<00:00, 48.44it/s, loss=1.11e+4]
Epochs 113  : 100%|██████████| 3/3 [00:00<00:00, 47.24it/s, loss=1.11e+4]
Epochs 114  : 100%|██████████| 3/3 [00

tensor([[239.,   1.,   1.],
        [196.,   1.,   1.],
        [220.,   1.,   1.],
        [ 76.,   1.,   1.],
        [102.,   1.,   1.]], device='cuda:0')
tensor([0.1632, 0.1857, 0.0846, 0.0178, 0.1491], device='cuda:0')
Val Acc: 30.0000 % | Val KendallTau: 0.1475


Epochs 151  : 100%|██████████| 3/3 [00:00<00:00, 49.09it/s, loss=1.11e+4]
Epochs 152  : 100%|██████████| 3/3 [00:00<00:00, 50.36it/s, loss=1.11e+4]
Epochs 153  : 100%|██████████| 3/3 [00:00<00:00, 49.80it/s, loss=1.11e+4]
Epochs 154  : 100%|██████████| 3/3 [00:00<00:00, 50.94it/s, loss=1.11e+4]
Epochs 155  : 100%|██████████| 3/3 [00:00<00:00, 53.67it/s, loss=1.11e+4]
Epochs 156  : 100%|██████████| 3/3 [00:00<00:00, 52.21it/s, loss=1.11e+4]
Epochs 157  : 100%|██████████| 3/3 [00:00<00:00, 49.15it/s, loss=1.11e+4]
Epochs 158  : 100%|██████████| 3/3 [00:00<00:00, 51.43it/s, loss=1.11e+4]
Epochs 159  : 100%|██████████| 3/3 [00:00<00:00, 50.30it/s, loss=1.11e+4]
Epochs 160  : 100%|██████████| 3/3 [00:00<00:00, 52.91it/s, loss=1.11e+4]
Epochs 161  : 100%|██████████| 3/3 [00:00<00:00, 49.15it/s, loss=1.11e+4]
Epochs 162  : 100%|██████████| 3/3 [00:00<00:00, 50.44it/s, loss=1.11e+4]
Epochs 163  : 100%|██████████| 3/3 [00:00<00:00, 49.42it/s, loss=1.11e+4]
Epochs 164  : 100%|██████████| 3/3 [00

tensor([[239.,   1.,   1.],
        [196.,   1.,   1.],
        [220.,   1.,   1.],
        [ 76.,   1.,   1.],
        [102.,   1.,   1.]], device='cuda:0')
tensor([0.1761, 0.1950, 0.0884, 0.0153, 0.1628], device='cuda:0')
Val Acc: 30.0000 % | Val KendallTau: 0.1443


In [16]:
# Read file
import urllib.request  

class readFile():
  def __init__(self,file):
    if file == 'y':
      url1 = 'https://raw.githubusercontent.com/emschenn/mlg_hw1/master/hw1_data/youtube/com-youtube.txt' 
      url2 = 'https://raw.githubusercontent.com/emschenn/mlg_hw1/master/hw1_data/youtube/com-youtube_score.txt' 
    else:
      url1 = 'https://raw.githubusercontent.com/emschenn/mlg_hw1/master/hw1_data/Synthetic/5000/' + file + '.txt'
      url2 = 'https://raw.githubusercontent.com/emschenn/mlg_hw1/master/hw1_data/Synthetic/5000/' + file + '_score.txt'
    self.bc_value,s_list,t_list,self.deg_list,n = [],[],[],[],0
    for line in urllib.request.urlopen(url2):
      _,v = line.decode('utf-8').split()
      self.bc_value.append([n,float(v)])
      n += 1
    for x in range(len(self.bc_value)):
      self.deg_list.append([0,1,1])
    for line in urllib.request.urlopen(url1):
      s,t = line.decode('utf-8').split()
      s,t = int(s),int(t)
      s_list.append(s)
      t_list.append(t)
      self.deg_list[s][0]+=1
      self.deg_list[t][0]+=1
    # self.edge_index=[s_list+t_list,t_list+s_list]
    self.edge_index=[s_list,t_list]

  def get_deg_list(self):
    # print(self.deg_list)
    return torch.Tensor(self.deg_list).cuda()

  def get_edge_index(self):
    # print(self.edge_index)
    return torch.tensor(self.edge_index,dtype=torch.long).cuda()

  def get_bc_value(self):
    # print(self.bc_value)
    return self.bc_value


In [21]:
# Evaluation
f = readFile('0')
model = model
t = f.get_deg_list()
t1 = f.get_edge_index()
print(t.shape, t1.shape)
with torch.no_grad():
  outs = model(t,t1)
  print('val_X: ', t[:5])
  print('pred_y: ', outs[:5])

# Top-N % accuracy
def takeSecond(elem):
    return elem[1]

def topN_accuracy(file,outs,n):
  predict_value,bc_value = [],[]
  for i,j in enumerate(outs.tolist()):
    predict_value.append([i,j])
  bc_value = f.get_bc_value()
  print('val_y: ', bc_value[:5])
  bc_value.sort(key = takeSecond,reverse = True)
  predict_value.sort(key = takeSecond,reverse = True)
  p,t = [],[]
  for x in range(int(len(predict_value)*n/100)):
    p.append(predict_value[x][0])
    t.append(bc_value[x][0])
  # print(t)
  # print(p)
  return(len(set(t)&set(p)) / len(p))

print(topN_accuracy(f,outs,n=1))
print(topN_accuracy(f,outs,n=5))
print(topN_accuracy(f,outs,n=10))

# Kendall tau
import scipy.stats as stats
def kendall_tau(file,outs):
  predict_value,bc_value = [],[]
  for i,j in enumerate(outs.tolist()):
    predict_value.append(j)
  for i in file.get_bc_value():
    bc_value.append(i[1])
  # print(predict_value)
  # print(bc_value)
  tau, _ = stats.kendalltau(predict_value, bc_value)
  return(tau)

print(kendall_tau(f,outs))

torch.Size([5000, 3]) torch.Size([2, 19982])
val_X:  tensor([[239.,   1.,   1.],
        [178.,   1.,   1.],
        [149.,   1.,   1.],
        [ 90.,   1.,   1.],
        [196.,   1.,   1.]], device='cuda:0')
pred_y:  tensor([0.1761, 0.1734, 0.1704, 0.1547, 0.1854], device='cuda:0')
val_y:  [[0, 0.09417453090592563], [1, 0.05397079661985897], [2, 0.04434365787783783], [3, 0.022325672571532364], [4, 0.0764376504965615]]
0.98
val_y:  [[0, 0.09417453090592563], [5, 0.092789552991686], [4, 0.0764376504965615], [1, 0.05397079661985897], [6, 0.05002370607942536]]
0.888
val_y:  [[0, 0.09417453090592563], [5, 0.092789552991686], [4, 0.0764376504965615], [1, 0.05397079661985897], [6, 0.05002370607942536]]
0.852
0.5481042554863033


In [18]:
# x[499:505]

In [19]:
# g = _[2]
# g.degree(list(range(99, 105)))

## To-Do List
* (done) loss_fn 再加上 sigmoid
* (done) pairwise 目前跨圖了
* (done) h 要 normalized
* (done) aggregate 改成 MessagePassing
* (done) synthetic graph 後，shuffle graph 的順序
* (done) 加入 Epochs
* Metric: top1, 5, 10
* Metric: kendall tau distance
* wall-clock running time
* test step
* (done) change to leaky relu -> back to relu