In [1]:
import math
import numpy as np
import torch
import torch_geometric
import networkx as nx
import matplotlib.pyplot as plt
import time
from tqdm.notebook import tqdm


In [2]:
from utils import read_test1_data
from utils import gen_graph
from utils import prepare_synthetic
from utils import preprocessing_data
from utils import get_pairwise_ids

In [3]:
RANDOM_STATE = 11
SYNTHETIC_NUM = 1000

# number of gen nodes
NUM_MIN = 100
NUM_MAX = 200

MAX_EPOCHS = 10000
LEARNING_RATE = 1e-4
EMBEDDING_SIZE = 128
DEPTH = 5
BATCH_SIZE = 16


## Read Graph

In [4]:
test1_X, test1_bc = read_test1_data(0)

## Generate Synthetic Graph

In [5]:
train_g = gen_graph(NUM_MIN, NUM_MAX)
print(len(train_g.edges()))

584


In [6]:
(np.array(list(train_g.edges())) + 100)[:10]

array([[100, 104],
       [100, 105],
       [100, 107],
       [100, 110],
       [100, 119],
       [100, 122],
       [100, 123],
       [100, 134],
       [100, 171],
       [100, 178]])

In [7]:
list(train_g.neighbors(0))

[4, 5, 7, 10, 19, 22, 23, 34, 71, 78, 99, 132, 133, 138]

In [8]:
# nx.betweenness_centrality(train_g)

## DrBC

In [9]:
import torch
from torch.nn import Module, Linear, Parameter, GRUCell, Sequential, ReLU, functional as t_F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [11]:
class GCNConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(GCNConv, self).__init__(aggr='add')
        self.lin = Linear(in_channels, out_channels, bias=False)
        self.bias = Parameter(torch.Tensor(out_channels))
        self.reset_parameters()

    def reset_parameters(self):
        self.lin.reset_parameters()
        self.bias.data.zero_()
    
    def forward(self, x, edge_index):
        row, col = edge_index
        rc = torch.cat([row, col], axis=0)
        deg = degree(rc, x.shape[0], dtype=x.dtype)
        deg += 1
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        out = self.propagate(edge_index, x=x, norm=norm)
        return out

    def message(self, x_j, norm):
        return norm.view(-1, 1) * x_j

In [12]:
class DrBC(Module):
    def __init__(self, embedding_size=EMBEDDING_SIZE, depth=DEPTH):
        super(DrBC, self).__init__()
        self.embedding_size = embedding_size
        self.depth = depth
        self.linear0 = Linear(3, self.embedding_size)
        self.gcn = GCNConv(self.embedding_size, self.embedding_size)
        self.gru = GRUCell(self.embedding_size, self.embedding_size)
        # decoder
        self.mlp = Sequential(
            Linear(self.embedding_size, self.embedding_size // 2),
            ReLU(),
            Linear(self.embedding_size // 2, 1)
        )
        

    def forward(self, X, edge_index):
        all_h = []
        h = self.linear0(X)
        h = torch.relu(h)
        h = t_F.normalize(h, p=2, dim=-1) # l2-norm
        all_h.append(torch.unsqueeze(h, dim=0))
        # GRUCell
        for i in range(self.depth-1):
            # neighborhood aggregation
            h_aggre = self.gcn(h, edge_index)
            h = self.gru(h_aggre, h)
            h = t_F.normalize(h, p=2, dim=-1) # l2-norm
            all_h.append(torch.unsqueeze(h, dim=0))
        
        # max pooling
        all_h = torch.cat(all_h, dim=0)
        h_max = torch.max(all_h, dim=0).values
        # print('h_max shape: ', h_max.shape)

        # Decoder
        out = self.mlp(h_max)
        out = torch.squeeze(out)
        # print('out shape: ', out.shape)
        return out
        
        
model = DrBC().to(device)
optim = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = torch.nn.BCEWithLogitsLoss()

In [13]:
model.parameters

<bound method Module.parameters of DrBC(
  (linear0): Linear(in_features=3, out_features=128, bias=True)
  (gcn): GCNConv()
  (gru): GRUCell(128, 128)
  (mlp): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)>

In [14]:
pm = list(model.parameters())

for i, p in enumerate(pm):
    print(f"pm{i} shape: {p.shape}")

pm0 shape: torch.Size([128, 3])
pm1 shape: torch.Size([128])
pm2 shape: torch.Size([128])
pm3 shape: torch.Size([128, 128])
pm4 shape: torch.Size([384, 128])
pm5 shape: torch.Size([384, 128])
pm6 shape: torch.Size([384])
pm7 shape: torch.Size([384])
pm8 shape: torch.Size([64, 128])
pm9 shape: torch.Size([64])
pm10 shape: torch.Size([1, 64])
pm11 shape: torch.Size([1])


In [16]:
def train(model, optim, loss_fn, epochs:int):
    g_list, dg_list, bc_list  = prepare_synthetic(SYNTHETIC_NUM, (NUM_MIN, NUM_MAX))
    print('-'*20, 'prepare systhetic done')
    for e in range(epochs):
        batch_cnt = len(g_list) // BATCH_SIZE
        batch_bar = tqdm(range(batch_cnt))
        batch_bar.set_description(f'Epochs {e+1:<5}: ')
        for i in batch_bar:
            # batch
            s_index, e_index = i*BATCH_SIZE, (i+1)*BATCH_SIZE
            train_g, train_dg, train_bc = g_list[s_index: e_index], dg_list[s_index: e_index], bc_list[s_index: e_index]
            X, y, edge_index = preprocessing_data(train_g, train_dg, train_bc)
            X, y, edge_index = X.to(device), y.to(device), edge_index.to(device)
            out = model(X, edge_index)

            # pairwise-loss
            s_ids, t_ids = get_pairwise_ids(train_g)
            out_diff = out[s_ids] - out[t_ids]
            y_diff = y[s_ids] - y[t_ids]
            loss = loss_fn(out_diff, torch.sigmoid(y_diff))

            # optim
            optim.zero_grad()
            loss.backward()
            optim.step()
            batch_bar.set_postfix(loss=loss.item())
            # print(f"Batch {i + 1}: Loss = {loss.item()}")
        
def validate():
    pass

_ = train(model, optim, loss_fn, 1000)

-------------------- prepare systhetic done


  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

## To-Do List
* (done) loss_fn 再加上 sigmoid
* (done) pairwise 目前跨圖了
* (done) h 要 normalized
* (doen) aggregate 改成 MessagePassing
* synthetic graph 後，shuffle graph 的順序
* 加入 Epochs
* Metric: top1, 5, 10
* Metric: kendall tau distance
* wall-clock running time
* test step