In [1]:
import math
import numpy as np
import torch
import torch_geometric
import networkx as nx
import matplotlib.pyplot as plt


In [2]:
from utils import read_test1_data
from utils import gen_graph

In [3]:
RANDOM_STATE = 11

SYNTHETIC_NUM = 3000
# SYNTHETIC_NUM = 100

# number of gen nodes
NUM_MIN = 100
NUM_MAX = 200

LEARNING_RATE = 1e-4
EMBEDDING_SIZE = 128
DEPTH = 5
BATCH_SIZE = 16


## Read Graph

In [4]:
test1_X, test1_bc = read_test1_data(0)

## Generate Synthetic Graph

In [5]:
train_g = gen_graph(NUM_MIN, NUM_MAX)
print(len(train_g.edges()))

416


In [6]:
np.array(list(train_g.edges())) + 100

array([[100, 104],
       [100, 105],
       [100, 107],
       [100, 134],
       [100, 162],
       [100, 168],
       [100, 176],
       [100, 194],
       [100, 200],
       [101, 104],
       [101, 106],
       [101, 109],
       [101, 110],
       [101, 112],
       [101, 118],
       [101, 131],
       [101, 140],
       [101, 152],
       [101, 170],
       [101, 187],
       [101, 193],
       [102, 104],
       [102, 105],
       [102, 106],
       [102, 107],
       [102, 109],
       [102, 111],
       [102, 112],
       [102, 113],
       [102, 114],
       [102, 115],
       [102, 116],
       [102, 117],
       [102, 120],
       [102, 122],
       [102, 123],
       [102, 124],
       [102, 126],
       [102, 127],
       [102, 128],
       [102, 130],
       [102, 131],
       [102, 132],
       [102, 135],
       [102, 140],
       [102, 142],
       [102, 143],
       [102, 144],
       [102, 148],
       [102, 150],
       [102, 154],
       [102, 157],
       [102,

In [7]:
list(train_g.neighbors(0))

[4, 5, 7, 34, 62, 68, 76, 94, 100]

In [8]:
ls = []
for node in list(train_g.nodes())[:5]:
    ls.append(list(train_g.neighbors(node)))


In [9]:
# nx.betweenness_centrality(train_g)

## DrBC

In [10]:
import torch
from torch.nn import Module, Linear, Parameter, GRUCell, Sequential, ReLU, functional as t_F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [12]:
def prepare_synthetic():
    g_list = []
    dg_list = []
    bc_list = []
    for i in range(SYNTHETIC_NUM):
        g = gen_graph(NUM_MIN, NUM_MAX)
        g_list.append(g)
        dg_list.append(list(dict(nx.degree(g)).values()))
        bc_list.append(list(nx.betweenness_centrality(g)))
        
    return g_list, dg_list, bc_list

def preprocessing_data(train_g:list, train_dg:list, train_bc:list):
    X = np.zeros(shape=(0, 3))
    y = np.zeros(shape=(0, ))
    edge_index = np.zeros(shape=(0, 2))
    pre_index = 0
    for i in range(len(train_bc)):
        assert len(train_dg[i]) == len(train_bc[i]) == len(train_g[i].nodes())
        # make suer is has same nodes number.
        num_node = len(train_dg[i])
        _X = np.expand_dims(np.array(train_dg[i]), axis=-1)
        _it = np.ones(shape=(_X.shape[0], 2))
        _X = np.hstack([_X, _it])
        X = np.append(X, _X, axis=0)

        _y = np.array(train_bc[i])
        y = np.append(y, _y, axis=0)

        _edge = np.array(list(train_g[i].edges())) + pre_index
        edge_index = np.append(edge_index, _edge, axis=0)

        pre_index += num_node
    X = torch.Tensor(X)
    y = torch.Tensor(y)
    edge_index = torch.Tensor(edge_index).T.to(torch.int64)
    # print(X.shape, y.shape, edge_index.shape)

    return X, y, edge_index

def get_pairwise_ids(g_list):
    s_ids = np.zeros(shape=(0, ), dtype=int)
    t_ids = np.zeros(shape=(0, ), dtype=int)
    pre_index = 0
    for g in g_list:
        num_node = len(g.nodes())
        ids_1 = np.repeat(np.arange(pre_index, pre_index+num_node), 5)
        ids_2 = np.repeat(np.arange(pre_index, pre_index+num_node), 5)

        np.random.shuffle(ids_1)
        np.random.shuffle(ids_2)

        s_ids = np.append(s_ids, ids_1, axis=0)
        t_ids = np.append(t_ids, ids_2, axis=0)
        pre_index += num_node
    return s_ids, t_ids

In [13]:
class GCNConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(GCNConv, self).__init__(aggr='add')
        self.lin = Linear(in_channels, out_channels, bias=False)
        self.bias = Parameter(torch.Tensor(out_channels))
        self.reset_parameters()

    def reset_parameters(self):
        self.lin.reset_parameters()
        self.bias.data.zero_()
    
    def forward(self, x, edge_index):
        # edge_index, _ = add_self_loops(edge_index, num_nodes=x.shape[0])
        # x = self.lin(x)

        row, col = edge_index
        rc = torch.cat([row, col], axis=0)
        deg = degree(rc, x.shape[0], dtype=x.dtype)
        deg += 1
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        out = self.propagate(edge_index, x=x, norm=norm)
        # out += self.bias
        return out

    def message(self, x_j, norm):
        return norm.view(-1, 1) * x_j



In [14]:
class DrBC(Module):
    def __init__(self, embedding_size=EMBEDDING_SIZE, depth=DEPTH):
        super(DrBC, self).__init__()
        self.embedding_size = embedding_size
        self.depth = depth
        self.linear0 = Linear(3, self.embedding_size)
        self.gcn = GCNConv(self.embedding_size, self.embedding_size)
        self.gru = GRUCell(self.embedding_size, self.embedding_size)
        # decoder
        self.mlp = Sequential(
            Linear(self.embedding_size, self.embedding_size // 2),
            ReLU(),
            Linear(self.embedding_size // 2, 1)
        )
        

    def forward(self, X, edge_index):
        all_h = []
        h = self.linear0(X)
        h = torch.relu(h)
        h = t_F.normalize(h, p=2, dim=-1) # l2-norm
        all_h.append(torch.unsqueeze(h, dim=0))
        # GRUCell
        for i in range(self.depth-1):
            # neighborhood aggregation
            h_aggre = self.gcn(h, edge_index)
            h = self.gru(h_aggre, h)
            h = t_F.normalize(h, p=2, dim=-1) # l2-norm
            all_h.append(torch.unsqueeze(h, dim=0))
        
        # max pooling
        all_h = torch.cat(all_h, dim=0)
        h_max = torch.max(all_h, dim=0).values
        # print('h_max shape: ', h_max.shape)

        # Decoder
        out = self.mlp(h_max)
        out = torch.squeeze(out)
        # print('out shape: ', out.shape)
        return out
        


        
model = DrBC().to(device)
optim = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = torch.nn.BCEWithLogitsLoss()

In [15]:
model.parameters

<bound method Module.parameters of DrBC(
  (linear0): Linear(in_features=3, out_features=128, bias=True)
  (gcn): GCNConv()
  (gru): GRUCell(128, 128)
  (mlp): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)>

In [16]:
pm = list(model.parameters())

pm[0].shape # W0
pm[1].shape # gru
pm[2].shape # 384, 128
pm[3].shape # 384, 128
pm[4].shape # 384
pm[5].shape # 384

for i, p in enumerate(pm):
    print(f"pm{i} shape: {p.shape}")

pm0 shape: torch.Size([128, 3])
pm1 shape: torch.Size([128])
pm2 shape: torch.Size([128])
pm3 shape: torch.Size([128, 128])
pm4 shape: torch.Size([384, 128])
pm5 shape: torch.Size([384, 128])
pm6 shape: torch.Size([384])
pm7 shape: torch.Size([384])
pm8 shape: torch.Size([64, 128])
pm9 shape: torch.Size([64])
pm10 shape: torch.Size([1, 64])
pm11 shape: torch.Size([1])


In [17]:
def train():
    g_list, dg_list, bc_list  = prepare_synthetic()
    print('-'*20, 'prepare systhetic done')
    batch_cnt = len(g_list) // BATCH_SIZE

    for i in range(batch_cnt):
        s_index = i*BATCH_SIZE
        e_index = (i+1)*BATCH_SIZE
        train_g, train_dg, train_bc = g_list[s_index: e_index], dg_list[s_index: e_index], bc_list[s_index: e_index]
        X, y, edge_index = preprocessing_data(train_g, train_dg, train_bc)
        X, y, edge_index = X.to(device), y.to(device), edge_index.to(device)
        out = model(X, edge_index)

        # pairwise-loss
        s_ids, t_ids = get_pairwise_ids(train_g)
        out_diff = out[s_ids] - out[t_ids]
        y_diff = y[s_ids] - y[t_ids]
        loss = loss_fn(out_diff, torch.sigmoid(y_diff))

        # optim
        optim.zero_grad()
        loss.backward()
        optim.step()
        print(f"Batch {i + 1}: Loss = {loss.item()}")
        # print('pm0: ', list(model.parameters())[0])
        
def validate():
    pass

_ = train()

-------------------- prepare systhetic done
Batch 1: Loss = 0.693223237991333
Batch 2: Loss = 0.69290691614151
Batch 3: Loss = 0.6925891041755676
Batch 4: Loss = 0.6922805309295654
Batch 5: Loss = 0.6919754147529602
Batch 6: Loss = 0.6917043924331665
Batch 7: Loss = 0.6914026737213135
Batch 8: Loss = 0.6911582946777344
Batch 9: Loss = 0.6909435391426086
Batch 10: Loss = 0.6906291246414185
Batch 11: Loss = 0.6903756260871887
Batch 12: Loss = 0.6901136040687561
Batch 13: Loss = 0.68986976146698
Batch 14: Loss = 0.6895608305931091
Batch 15: Loss = 0.6893821358680725
Batch 16: Loss = 0.6890992522239685
Batch 17: Loss = 0.6888858079910278
Batch 18: Loss = 0.6886112093925476
Batch 19: Loss = 0.688306450843811
Batch 20: Loss = 0.6880044937133789
Batch 21: Loss = 0.6877583861351013
Batch 22: Loss = 0.6875669956207275
Batch 23: Loss = 0.6873530149459839
Batch 24: Loss = 0.6869648098945618
Batch 25: Loss = 0.6868781447410583
Batch 26: Loss = 0.6865797638893127
Batch 27: Loss = 0.6862043142318726

## To-Do List
* (done) loss_fn 再加上 sigmoid
* (done) pairwise 目前跨圖了
* (done) h 要 normalized
* aggregate 改成 MessagePassing