<a href="https://colab.research.google.com/github/emschenn/mlg_hw1/blob/master/DrBC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# prepare model
import torch
import torch.nn as nn
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

class GCNConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(GCNConv, self).__init__(aggr='add')  # "Add" aggregation.
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index):
        # x has shape [N, in_channels]
        # edge_index has shape [2, E]

        # Step 1: Add self-loops to the adjacency matrix.
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))
        # Step 2: Linearly transform node feature matrix.
        x = self.lin(x)
        # Step 3: Compute normalization
        row, col = edge_index
        deg = degree(row, x.size(0), dtype=x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
        # Step 4-6: Start propagating messages.
        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x, norm=norm)

    def message(self, x_j, norm):
        # x_j has shape [E, out_channels]
        # Step 4: Normalize node features.
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        # aggr_out has shape [N, out_channels]
        # Step 6: Return new node embeddings.
        return aggr_out

class DrBC(nn.Module):
    def __init__(self,):
        super(DrBC, self).__init__()
        # Encoder
        self.fc1 = nn.Linear(3, 128)
        self.relu = nn.LeakyReLU()

        self.gcn1 = GCNConv(128, 128)
        self.gru1 = nn.GRU(128, 128)

        self.gcn2 = GCNConv(128, 128)
        self.gru2 = nn.GRU(128, 128)

        self.gcn3 = GCNConv(128, 128)
        self.gru3 = nn.GRU(128, 128)

        self.gcn4 = GCNConv(128, 128)
        self.gru4 = nn.GRU(128, 128)

        self.gcn5 = GCNConv(128, 128)
        self.gru5 = nn.GRU(128, 128)

        # Decoder
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.LeakyReLU()
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x, edge_index):
        x = self.fc1(x)
        x = self.relu(x)        
        x_n = self.gcn1(x, edge_index)
        x1, _ = self.gru1(x_n.view(1, *x_n.shape), x.view(1, *x.shape))
        x_n = self.gcn2(x1[0], edge_index)
        x2, _ = self.gru2(x_n.view(1, *x_n.shape), x1)
        x_n = self.gcn3(x2[0], edge_index)
        x3, _ = self.gru3(x_n.view(1, *x_n.shape), x2)
        x_n = self.gcn4(x3[0], edge_index)
        x4, _ = self.gru4(x_n.view(1, *x_n.shape), x3)
        x_n = self.gcn5(x4[0], edge_index)
        x5, _ = self.gru5(x_n.view(1, *x_n.shape), x4)
        
        # max
        l = [x1[0],x2[0],x3[0],x4[0],x5[0]]
        l = torch.stack(l)
        x = torch.max(l, dim=0).values
        # l = torch.tensor(l) 

        # decoder
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

model = DrBC()
x = torch.tensor([[2, 1, 1],[2, 1, 1],[2, 1, 1],[2, 1, 1]],dtype=torch.float)
    
edge_index = torch.tensor([[0,1,2],
                            [1,2,3]],dtype=torch.long)
print(model(x, edge_index))

tensor([[0.0353],
        [0.0383],
        [0.0375],
        [0.0450]], grad_fn=<AddmmBackward0>)


In [2]:
# prepare data
import numpy as np
import networkx as nx
import random 
import math

class Graph():
  def __init__(self,batch_size):
    self.graph_list=[]
    for x in range(batch_size):
      g = nx.powerlaw_cluster_graph(n=random.randint(150,200) , m=4, p=0.05, seed=None)
      self.graph_list.append(g)

  def get_deg_list(self):
    deg_list = []
    for g in self.graph_list:
      for x in range(g.number_of_nodes()):
        deg_list.append([g.degree[x],1,1])
    return torch.Tensor(deg_list).cuda()

  def get_edge_index(self):
    s_list,t_list,en = [],[],0
    for g in self.graph_list:
      for e in g.edges():
        s,t = e
        s_list.append(s+en)
        t_list.append(t+en)
      en += g.number_of_nodes()
    edge_index=[s_list+t_list,t_list+s_list]
    return torch.tensor(edge_index,dtype=torch.long).cuda()

  def get_bc_value(self):
    bc_value = []
    for g in self.graph_list:
      bc_value += list(nx.betweenness_centrality(g).values())
    for i, x in enumerate(bc_value):
      bc_value[i] = math.log(x+1e-8)
    return torch.Tensor(bc_value).cuda()

  def get_pair_index(self):
    n_list1,n_list2,pair_index,en = [],[],[],0
    for g in self.graph_list:
      for x in range(g.number_of_nodes()):
        n_list1 += [en+x,en+x,en+x,en+x,en+x]
        n_list2 += [en+x,en+x,en+x,en+x,en+x]
      random.shuffle(n_list1)
      random.shuffle(n_list2)
      for i,j in zip(n_list1,n_list2):
        pair_index.append([i,j])
      n_list1,n_list2=[],[]
    return torch.tensor(pair_index, dtype=torch.long).cuda()

"""
g = Graph(16)
print(g.get_deg_list().shape)
print(g.get_edge_index().shape)
print(g.get_bc_value().shape)
print(g.get_pair_index())
"""

'\ng = Graph(16)\nprint(g.get_deg_list().shape)\nprint(g.get_edge_index().shape)\nprint(g.get_bc_value().shape)\nprint(g.get_pair_index())\n'

In [3]:
# start training
from torch.optim import Adam
import torch.nn.functional as F
iteration = 501

def train():
  model = DrBC()
  model = model.cuda()
  optimizer = Adam(params=model.parameters(), lr=0.0001)
  g = Graph(16)
  for iter in range(iteration):
    if iter % 500 == 0:
      g = Graph(16)
      bc = g.get_bc_value()
    outs = model(g.get_deg_list(),g.get_edge_index())
    pair = g.get_pair_index()
    pred = outs[pair[:, 0]] - outs[pair[:, 1]]
    gt = torch.sigmoid((bc[pair[:, 0]] - bc[pair[:, 1]]))
    gt = gt.view(-1, 1)
    loss = F.binary_cross_entropy_with_logits(pred, gt, reduction="sum")
    if iter % 1000 == 0:
      print(outs[:10])
      print(bc[:10])
      print("[{}/{}] Loss:{:.4f}".format(iter, iteration, loss.item()))
    loss.backward()
    optimizer.step()
  torch.save(model.state_dict(), "./weight.pth")
  return model

model = train()

tensor([[ 0.7910],
        [-0.1227],
        [ 0.8191],
        [ 0.5104],
        [ 1.2235],
        [ 0.5936],
        [ 0.2875],
        [ 0.1840],
        [ 0.1133],
        [ 0.8620]], device='cuda:0', grad_fn=<SliceBackward0>)
tensor([ -2.3077, -18.4207,  -2.1099,  -2.6662,  -1.6539,  -2.5630,  -3.2885,
         -4.0480,  -4.4157,  -2.0790], device='cuda:0')
[0/501] Loss:8769.5449


In [4]:
# Read file
import urllib.request  

class readFile():
  def __init__(self,file):
    if file == 'y':
      url1 = 'https://raw.githubusercontent.com/emschenn/mlg_hw1/master/hw1_data/youtube/com-youtube.txt' 
      url2 = 'https://raw.githubusercontent.com/emschenn/mlg_hw1/master/hw1_data/youtube/com-youtube_score.txt' 
    else:
      url1 = 'https://raw.githubusercontent.com/emschenn/mlg_hw1/master/hw1_data/Synthetic/5000/' + file + '.txt'
      url2 = 'https://raw.githubusercontent.com/emschenn/mlg_hw1/master/hw1_data/Synthetic/5000/' + file + '_score.txt'
    self.bc_value,s_list,t_list,self.deg_list,n = [],[],[],[],0
    for line in urllib.request.urlopen(url2):
      _,v = line.decode('utf-8').split()
      self.bc_value.append([n,math.log(float(v)+1e-8)])
      n += 1
    for x in range(len(self.bc_value)):
      self.deg_list.append([0,1,1])
    for line in urllib.request.urlopen(url1):
      s,t = line.decode('utf-8').split()
      s,t = int(s),int(t)
      s_list.append(s)
      t_list.append(t)
      self.deg_list[s][0]+=1
      self.deg_list[t][0]+=1
    self.edge_index=[s_list+t_list,t_list+s_list]

  def get_deg_list(self):
    # print(self.deg_list)
    return torch.Tensor(self.deg_list).cuda()

  def get_edge_index(self):
    # print(self.edge_index)
    return torch.tensor(self.edge_index,dtype=torch.long).cuda()

  def get_bc_value(self):
    # print(self.bc_value)
    return self.bc_value.copy()


In [5]:
# Evaluation
f = readFile('y')
model = model.cpu()
t = f.get_deg_list().cpu()
t1 = f.get_edge_index().cpu()
with torch.no_grad():
  outs = model(t,t1)

# Top-N % accuracy
def takeSecond(elem):
    return elem[1]

def topN_accuracy(file,outs,n):
  predict_value,bc_value = [],[]
  for i,j in enumerate(outs.tolist()):
    predict_value.append([i,*j])
  bc_value = file.get_bc_value()
  bc_value.sort(key = takeSecond,reverse = True)
  predict_value.sort(key = takeSecond,reverse = True)
  p,t = [],[]
  for x in range(int(len(predict_value)*n/100)):
    p.append(predict_value[x][0])
    t.append(bc_value[x][0])
  # print(t)
  # print(p)
  return(len(set(t)&set(p)) / len(p))

print(topN_accuracy(f,outs,n=1))
print(topN_accuracy(f,outs,n=5))
print(topN_accuracy(f,outs,n=10))

# Kendall tau
import scipy.stats as stats
def kendall_tau(file,outs):
  predict_value,bc_value = [],[]
  for i,j in enumerate(outs.tolist()):
    predict_value.append(*j)
  for i in file.get_bc_value():
    bc_value.append(i[1])
  # print(predict_value)
  # print(bc_value)
  tau, _ = stats.kendalltau(predict_value, bc_value)
  return(tau)

print(kendall_tau(f,outs))

0.6049524145223828
0.6265332017482025
0.6428728775476037
0.6153405313188235


  (2 * xtie * ytie) / m + x0 * y0 / (9 * m * (size - 2)))
