In [1]:
import os
import os.path as osp

import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.metrics import roc_auc_score

import torch
from torch_geometric.data import InMemoryDataset
from torch_geometric.data import DataLoader
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
#import torch_geometric.nn as pyg_nn
import torch_geometric.utils as pyg_utils

  return torch._C._cuda_getDeviceCount() > 0


In [2]:
from dataset import Dec

In [3]:
# Instantiate the Dec dataset class imported from the local `dataset` module.
dataset = Dec()

Processing...
Finished with creation of dictionaries
Finished creating features
Done!


In [4]:
# Take the first element of the dataset; every later cell operates on this
# single `data` object.
data = dataset[0]

In [5]:
# All three "splits" are aliases of the same tensor, data.edge_index, so
# training, validation and test positives are identical.
# NOTE(review): because of this, the validation AUC reported by test() is
# measured on the very edges the model was trained on and is therefore
# optimistic — a held-out edge split is needed for a meaningful score.
data.train_pos_edge_index = data.edge_index
data.val_pos_edge_index = data.edge_index
data.test_pos_edge_index = data.edge_index

In [6]:
# Show the summary repr of `data` (attribute names and tensor shapes).
data

Data(edge_index=[2, 43953], edge_type=[43953], test_pos_edge_index=[2, 43953], train_pos_edge_index=[2, 43953], val_pos_edge_index=[2, 43953], x=[7736, 10184])

In [34]:
# Inspect column 380 of the feature matrix x (one value per row of x).
# NOTE(review): this displays the whole column; a slice or a summary such as
# .sum() / .nonzero() would keep the notebook output short.
data.x[:,380]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 

In [13]:
def neg_sampling(edge_index):
    """Draw negative edges that share their source nodes with the positive edges.

    ``structured_negative_sampling`` returns a triple ``(i, j, k)``; this
    function pairs the first element (``i``) with the third (``k``) and
    discards the second, yielding one ``(i, k)`` pair per entry of the triple.

    Args:
        edge_index: ``[2, E]`` tensor of positive edges.

    Returns:
        ``[2, E]`` tensor whose first row is ``i`` and second row is ``k``.
        The result keeps the dtype and device of the sampled tensors.
    """
    src, _, neg_dst = pyg_utils.structured_negative_sampling(edge_index)
    # Stack directly instead of round-tripping through Python lists: that
    # round trip is slow, always lands on the CPU, and turns an empty edge
    # set into a float32 tensor that cannot be used for indexing.
    return torch.stack([src, neg_dst], dim=0)

In [14]:
def get_link_labels(pos_edge_index, neg_edge_index):
    """Build the binary target vector for a batch of positive + negative edges.

    The layout matches a concatenation of positives followed by negatives:
    the first ``pos_edge_index.size(1)`` entries are 1, the remaining
    ``neg_edge_index.size(1)`` entries are 0.

    Args:
        pos_edge_index: ``[2, P]`` tensor of positive edges.
        neg_edge_index: ``[2, N]`` tensor of negative edges.

    Returns:
        Float tensor of shape ``[P + N]`` on the same device as
        ``pos_edge_index``.
    """
    num_pos = pos_edge_index.size(1)
    num_neg = neg_edge_index.size(1)
    # Allocate on the edges' device so the labels can be fed to the loss
    # together with logits computed from those edges without a device mismatch.
    link_labels = torch.zeros(num_pos + num_neg, dtype=torch.float,
                              device=pos_edge_index.device)
    link_labels[:num_pos] = 1.
    return link_labels

In [15]:
class Net(torch.nn.Module):
    """Two-layer GCN encoder with dot-product and bilinear edge decoders.

    Args:
        in_channels: Input feature size. ``None`` (default) falls back to the
            module-level ``dataset.num_features``, as the original code did.
        hidden_channels: Output size of the first GCN layer (default 64).
        out_channels: Embedding size produced by the second GCN layer and
            side length of the square ``R`` / ``D`` matrices (default 32).
    """

    def __init__(self, in_channels=None, hidden_channels=64, out_channels=32):
        super(Net, self).__init__()
        if in_channels is None:
            in_channels = dataset.num_features
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)
        # R and D must be nn.Parameter: plain tensor attributes are invisible
        # to model.parameters(), so the optimizer would never update them and
        # model.to(device) / state_dict() would silently skip them.
        self.R = nn.Parameter(torch.empty(out_channels, out_channels))
        self.D = nn.Parameter(torch.empty(out_channels, out_channels))
        relu_gain = nn.init.calculate_gain('relu')
        nn.init.xavier_uniform_(self.R, gain=relu_gain)
        nn.init.xavier_uniform_(self.D, gain=relu_gain)

    def encode(self, x=None, edge_index=None):
        """Compute node embeddings with conv1 -> ReLU -> conv2.

        Args:
            x: Node features; defaults to the module-level ``data.x``.
            edge_index: Message-passing edges; defaults to the module-level
                ``data.train_pos_edge_index``.

        Returns:
            The output of ``conv2`` (one embedding row per node).
        """
        if x is None:
            x = data.x
        if edge_index is None:
            edge_index = data.train_pos_edge_index
        hidden = self.conv1(x, edge_index).relu()
        return self.conv2(hidden, edge_index)

    def decode(self, z, pos_edge_index, neg_edge_index):
        """Score edges by the dot product of their endpoint embeddings.

        Returns:
            1-D tensor of logits, positives first and negatives after, in the
            same order as ``get_link_labels`` lays out its targets.
        """
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
        return logits

    def decode_decagon(self, z, pos_edge_index, neg_edge_index):
        """Score edges with the bilinear form ``z_src^T D R D z_dst``.

        The previous implementation ignored the negative edges and multiplied
        the full source and destination blocks together, returning a
        ``[P, P]`` matrix of all-pairs scores. This version scores each
        (source, destination) pair once, positives first then negatives, so
        the output lines up with ``decode`` and ``get_link_labels``.

        Returns:
            1-D tensor of logits with one entry per positive and negative edge.
        """
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        src_proj = z[edge_index[0]] @ self.D @ self.R @ self.D
        # Row-wise dot product: one scalar per edge instead of an E x E matrix.
        logits = (src_proj * z[edge_index[1]]).sum(dim=-1)
        return logits

    def decode_all(self, z):
        """Return, as a ``[2, K]`` index tensor, every node pair whose
        embedding dot product is strictly positive."""
        prob_adj = z @ z.t()
        return (prob_adj > 0).nonzero(as_tuple=False).t()

In [16]:
def train():
    """Run one full-batch optimisation step of the link-prediction model.

    Relies on the module-level ``model``, ``optimizer`` and ``data``. The
    positive edges are ``data.train_pos_edge_index``; negative edges are
    re-drawn with ``neg_sampling`` on every call.

    Returns:
        0-dim tensor with the binary cross-entropy (with logits) loss of this
        step, detached from the autograd graph.
    """
    model.train()

    pos_edge_index = data.train_pos_edge_index
    neg_edge_index = neg_sampling(pos_edge_index)

    optimizer.zero_grad()
    z = model.encode()
    link_logits = model.decode(z, pos_edge_index, neg_edge_index)
    link_labels = get_link_labels(pos_edge_index, neg_edge_index)
    loss = F.binary_cross_entropy_with_logits(link_logits, link_labels)
    loss.backward()
    optimizer.step()

    # Detach before handing the value to callers: they only log it, and a
    # loss that still carries grad_fn keeps autograd history reachable if it
    # is stored (e.g. appended to a list across epochs).
    return loss.detach()

In [17]:
@torch.no_grad()
def test():
    """Evaluate the module-level ``model`` on ``data.val_pos_edge_index``.

    Negative edges are sampled with ``neg_sampling`` on each call, scored
    together with the positives, and the ROC AUC of the sigmoid scores against
    the 1/0 labels is computed.

    Returns:
        A one-element list containing the validation ROC AUC.
    """
    model.eval()

    val_pos = data.val_pos_edge_index
    val_neg = neg_sampling(val_pos)

    embeddings = model.encode()
    probs = model.decode(embeddings, val_pos, val_neg).sigmoid()
    targets = get_link_labels(val_pos, val_neg)

    auc = roc_auc_score(targets.cpu(), probs.cpu())
    return [auc]

In [18]:
# Seed torch's RNG before building the model so that weight initialisation
# (and any later torch-based random sampling) is repeatable under
# Restart Kernel -> Run All.
SEED = 0
torch.manual_seed(SEED)

model = Net()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

In [19]:
# Epochs 1-9: one optimisation step followed by one validation pass per epoch.
# best_val_perf / test_perf are initialised here but not updated by this loop.
best_val_perf = 0
test_perf = 0
log = 'Epoch: {:03d}, Loss: {:.4f}, Val: {:.4f}'
for epoch in range(1, 10):
    train_loss = train()
    val_perf = test()
    print(log.format(epoch, train_loss, val_perf[0]))


Epoch: 001, Loss: 4.8870, Val: 0.4592
Epoch: 002, Loss: 124.0708, Val: 0.1806
Epoch: 003, Loss: 15.2232, Val: 0.1998
Epoch: 004, Loss: 7.3334, Val: 0.1965
Epoch: 005, Loss: 2.3630, Val: 0.8494
Epoch: 006, Loss: 0.7082, Val: 0.8357
Epoch: 007, Loss: 0.6844, Val: 0.8357
Epoch: 008, Loss: 0.6493, Val: 0.8372
Epoch: 009, Loss: 0.5664, Val: 0.8376


In [20]:
# Epochs 10-29: continue training the same model/optimizer, logging the
# training loss and validation AUC after every epoch.
log = 'Epoch: {:03d}, Loss: {:.4f}, Val: {:.4f}'
for epoch in range(10, 30):
    train_loss = train()
    val_perf = test()
    print(log.format(epoch, train_loss, val_perf[0]))


Epoch: 010, Loss: 0.4921, Val: 0.8384
Epoch: 011, Loss: 0.4854, Val: 0.8377
Epoch: 012, Loss: 0.5158, Val: 0.8387
Epoch: 013, Loss: 0.5323, Val: 0.8379
Epoch: 014, Loss: 0.5464, Val: 0.8381
Epoch: 015, Loss: 0.5522, Val: 0.8382
Epoch: 016, Loss: 0.5374, Val: 0.8382
Epoch: 017, Loss: 0.5207, Val: 0.8384
Epoch: 018, Loss: 0.5022, Val: 0.8386
Epoch: 019, Loss: 0.4871, Val: 0.8380
Epoch: 020, Loss: 0.4825, Val: 0.8387
Epoch: 021, Loss: 0.4922, Val: 0.9379
Epoch: 022, Loss: 0.5074, Val: 0.9513
Epoch: 023, Loss: 0.5115, Val: 0.9564
Epoch: 024, Loss: 0.5022, Val: 0.9605
Epoch: 025, Loss: 0.4898, Val: 0.9605
Epoch: 026, Loss: 0.4801, Val: 0.9628
Epoch: 027, Loss: 0.4807, Val: 0.9635
Epoch: 028, Loss: 0.4869, Val: 0.9633
Epoch: 029, Loss: 0.4869, Val: 0.9640


In [32]:
# Print column 22 of the learned embeddings for inspection.
# edgeitems=633 raises how many leading/trailing items torch shows when it
# summarises a long tensor; note that this setting is global to the kernel.
torch.set_printoptions(edgeitems=633)
# Inspection only: run the encoder without autograd so no graph (and none of
# its intermediate activations) is kept alive through the global `z`.
with torch.no_grad():
    z = model.encode()
print(z[:,22])


tensor([ 0.0137,  0.0320,  0.0415,  0.0157,  0.0193,  0.0637,  0.0996,  0.0450,
         0.0453,  0.0759,  0.0991,  0.0159,  0.0595,  0.0236,  0.0296,  0.1058,
         0.0614,  0.1262,  0.0426,  0.0398,  0.0866,  0.0457,  0.0494,  0.0525,
         0.0255,  0.0566,  0.0291,  0.0120,  0.0369,  0.0225,  0.0867,  0.0799,
         0.1061,  0.0739,  0.0440,  0.0278,  0.0954,  0.0448,  0.1055,  0.0296,
         0.0428,  0.0197,  0.0227,  0.0250,  0.1018,  0.0745,  0.0570,  0.0285,
         0.0484,  0.0535,  0.0328,  0.0882,  0.0413,  0.0473,  0.0531,  0.0680,
         0.0536,  0.0436,  0.0332,  0.1228,  0.0303,  0.0489,  0.0394,  0.0268,
         0.0229,  0.0283,  0.1316,  0.1226,  0.0391,  0.0971,  0.0507,  0.1098,
         0.0452,  0.0351,  0.1518,  0.0313,  0.0354,  0.0686,  0.0687,  0.0638,
         0.0541,  0.0953,  0.0640,  0.0946,  0.1093,  0.0337,  0.0935,  0.0480,
         0.0124,  0.0419,  0.0321,  0.0269,  0.0439,  0.1927,  0.0441,  0.0181,
         0.0299,  0.0448,  0.1218,  0.06