In [1]:
import torch
import warnings
import torch_geometric.nn
import numpy as np

import networkx as nx
import torch.nn.functional as F

from torch_geometric.nn import GCNConv
from numpy import genfromtxt

from torch_geometric.data import Data
from sklearn.metrics import roc_auc_score
from torch_geometric.utils import negative_sampling, train_test_split_edges
import ipywidgets as widgets

widgets.IntSlider()

IntSlider(value=0)

In [2]:
warnings.simplefilter(action='ignore', category=FutureWarning)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Seed
torch.manual_seed(42)

<torch._C.Generator at 0x1066d1350>

### Load the graph and tranmit to torch geom Data object

In [4]:
node_features = genfromtxt('node_feat.txt', delimiter=' ')
edge_list = genfromtxt('train_edges.txt', delimiter=' ')

x = torch.as_tensor(node_features, dtype=torch.float32)
edge_index = torch.tensor(edge_list, dtype=torch.long)

graph = Data(x=x, edge_index=edge_index.t().contiguous(), num_classes=2)

In [5]:
x = graph.x.to(device)

transform = torch_geometric.transforms.RandomLinkSplit(
    num_val=0.0,
    num_test=0.0,
    is_undirected=False, 
    split_labels=True,
    key='connected',
    add_negative_train_samples=True,
)

data, _, _ = transform(graph)

In [6]:
data

Data(x=[12588, 32], edge_index=[2, 14322], num_classes=2, pos_connected=[14322], pos_connected_index=[2, 14322], neg_connected=[14322], neg_connected_index=[2, 14322])

### Helper functions

In [7]:
def get_link_labels(pos_edge_index, neg_edge_index):
    num_links = pos_edge_index.size(1) + neg_edge_index.size(1)
    link_labels = torch.zeros(num_links, dtype=torch.float)
    link_labels[:pos_edge_index.size(1)] = 1.
    return link_labels

In [8]:
def train():
    model.train()
    optimizer.zero_grad()

    neg_edge_index = negative_sampling(
        edge_index=data.pos_connected_index,
        num_nodes=data.num_nodes,
        num_neg_samples=data.pos_connected.shape[0])
    z = model.encode(data.x, data.pos_connected_index)
    link_logits = model.decode(z, data.pos_connected_index, neg_edge_index)
    link_labels = get_link_labels(data.pos_connected_index, neg_edge_index).to(data.x.device)
    loss = F.binary_cross_entropy_with_logits(link_logits, link_labels)

    loss.backward()
    optimizer.step()
    return float(loss)

In [9]:
@torch.no_grad()
def test(data):
    model.eval()
    z = model.encode(data.x, data.pos_connected_index)
    link_logits = model.decode(z, data.pos_connected_index, data.neg_connected_index)
    link_labels = get_link_labels(data.pos_connected_index, data.neg_connected_index)
    return roc_auc_score(link_labels.cpu(), link_logits.cpu())

### Encoder

Added $tanh()$ to the dot product for the decoder.

In [10]:
class Net(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(Net, self).__init__()
        self.conv_1 = GCNConv(in_channels, hidden_channels, cached=True)
        self.conv_2 = GCNConv(hidden_channels, out_channels, cached=True)
    
    def encode(self, x, edge_index):
        x = self.conv_1(x, edge_index)
        
        ####### Use Randomized LeakyRELU ###########
        x = F.rrelu(x)
        ####### Use Randomized LeakyRELU ###########

        x = self.conv_2(x, edge_index)
        return x

    def decode(self, z, pos_edge_index, neg_edge_index):
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        
        ####### Apply tanh after dot product ########  
        h = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1).tanh()
        return h

In [11]:
# Hyperparametes
IN_CHANNELS = 32
HIDDEN_CHANNELS = 512
OUT_CHANNELS = 512
LEARNING_RATE = 0.001

In [12]:
model = Net(IN_CHANNELS, HIDDEN_CHANNELS, OUT_CHANNELS).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [13]:
EPOCH = 2000
for epoch in range(1, EPOCH + 1):
    loss = train()
    auc = test(data)
    if epoch % 100 == 0:
        print(f'Epoch: {epoch:03d}, ROC-AUC: {auc:.4f}, Loss: {loss:.4f}')

Epoch: 100, ROC-AUC: 0.9331, Loss: 0.5004
Epoch: 200, ROC-AUC: 0.9288, Loss: 0.4933
Epoch: 300, ROC-AUC: 0.9230, Loss: 0.4834
Epoch: 400, ROC-AUC: 0.9204, Loss: 0.4739
Epoch: 500, ROC-AUC: 0.9072, Loss: 0.4739
Epoch: 600, ROC-AUC: 0.9164, Loss: 0.4641
Epoch: 700, ROC-AUC: 0.9133, Loss: 0.4572
Epoch: 800, ROC-AUC: 0.9125, Loss: 0.4573
Epoch: 900, ROC-AUC: 0.9114, Loss: 0.4545
Epoch: 1000, ROC-AUC: 0.9114, Loss: 0.4524
Epoch: 1100, ROC-AUC: 0.9100, Loss: 0.4477
Epoch: 1200, ROC-AUC: 0.9093, Loss: 0.4506
Epoch: 1300, ROC-AUC: 0.9091, Loss: 0.4471
Epoch: 1400, ROC-AUC: 0.9090, Loss: 0.4479
Epoch: 1500, ROC-AUC: 0.9078, Loss: 0.4423
Epoch: 1600, ROC-AUC: 0.9049, Loss: 0.4501
Epoch: 1700, ROC-AUC: 0.9071, Loss: 0.4438
Epoch: 1800, ROC-AUC: 0.9018, Loss: 0.4536
Epoch: 1900, ROC-AUC: 0.9067, Loss: 0.4391
Epoch: 2000, ROC-AUC: 0.9062, Loss: 0.4402


### Predicting links from the unlabeled edges file

In [18]:
Z = model.encode(data.x, data.pos_connected_index)

In [19]:
A = Z @ Z.t()

In [22]:
def output_link_preds(A, path, out):
    link_labels = list()
    with open(path) as f:
        for line in f.readlines():
            nodes_str = line.strip("\n").split(" ")
            nodes = int(nodes_str[0]), int(nodes_str[1])
            label = A[nodes] 
            if label > 0:
                link_labels.append(1)
            else:
                link_labels.append(0)

    np.savetxt(out, link_labels, fmt='%d')

In [23]:
output_link_preds(A, 'unlabeled_edges.txt', 'preds_submit.txt')