To install PyTorch Geometric, run the cell below.

In [None]:
#!pip install torch=='1.9.0'
#!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.9.0+cu102.html

# IMPORT

In [None]:
import torch
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score,average_precision_score

from torch_geometric.utils import negative_sampling
import torch_geometric.transforms as T
from torch_geometric.utils import train_test_split_edges
from torch_geometric.transforms import RandomLinkSplit,NormalizeFeatures,Constant,OneHotDegree
from torch_geometric.utils import from_networkx
from torch_geometric.nn import GCNConv,SAGEConv,GATConv
from scipy.stats import entropy

import torch
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

import copy
import itertools
import json

# LOAD DATASET

In [None]:
from torch_geometric.data import Data

# Assemble the current graph from the node-feature matrix and edge list saved on disk.
current_data = Data(
    x=torch.load("../data/gnn/node_feature_matrix.pt"),
    edge_index=torch.load("../data/gnn/edge_index.pt"),
)

# Feature normalization (L1 norm), applied before the edges are split.
current_data = NormalizeFeatures()(current_data)

# Link-level split with negative sampling: no validation split, 25% of the
# edges held out for testing. `val_data` is still returned by the transform.
transform = RandomLinkSplit(num_val=0.0, num_test=0.25)
train_data, val_data, current_test_data = transform(current_data)

In [None]:
# Build the future graph from the tensors saved on disk.
future_data = Data()
future_data.x = torch.load("../data/gnn/future_node_feature_matrix.pt")
future_data.edge_index = torch.load("../data/gnn/future_edge_index.pt")

# NORMALIZATION
transform = NormalizeFeatures()
future_data = transform(future_data)

# NEGATIVE SAMPLING: request as many negative edges as there are positive ones.
future_neg_edge_index = negative_sampling(
    edge_index=future_data.edge_index,  # positive edges
    num_nodes=future_data.num_nodes,  # number of nodes
    num_neg_samples=future_data.edge_index.size(1),  # target number of negatives
)

# Supervision tensors: positives first (label 1), then negatives (label 0).
# The label vector is sized from the tensors that were actually produced:
# negative_sampling treats `num_neg_samples` as an approximate target and can
# return fewer columns, which would otherwise leave edge_label longer than
# edge_label_index.
num_pos_edge = future_data.edge_index.size(1)
num_neg_edge = future_neg_edge_index.size(1)
future_data.edge_label = torch.cat([torch.ones(num_pos_edge), torch.zeros(num_neg_edge)])
future_data.edge_label_index = torch.cat([future_data.edge_index, future_neg_edge_index], dim=-1)

In [None]:
# Bare expression: when run as a notebook cell this shows the training split's summary.
train_data

In [None]:
# Bare expression: when run as a notebook cell this shows the future graph's summary.
future_data

# DATASET MANIPULATION UTILITIES

In [None]:
def getTrainTestNegEdgeIndex(dataset):
    """Return the negative edges stored at the tail of ``dataset.edge_label_index``.

    The first ``dataset.edge_index.size(1)`` columns of ``edge_label_index``
    are taken to be the positive edges; every column after them is returned
    as a negative edge.

    Args:
        dataset: object exposing ``edge_index`` and ``edge_label_index``
            tensors, each with two rows (source nodes, target nodes).

    Returns:
        A new ``torch.long`` tensor with two rows holding the negative edges.
        It does not share memory with ``dataset.edge_label_index``.
    """
    num_pos = dataset.edge_index.size(1)
    # Slice the tensor directly instead of going through numpy and a float32
    # tensor: the float round trip corrupts node ids above 2**24 and only
    # works for tensors that live on the CPU.
    return dataset.edge_label_index[:, num_pos:].to(dtype=torch.long, copy=True)

In [None]:
def getValNegEdgeIndex(dataset):
    """Return the edges of ``dataset.edge_label_index`` whose label is 0.

    Args:
        dataset: object exposing ``edge_label`` (one label per supervision
            edge) and ``edge_label_index`` (two rows: source nodes, target
            nodes) as tensors.

    Returns:
        A new ``torch.long`` tensor with two rows holding the negative edges.
        It has zero columns when no edge is labelled 0.
    """
    # Select negatives by label rather than by searching for the first label
    # change: that search returned -1 when all labels were equal, so the
    # following `[-1:]` slice silently reported the last edge as a negative.
    neg_mask = dataset.edge_label == 0
    # Index the tensor directly; a float32 round trip would corrupt node ids
    # above 2**24.
    return dataset.edge_label_index[:, neg_mask].to(dtype=torch.long, copy=True)

# GAE MODULE

In [None]:
class GCNEncoder(torch.nn.Module):
    """Two-layer GCN encoder that maps node features to node embeddings.

    Args:
        in_channels: size of each input node-feature vector.
        out_channels: size of each output embedding; the hidden layer is
            twice this wide.
        cached: forwarded to both ``GCNConv`` layers. Defaults to ``False``
            because a caching layer keeps the normalized adjacency of the
            first graph it sees and reuses it for every later call, which is
            only valid when the encoder is always run on one fixed graph.
            This notebook encodes several different graphs with one model.
    """

    def __init__(self, in_channels, out_channels, cached=False):
        super().__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=cached)
        self.conv2 = GCNConv(2 * out_channels, out_channels, cached=cached)

    def forward(self, x, edge_index):
        """Encode features ``x`` over the graph given by ``edge_index``."""
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)

In [None]:
def train(data):
    """Run one optimization step on ``data`` and return the loss as a float.

    Uses the module-level ``model``, ``optimizer`` and ``device``. The edges
    in ``data.edge_index`` are used both to encode the nodes and as the
    positive edges of the reconstruction loss.
    """
    features = data.x.float().to(device)
    pos_edges = data.edge_index.to(device)

    model.train()
    optimizer.zero_grad()

    embeddings = model.encode(features, pos_edges)
    # A variational model would add (1 / data.num_nodes) * model.kl_loss() here.
    loss = model.recon_loss(embeddings, pos_edges)
    loss.backward()
    optimizer.step()

    return loss.item()


def test(data, pos_edge_index, neg_edge_index):
    """Score the given positive and negative edges with the current model.

    Node embeddings are computed without gradients from ``data.x`` and
    ``data.edge_index`` using the module-level ``model`` and ``device``.
    The result of ``model.test`` is returned unchanged; callers unpack it
    as an ``(auc, ap)`` pair.
    """
    features = data.x.float().to(device)
    message_edges = data.edge_index.to(device)

    model.eval()
    with torch.no_grad():
        embeddings = model.encode(features, message_edges)

    return model.test(embeddings, pos_edge_index, neg_edge_index)

In [None]:
# Negative edges of the training split.
train_neg_edge_index = getTrainTestNegEdgeIndex(train_data)

# Validation edges come from the held-out part of the current graph.
# In a RandomLinkSplit output, `edge_index` is the message-passing graph
# (the edges kept for training), while the held-out edges live in
# `edge_label_index`: label 1 marks positives and label 0 marks sampled
# negatives. Taking positives from `edge_index` would score the model on
# edges it was trained on.
val_pos_edge_index = current_test_data.edge_label_index[:, current_test_data.edge_label == 1]
val_neg_edge_index = getValNegEdgeIndex(current_test_data)

# Test edges: every edge of the future graph plus its sampled negatives.
test_pos_edge_index = future_data.edge_index
test_neg_edge_index = future_neg_edge_index

In [None]:
from torch_geometric.nn import GAE

In [None]:
# Hyper-parameters: embedding size and width of the input node features.
out_channels = 2
num_features = train_data.num_node_features

# Wrap the GCN encoder in a graph auto-encoder and re-initialize its weights.
model = GAE(GCNEncoder(num_features, out_channels))
model.reset_parameters()

# Use the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = model.to(device)

# Initialize the optimizer.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [None]:
num_epochs = 100
best_epoch = 0
ap_max = 0.0  # best validation average precision seen so far

# Average-precision history, one entry per epoch.
avgpr_trains = []
avgpr_vals = []
avgpr_tests = []

best_model = copy.deepcopy(model)

for epoch in range(1, num_epochs + 1):

    loss = train(train_data)

    auc_train, ap_train = test(train_data, train_data.edge_index, train_neg_edge_index)
    auc_val, ap_val = test(current_test_data, val_pos_edge_index, val_neg_edge_index)
    auc_test, ap_test = test(future_data, test_pos_edge_index, test_neg_edge_index)
    print(
        f'Epoch: {epoch:03d}, AUC: train: {auc_train:.4f}, val: {auc_val:.4f}, '
        f'test: {auc_test:.4f}, AP: train: {ap_train:.4f}, val: {ap_val:.4f}, '
        f'test: {ap_test:.4f}'
    )

    # Keep the checkpoint with the best *validation* AP. Selecting on the
    # test AP would tune the model on the data it is finally reported on.
    # `>=` keeps the latest epoch on ties.
    if ap_val >= ap_max:
        best_epoch = epoch
        ap_max = ap_val
        best_model = copy.deepcopy(model)

    avgpr_trains.append(ap_train)
    avgpr_vals.append(ap_val)
    avgpr_tests.append(ap_test)

# The triple-quoted string below is disabled plotting code for the learning
# curves: as a bare string literal it is never executed.
# NOTE(review): it plots against range(num_epochs), i.e. 0-based, while the
# loop above numbers epochs from 1.
"""
#train orange test blue val green
x = range(num_epochs)
plt.clf()
plt.plot(x, avgpr_trains, color='orange', label='avgpr_train')
plt.plot(x, avgpr_vals, color='green', label='avgpr_val')
plt.plot(x, avgpr_tests, color='blue', label = 'avgpr_test')
plt.xlabel('Epoch')
plt.ylabel('AVGPR-score')
plt.legend()
plt.ylim(top=1)
plt.grid()
plt.savefig(f'learningCurves/GAE/august2016/new_all.pdf'\
            ,bbox_inches='tight')
plt.clf()
"""

print(f'Best epoch: {best_epoch}')