# **Preliminaries:** Install and import modules

In [62]:
#@title [RUN] install
!pip install networkx
!pip install mycolorpy
!pip install colorama
!pip install ogb

import torch
import os
!pip install torch-geometric torch-scatter torch-sparse torch-cluster -f https://data.pyg.org/whl/torch-{torch.__version__}.html


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-1.13.1+cu116.html


In [63]:
#@title [RUN] Import modules
import numpy as np
import seaborn as sns
import math
import itertools
import scipy as sp
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch_geometric
from torch_geometric.datasets import Planetoid, Coauthor
from torch_scatter import scatter_mean, scatter_max, scatter_sum
from torch_geometric.utils import to_dense_adj
from torch.nn import Embedding
from torch_geometric.typing import Adj
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric.loader import NeighborLoader
from torch_geometric.utils import to_scipy_sparse_matrix, degree

#For FastRP
from scipy.sparse import coo_matrix, csr_matrix, csc_matrix, spdiags
from sklearn.preprocessing import normalize, scale, MultiLabelBinarizer
from sklearn import random_projection


import pdb
from datetime import datetime

#for nice visualisations
import networkx as nx
import matplotlib.pyplot as plt

from mycolorpy import colorlist as mcp
import matplotlib.cm as cm

from typing import Mapping, Tuple, Sequence, List
import colorama

import scipy.linalg
from scipy.linalg import block_diag

In [64]:
####### PLOTS #######

def update_stats(training_stats, epoch_stats):
    """Append one epoch's metrics to the running training history.

    Args:
      training_stats: dict mapping metric name -> list of per-epoch values,
        or None on the first call (a fresh history is then created with one
        empty list per key of epoch_stats).
      epoch_stats: dict mapping metric name -> value for a single epoch.
    Returns:
      The history dict with the new values appended (the same object that
      was passed in, unless it was None).
    """
    if training_stats is None:
        training_stats = {metric: [] for metric in epoch_stats}
    for metric in epoch_stats:
        training_stats[metric].append(epoch_stats[metric])
    return training_stats

def plot_stats(training_stats, figsize=(5, 5), name=""):
    """Draw training and validation curves, one subplot per metric.

    A metric is any key of training_stats that starts with 'train_'; the
    matching 'val_<metric>' series and the 'epoch' series must also be present.
    The title `name` is applied through plt.title, i.e. to the current axes.
    """
    prefix = 'train_'
    metrics = [k[len(prefix):] for k in training_stats.keys() if k.startswith(prefix)]
    f, ax = plt.subplots(len(metrics), 1, figsize=figsize)
    if len(metrics) == 1:
        # a single subplot comes back as a bare Axes; wrap it so it can be iterated
        ax = np.array([ax])
    for metric, panel in zip(metrics, ax.reshape(-1,)):
        train_series = training_stats[f'train_{metric}']
        panel.plot(training_stats['epoch'], train_series, label=f"Training {metric}")
        val_series = training_stats[f'val_{metric}']
        panel.plot(training_stats['epoch'], val_series, label=f"Validation {metric}")
        panel.set_xlabel("Training epoch")
        panel.set_ylabel(metric)
        panel.legend()
    plt.title(name)


def get_color_coded_str(i, color):
    """Return int(i) as text wrapped in the ANSI escape '\\033[3<color>m' ... '\\033[0m'."""
    colour_digit = int(color)
    shown_value = int(i)
    return "\033[3{}m{}\033[0m".format(colour_digit, shown_value)

def print_color_numpy(map, list_graphs):
    """Print matrix `map` with each entry coloured by the graph block it falls in.

    A block-diagonal colour matrix is built from list_graphs: the i-th graph
    contributes a (num_nodes x num_nodes) block filled with i+1. Every entry of
    `map` is then rendered through get_color_coded_str with the colour found at
    the same position.
    """
    blocks = [
        (idx + 1) * np.ones((g.num_nodes, g.num_nodes))
        for idx, g in enumerate(list_graphs)
    ]
    block_color = block_diag(*blocks)

    coloured = np.vectorize(get_color_coded_str)(map, block_color)
    row_template = " ".join(["{}"] * map.shape[0])
    template = "\n".join([row_template] * map.shape[1])
    flat_cells = [cell for row in coloured.tolist() for cell in row]
    print(template.format(*flat_cells))

# Cora dataset



In [65]:
# Load Cora through the Planetoid loader (stored under /tmp/cora) using the "full" split.
cora_dataset = Planetoid("/tmp/cora", name="cora", split="full")
# Index 0 is the graph object used in the rest of the notebook.
cora_data = cora_dataset[0]
# Bare expression: displays the Data summary as the cell output.
cora_data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [None]:
# Number of Cora nodes per class label in each split (bincount over the masked labels).
print("Training class sizes")
print(torch.bincount(cora_dataset[0].y[cora_dataset[0].train_mask]))
print("Validation class sizes")
print(torch.bincount(cora_dataset[0].y[cora_dataset[0].val_mask]))
print("Test class sizes")
print(torch.bincount(cora_dataset[0].y[cora_dataset[0].test_mask]))

# OGBN-arXiv dataset

In [66]:
# Load ogbn-arxiv and its train/valid/test split.
d_name = "ogbn-arxiv"

dataset = PygNodePropPredDataset(name = d_name)

# The split is given as node-index tensors (not boolean masks).
split_idx = dataset.get_idx_split()
train_idx, valid_idx, test_idx = split_idx["train"], split_idx["valid"], split_idx["test"]
arxiv_data = dataset[0]
# Drop singleton dimensions so labels and years can be indexed as flat vectors.
arxiv_data.y = arxiv_data.y.squeeze()
arxiv_data.node_year = arxiv_data.node_year.squeeze()
arxiv_data

Downloading http://snap.stanford.edu/ogb/data/nodeproppred/arxiv.zip


Downloaded 0.08 GB: 100%|██████████| 81/81 [00:09<00:00,  8.81it/s]


Extracting dataset/arxiv.zip


Processing...


Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 1/1 [00:00<00:00, 1988.76it/s]


Converting graphs into PyG objects...


100%|██████████| 1/1 [00:00<00:00, 4917.12it/s]

Saving...



Done!


Data(num_nodes=169343, edge_index=[2, 1166243], x=[169343, 128], node_year=[169343], y=[169343])

In [None]:
# Number of ogbn-arxiv nodes per class label in each split.
print("Training class sizes")
print(torch.bincount(arxiv_data.y[train_idx]))
print("Validation class sizes")
print(torch.bincount(arxiv_data.y[valid_idx]))
print("Test class sizes")
print(torch.bincount(arxiv_data.y[test_idx]))

# Coauthor dataset

In [67]:
# Load the Coauthor "CS" graph (stored under /tmp/coauthor).
cs_dataset = Coauthor("/tmp/coauthor", name="CS")
cs_data = cs_dataset[0]
# The displayed Data object has no split masks; a manual split is built in a later cell.
cs_data

Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/ms_academic_cs.npz
Processing...
Done!


Data(x=[18333, 6805], edge_index=[2, 163788], y=[18333])

In [68]:
# Create manual split, do 60:20:20 across classes
num_classes_cs = 15
train_mask_cs_indices = []
val_mask_cs_indices = []
test_mask_cs_indices = []
cs_labels = cs_data.y
for i in range(num_classes_cs):

  # Indices of all nodes labelled with class i
  class_i = np.where(cs_labels == i)[0]
  # NOTE(review): the RNG is re-seeded with 0 on every iteration, so each class is
  # shuffled starting from the same RNG state — confirm this is intended.
  np.random.seed(0)
  np.random.shuffle(class_i)

  num_samples = len(class_i)
  # First 60% of the shuffled class -> train, next 20% -> validation, remainder -> test
  train_mask_cs_indices += (class_i[:int(num_samples*0.6)]).tolist()
  val_mask_cs_indices += (class_i[int(num_samples*0.6):int(num_samples*0.8)]).tolist()
  test_mask_cs_indices += (class_i[int(num_samples*0.8):]).tolist()

print(len(train_mask_cs_indices), len(val_mask_cs_indices), len(test_mask_cs_indices))
# Turn the index lists into boolean node masks (one entry per node)
# Train mask
train_mask_cs = torch.full((len(cs_labels),), False)
train_mask_cs[train_mask_cs_indices] = True
# Val mask
val_mask_cs = torch.full((len(cs_labels),), False)
val_mask_cs[val_mask_cs_indices] = True
# Test mask
test_mask_cs = torch.full((len(cs_labels),), False)
test_mask_cs[test_mask_cs_indices] = True

10993 3668 3672


In [None]:
# Number of Coauthor-CS nodes per class label in each manually created split.
print("Training class sizes")
print(torch.bincount(cs_data.y[train_mask_cs]))
print("Validation class sizes")
print(torch.bincount(cs_data.y[val_mask_cs]))
print("Test class sizes")
print(torch.bincount(cs_data.y[test_mask_cs]))

# Data saving / loading

In [69]:
# use google drive for saving and loading information
# NOTE: google.colab is only importable inside a Colab runtime.
from google.colab import drive
import pickle
import os

drive.mount('/content/drive')
# Base directory used by the save_* / load_* helpers; they build paths by plain
# string concatenation, so the trailing slash is required.
file_path = '/content/drive/MyDrive/L45_project/'
# create folder if it does not exist already
if not os.path.exists(file_path):
  os.mkdir(file_path) 

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [70]:
def save_training_info(training_stats: dict, node_embedding: torch.Tensor, filename: str):
  """Persist one run's training history and node embeddings under file_path.

  Writes <file_path><filename>.pkl (pickled training_stats) and
  <file_path><filename>_emb.pt (torch.save of node_embedding), printing a
  confirmation line after each write.
  """
  base_path = file_path + filename
  # training history -> pickle file
  with open(base_path + ".pkl", 'wb') as stats_file:
    pickle.dump(training_stats, stats_file)
    print('Training stats saved successfully to file: ' + filename)
  # node embedding -> torch tensor file
  torch.save(node_embedding, base_path + "_emb.pt")
  print('Node embedding saved successfully to file: ' + filename)


def load_training_info(filename: str):
  """Read back what save_training_info wrote for `filename`.

  Returns:
    (training_stats, node_embedding) loaded from <file_path><filename>.pkl and
    <file_path><filename>_emb.pt respectively.

  Both pickle.load and torch.load can execute code embedded in the file, so
  only use this on files produced by this notebook.
  """
  base_path = file_path + filename
  # training stats dictionary
  with open(base_path + ".pkl", 'rb') as stats_file:
    train_stats = pickle.load(stats_file)
    print('Training stats successfully loaded from file: ' + filename)
  # node embedding tensor
  node_embedding = torch.load(base_path + "_emb.pt")
  print('Node embedding successfully loaded from file: ' + filename)
  return train_stats, node_embedding

# Final results is a list [seed, test result, [test per class accuracy], [training per class accuracy], [val per class accuracy]]
def save_final_results(final_results: List, filename: str):
  """Append one run's final results to <file_path><filename>.pkl.

  The file is opened in append-binary mode, so each call adds one more pickled
  record to the same file; load_final_results reads them back one by one.
  """
  results_path = file_path + filename + ".pkl"
  with open(results_path, 'ab') as results_file:
    pickle.dump(final_results, results_file)
    print('Final results saved successfully to file: ' + filename)

# Returns an iterator which contains all the results from our various runs
def load_final_results(filename: str):
  """Generator over every record appended to <file_path><filename>.pkl.

  Being a generator, nothing is opened or printed until iteration starts.
  Records are unpickled one at a time until the end of the file is reached.
  pickle.load can execute code embedded in the file; only read trusted files.
  """
  results_path = file_path + filename + ".pkl"
  with open(results_path, 'rb') as results_file:
    print('Final results found in file: ' + filename)
    reached_end = False
    while not reached_end:
      try:
        # hand each stored record to the consumer as soon as it is read
        yield pickle.load(results_file)
      except EOFError:
        reached_end = True


In [None]:
# Round-trip smoke test for save_training_info / load_training_info:
# writes "testing.pkl" and "testing_emb.pt" under file_path and reads them back.
test_dict = {'c':[1,2,3], 'b':[4,5,6]}
test_tensor = torch.tensor([[1., -1.], [1., -1.]])
save_training_info(test_dict, test_tensor, "testing")
recovered_val1, recovered_val2 = load_training_info("testing")
# The printed values should match test_dict and test_tensor.
print(recovered_val1, recovered_val2)

Training stats saved successfully to file: testing
Node embedding saved successfully to file: testing
Training stats successfully loaded from file: testing
Node embedding successfully loaded from file: testing
{'c': [1, 2, 3], 'b': [4, 5, 6]} tensor([[ 1., -1.],
        [ 1., -1.]])


# Model Wrappers

In [None]:
from torch_geometric.nn import GCN

class GCNModelWrapper(GCN):
  """GCN encoder followed by a linear classification head.

  The parent GCN is built with (in_channels, hidden_channels, num_layers) and
  its output is fed to `final_layer`, a Linear(hidden_channels, out_channels).
  """

  def __init__(self, in_channels: int, hidden_channels: int, num_layers: int, out_channels: int):
    # The parent network produces the node embeddings; the linear head below
    # maps them to out_channels class scores.
    super().__init__(in_channels, hidden_channels, num_layers)
    self.out_channels = out_channels
    self.final_layer = nn.Linear(hidden_channels, out_channels)

  def forward(self, x: torch.Tensor, edge_index: Adj):
    """Return (class scores, node embeddings) for every node.

    The training and evaluation helpers in this notebook unpack two values
    from the model call, and the other wrappers return the same pair, so the
    embeddings are returned alongside the scores.
    """
    node_embeddings = super().forward(x, edge_index)
    output = self.final_layer(node_embeddings)
    return output, node_embeddings


In [None]:
from torch_geometric.nn import GAT

class GATModelWrapper(GAT):
  """GAT encoder (optionally the v2 variant) followed by a linear classification head."""

  def __init__(self, in_channels: int, hidden_channels: int, num_layers: int, out_channels: int, v2: bool):
    # The parent network yields the node embeddings; final_layer turns them into class scores
    super().__init__(in_channels, hidden_channels, num_layers, v2=v2)
    self.out_channels = out_channels
    self.final_layer = nn.Linear(hidden_channels, out_channels)

  def forward(self, x: torch.Tensor, edge_index: Adj):
    """Return (class scores, node embeddings)."""
    node_embeddings = super().forward(x, edge_index)
    return self.final_layer(node_embeddings), node_embeddings

In [None]:
from torch_geometric.nn import GraphSAGE

class GraphSAGEModelWrapper(GraphSAGE):
  """GraphSAGE encoder followed by a linear classification head."""

  def __init__(self, in_channels: int, hidden_channels: int, num_layers: int, out_channels: int):
    # The parent network yields the node embeddings; final_layer turns them into class scores
    super().__init__(in_channels, hidden_channels, num_layers)
    self.out_channels = out_channels
    self.final_layer = nn.Linear(hidden_channels, out_channels)

  def forward(self, x: torch.Tensor, edge_index: Adj):
    """Return (class scores, node embeddings)."""
    node_embeddings = super().forward(x, edge_index)
    return self.final_layer(node_embeddings), node_embeddings

In [None]:
from torch_geometric.nn import Node2Vec
from torch import Tensor

class Node2VecWrapper(Node2Vec):
  """Node2Vec embedding model with an extra linear layer on top of the embeddings."""
  def __init__(self, edge_index, embedding_size, walk_length, context_size, walks_per_node, num_negative_samples, p, q, sparse, out_channels):
    # All random-walk settings are forwarded unchanged to Node2Vec.
    super().__init__(edge_index, embedding_dim=embedding_size, walk_length=walk_length,
                     context_size=context_size, walks_per_node=walks_per_node,
                     num_negative_samples=num_negative_samples, p=p, q=q, sparse=sparse)
    # Maps an embedding of size embedding_size to out_channels scores.
    self.final_layer = nn.Linear(embedding_size, out_channels)
  def forward(self):
    """Return (softmax over dim 1 of the linear head's output, embeddings from the parent forward).

    NOTE(review): the first element is already softmax-normalised; if it is ever
    passed to F.cross_entropy (which applies log-softmax itself) the softmax
    would be applied twice — confirm against the caller.
    """
    x = super().forward()
    output = F.softmax(self.final_layer(x), dim=1)
    return output, x
  def test(
    self,
    train_z: Tensor,
    train_y: Tensor,
    test_z: Tensor,
    test_y: Tensor,
    solver: str = 'lbfgs',
    multi_class: str = 'auto',
    *args,
    **kwargs,
    ) -> np.ndarray:
    r"""Fit a logistic regression on (train_z, train_y) and return its
    predicted labels for test_z.

    test_y is accepted but not used here: no accuracy is computed, the
    predictions themselves are returned."""
    from sklearn.linear_model import LogisticRegression

    clf = LogisticRegression(solver=solver, multi_class=multi_class, *args,
                            **kwargs).fit(train_z.detach().cpu().numpy(),
                                          train_y.detach().cpu().numpy())
    y_pred = clf.predict(test_z.detach().cpu().numpy())
    return y_pred

In [None]:
from torch_geometric.nn import GIN

class GINWrapper(GIN):
  """GIN encoder followed by a linear classification head."""

  def __init__(self, in_channels: int, hidden_channels: int, num_layers: int, out_channels: int):
    # The parent network yields the node embeddings; final_layer turns them into class scores
    super().__init__(in_channels, hidden_channels, num_layers)
    self.out_channels = out_channels
    self.final_layer = nn.Linear(hidden_channels, out_channels)

  def forward(self, x: torch.Tensor, edge_index: Adj):
    """Return (class scores, node embeddings)."""
    node_embeddings = super().forward(x, edge_index)
    return self.final_layer(node_embeddings), node_embeddings

# Training code



In [71]:
# @title [RUN] Hyperparameters GNN

# Epoch count used when the training loops are called with Cora=True
NUM_EPOCHS_CORA =  10 #@param {type:"integer"}
# Epoch count used when the training loops are called with Cora=False
NUM_EPOCHS_ARVIX =  110 #@param {type:"integer"}
# Learning rate passed to the Adam optimiser in the training loops
LR         = 0.01 #@param {type:"number"}
# Hidden / embedding width passed as hidden_channels to the model wrappers
HIDDEN_DIM = 128  #@param {type:"integer"}


In [None]:
# Code taken from L45 practical notebook
def train_gnn(X, edge_indices, y, mask, model, optimiser, device):
    """Run one full-graph optimisation step and return the training loss.

    Args:
      X, edge_indices: inputs forwarded to model(X, edge_indices).
      y: target labels for the nodes selected by `mask`.
      mask: selects the training rows of the model output.
      model: callable returning (class scores, embeddings).
      optimiser: optimiser over the model parameters.
      device: device all tensors are moved to before the step.
    Returns:
      The loss tensor of this step, detached from the graph (`loss.data`).
    """
    model.train()
    # move every tensor to the target device
    X, edge_indices, y, mask = (t.to(device) for t in (X, edge_indices, y, mask))
    # forward pass, loss on the masked nodes only, then one optimiser step
    optimiser.zero_grad()
    scores, _ = model(X, edge_indices)
    loss = F.cross_entropy(scores[mask], y)
    loss.backward()
    optimiser.step()
    return loss.data

# Training loop using subgraph batching from paper 'Inductive Representation Learning on Large Graphs' https://arxiv.org/pdf/1706.02216.pdf
def train_gnn_subgraph(data_batch, model, optimiser, device):
  """Run one epoch of mini-batch training and return the mean batch loss.

  Args:
    data_batch: sized iterable of batches; each batch must provide
      .to(device), .batch_size, .x, .edge_index and .y.
    model: callable returning (class scores, embeddings).
    optimiser: optimiser over the model parameters.
    device: device each batch is moved to.
  Returns:
    Average of the per-batch losses as a Python float.
  Raises:
    ZeroDivisionError: if data_batch contains no batches.
  """
  # The evaluation helpers switch the model to eval mode after every epoch, so
  # training mode has to be restored here (as train_gnn does) before any
  # gradient step is taken.
  model.train()
  total_loss = 0
  for batch in data_batch:
    # Put batch in device
    batch = batch.to(device)
    # Only the first batch_size rows are scored; presumably these are the seed
    # nodes of the sampled subgraph (NeighborLoader convention) — TODO confirm.
    batch_size = batch.batch_size
    optimiser.zero_grad()
    y_out, _ = model(batch.x, batch.edge_index)
    y_out = y_out[:batch_size]
    # flatten the labels to a 1-D vector as expected by cross_entropy
    batch_y = torch.reshape(batch.y[:batch_size], (-1,))
    loss = F.cross_entropy(y_out, batch_y)
    loss.backward()
    optimiser.step()
    # Keep a running total of the loss
    total_loss += float(loss)

  # Get the average loss across all the batches
  return total_loss / len(data_batch)

def evaluate_gnn(X, edge_indices, y, mask, model, num_classes, device):
    """Evaluate the model on the nodes selected by `mask`.

    Args:
      X, edge_indices: inputs forwarded to model(X, edge_indices).
      y: true labels of the nodes selected by `mask`.
      mask: selects the rows of the model output to score.
      model: callable returning (class scores, node embeddings).
      num_classes: total number of classes; fixes the length of the
        per-class accuracy vector.
      device: device all tensors are moved to.
    Returns:
      (accuracy, per_class_accuracy, node_embeddings) where accuracy is a
      0-dim tensor in percent, per_class_accuracy has length num_classes
      (NaN for classes with no node in this split) and node_embeddings is the
      second output of the model for all nodes.
    """
    model.eval()
    # Put data on device
    X = X.to(device)
    edge_indices = edge_indices.to(device)
    y = y.to(device)
    mask = mask.to(device)
    # Evaluate
    with torch.no_grad():
      y_out, node_embeddings = model(X, edge_indices)
    y_hat = y_out[mask].max(1)[1]
    is_correct = y_hat.eq(y)
    accuracy = 100.0 * (is_correct.sum() / len(y))

    # Per-class accuracy. Both counts are padded to num_classes so the division
    # is well-defined even when the highest class ids do not occur in this split.
    per_class_counts = torch.bincount(y[is_correct], minlength=num_classes).to(torch.float)
    total_per_class = torch.bincount(y, minlength=num_classes)
    per_class_accuracy = torch.div(per_class_counts, total_per_class)

    return accuracy, per_class_accuracy, node_embeddings
    
# Training loop
def train_eval_loop_gnn(model, edge_indices, train_x, train_y, train_mask, valid_x, valid_y, valid_mask, 
                             test_x, test_y, test_mask, num_classes, seed, filename, device, Cora, subgraph_batches=None):
    """Train a GNN, evaluate it each epoch, then test it and save all artefacts.

    Args:
      model: GNN whose call returns (class scores, node embeddings).
      edge_indices: graph connectivity shared by all three splits.
      train_x / valid_x / test_x: node features passed to the model for each split.
      train_y / valid_y / test_y: labels of the nodes selected by the matching mask.
      train_mask / valid_mask / test_mask: node selectors for each split.
      num_classes: number of classes, forwarded to evaluate_gnn.
      seed: run identifier; only used in the saved file names and results record.
      filename: path prefix (relative to file_path) for everything that is saved.
      device: device to run on.
      Cora: if truthy run NUM_EPOCHS_CORA epochs, otherwise NUM_EPOCHS_ARVIX.
      subgraph_batches: optional loader of subgraph batches; when given,
        training uses train_gnn_subgraph instead of full-graph train_gnn.
    Returns:
      training_stats: dict of per-epoch lists with keys 'train_acc', 'val_acc', 'epoch'.

    Side effects: writes <filename>_<seed>.pkl, <filename>_<seed>_emb.pt and
    <filename>_<seed>_model.pt, and appends one record to <filename>.pkl.
    NOTE(review): with zero epochs train_class_acc / valid_class_acc are never
    assigned and building the final results list would fail.
    """
    optimiser = optim.Adam(model.parameters(), lr=LR)
    training_stats = None
    # Choose number of epochs
    NUM_EPOCHS = NUM_EPOCHS_CORA if Cora else NUM_EPOCHS_ARVIX
    # Training loop
    for epoch in range(NUM_EPOCHS):
        # If subgraph batching is not provided, use the full graph for training. Otherwise use subgraph batch training regime
        if subgraph_batches is None:
          train_loss = train_gnn(train_x, edge_indices, train_y, train_mask, model, optimiser, device)
        else:
          train_loss = train_gnn_subgraph(subgraph_batches, model, optimiser, device)
        # Calculate accuracy on full graph  
        train_acc, train_class_acc, _ = evaluate_gnn(train_x, edge_indices, train_y, train_mask, model, num_classes, device)
        valid_acc, valid_class_acc, _ = evaluate_gnn(valid_x, edge_indices, valid_y, valid_mask, model, num_classes, device)
        # Log every 10th epoch and the last one
        if epoch % 10 == 0 or epoch == (NUM_EPOCHS-1):
            print(f"Epoch {epoch} with train loss: {train_loss:.3f} train accuracy: {train_acc:.3f} validation accuracy: {valid_acc:.3f}")
            print("Per class train accuracy: ", train_class_acc)
            print("Per class val accuracy: ", valid_class_acc)
        # store the accuracies for the final plot (the loss is printed above but not stored)
        epoch_stats = {'train_acc': train_acc, 'val_acc': valid_acc, 'epoch':epoch}
        training_stats = update_stats(training_stats, epoch_stats)

    # Lets look at our final test performance
    # The node embeddings that get saved are the ones returned by this test-time evaluation call
    test_acc, test_class_acc, node_embeddings = evaluate_gnn(test_x, edge_indices, test_y, test_mask, model, num_classes, device)
    print(f"Our final test accuracy for the GNN is: {test_acc:.3f}")
    print("Final per class accuracy on test set: ", test_class_acc)

    # Save the training stats and node embeddings of this run (one pair of files per seed)
    save_training_info(training_stats, node_embeddings, filename+"_"+str(seed))
    # Save final results; the per-class train/val accuracies are those of the last epoch
    final_results_list = [seed, test_acc, test_class_acc, train_class_acc, valid_class_acc]
    save_final_results(final_results_list, filename)
    # Save final model weights incase we want to do further inference later
    torch.save(model.state_dict(), file_path+filename+"_" + str(seed) + "_model.pt")
    return training_stats

In [72]:
def set_seeds(seed):
  """Seed torch, numpy and Python's random module with the same value (and announce it)."""
  print("SETTING SEEDS TO: ", str(seed))
  # one seeding call per source of randomness, in this order: torch, numpy, random
  for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    seed_fn(seed)

In [78]:
# CHANGE: To name of model being tested
# (used as the output sub-folder and file-name prefix for every saved artefact)
filename = "FastRP-coauthor"
# NOTE(review): this rebinds `dataset`, which the ogbn-arxiv cell uses for the
# PygNodePropPredDataset object; re-running that cell's later lines after this
# one would see a string instead.
dataset = "Coauthor"
# use 30 seeds which have been randomly generated using seed_list = [np.random.randint(4294967296 - 1) for i in range(30)]
seeds = [4193977854, 1863727779, 170173784, 2342954646, 116846604, 2105922959, 2739899259, 1024258131, 806299656, 880019963, 1818027900, 2135956485, 3710910636, 1517964140, 4083009686, 2455059856, 400225693, 89475662, 361232447, 3647665043, 1221215631, 2036056847, 1860537279, 516507873, 3692371949, 3300171104, 2794978777, 3303475786, 2952735006, 572297925]

# create folder for saving all model info into if it does not exist already
if not os.path.exists(file_path+filename+"/"):
  os.mkdir(file_path+filename+"/")

if dataset == "Cora":
  print("Using Cora dataset")
  # Get the edge indices and node features for our model. General set up variables for running with all the models
  edge_indices = cora_data.edge_index
  node_features = cora_data.x
  neighbour_dataset = cora_data

  # Get masks and training labels for each split
  train_mask = cora_data.train_mask
  train_y = cora_data.y[train_mask]
  valid_mask = cora_data.val_mask
  valid_y = cora_data.y[valid_mask]
  test_mask = cora_data.test_mask
  test_y = cora_data.y[test_mask]

  num_classes = 7
  # is_cora only selects the epoch budget in the training loops (NUM_EPOCHS_CORA when True)
  is_cora=True

elif dataset=="Coauthor":
  print("Using Coauthor dataset")
  # Get the edge indices and node features for our model. General set up variables for running with all the models
  edge_indices = cs_data.edge_index
  node_features = cs_data.x
  neighbour_dataset = cs_data

  # Get masks and training labels for each split (masks come from the manual 60:20:20 split)
  train_mask = train_mask_cs
  train_y = cs_data.y[train_mask]
  valid_mask = val_mask_cs
  valid_y = cs_data.y[valid_mask]
  test_mask = test_mask_cs
  test_y = cs_data.y[test_mask]

  num_classes = 15
  # Coauthor is trained with the Cora epoch budget (NUM_EPOCHS_CORA)
  is_cora=True

# Otherwise we are using arvix dataset
else:
  print("Using Arvix dataset")
  # Get the edge indices and node features for our model
  edge_indices = arxiv_data.edge_index
  node_features = arxiv_data.x
  neighbour_dataset = arxiv_data

  # Get masks and training labels for each split
  # (here the "masks" are the node-index tensors from the OGB split, not boolean masks)
  train_mask = train_idx
  train_y = arxiv_data.y[train_mask]
  valid_mask = valid_idx
  valid_y = arxiv_data.y[valid_mask]
  test_mask = test_idx
  test_y = arxiv_data.y[test_mask]

  num_classes = 40
  is_cora = False


Using Coauthor dataset


# Training Loops

In [None]:
# Use to flush GPU memory if it gets too full
# (optional maintenance cell; nothing below depends on it)
import gc
torch.cuda.empty_cache()
gc.collect()

In [None]:
# General training loop for all models except GraphSAGE, using the whole graph in training instead of using subgraph batching
# One independent run per seed; the model class below is swapped by hand to test other wrappers.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for seed in seeds:
  set_seeds(seed)
  # Create the model
  model = GATModelWrapper(in_channels = node_features.shape[-1], hidden_channels = HIDDEN_DIM, num_layers=1, out_channels=num_classes, v2=True)
  model = model.to(device)

  # Run training loop
  # The same node_features are passed for all three splits; artefacts are saved under <filename>/<filename>_*
  # (the result is named train_stats_cora whichever dataset was selected)
  print("TRAINING WITH SEED: ", str(seed))
  train_stats_cora = train_eval_loop_gnn(model, edge_indices, node_features, train_y, train_mask, 
                                            node_features, valid_y, valid_mask, node_features, test_y, test_mask, num_classes, seed, filename+"/"+filename, device, is_cora)
  # Print out graphs if not using GPU
  if device == torch.device('cpu'):
    plot_stats(train_stats_cora, name=filename)

In [None]:
# Training loop for GraphSAGE which using subgraph batches instead of the entire graph
# One independent run per seed.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for seed in seeds:
  set_seeds(seed)
  # Original paper uses neighbourhood sizes  S1 = 25 and S2 = 10 so this is what we use
  # NOTE(review): two neighbourhood sizes are sampled while the model below has num_layers=1 — confirm intended.
  train_loader = NeighborLoader(neighbour_dataset, num_neighbors = [25, 10], batch_size=1024, input_nodes=train_mask)

  # Create the model
  model = GraphSAGEModelWrapper(in_channels = node_features.shape[-1], hidden_channels = HIDDEN_DIM, num_layers=1, out_channels=num_classes)
  model = model.to(device)

  # Run training loop (training on the sampled batches, evaluation on the full graph)
  print("TRAINING WITH SEED: ", str(seed))
  train_stats_cora = train_eval_loop_gnn(model, edge_indices, node_features, train_y, train_mask, 
                                            node_features, valid_y, valid_mask, node_features, test_y, test_mask, num_classes, seed, filename+"/"+filename, device, is_cora, subgraph_batches=train_loader)
  # Print out graphs if not using GPU
  if device == torch.device('cpu'):
    plot_stats(train_stats_cora, name=filename)

In [None]:
# Training loop for GIN using subgraph batches instead of the entire graph
# (same set-up as the GraphSAGE cell; only the model class differs)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for seed in seeds:
  set_seeds(seed)
  # Same neighbourhood sizes as the GraphSAGE runs: S1 = 25 and S2 = 10
  train_loader = NeighborLoader(neighbour_dataset, num_neighbors = [25, 10], batch_size=1024, input_nodes=train_mask)

  # Create the model
  model = GINWrapper(in_channels = node_features.shape[-1], hidden_channels = HIDDEN_DIM, num_layers=1, out_channels=num_classes)
  model = model.to(device)

  # Run training loop (training on the sampled batches, evaluation on the full graph)
  print("TRAINING WITH SEED: ", str(seed))
  train_stats_cora = train_eval_loop_gnn(model, edge_indices, node_features, train_y, train_mask, 
                                            node_features, valid_y, valid_mask, node_features, test_y, test_mask, num_classes, seed, filename+"/"+filename, device, is_cora, subgraph_batches=train_loader)
  # Print out graphs if not using GPU
  if device == torch.device('cpu'):
    plot_stats(train_stats_cora, name=filename)

# TESTING LOADING

In [None]:
# Print every stored final-results record.
# NOTE(review): the training cells save under filename+"/"+filename, whereas this
# reads file_path + filename + ".pkl" — confirm which file is meant.
final_results = load_final_results(filename)
for r in final_results:
  print(r)

In [None]:
# Reload the stats and embedding saved for the run with suffix "_1" and plot them.
# NOTE(review): 1 is not one of the values in `seeds`, and the training cells save
# under filename+"/"+filename — confirm this file exists.
training_stats_1, embedding = load_training_info(filename+"_1")
plot_stats(training_stats_1, name="Testing")
print(embedding)
print(node_features)

In [None]:
# Loading stored model weights
# NOTE(review): hidden_channels here is the input feature width, while the training
# cells build the model with HIDDEN_DIM; load_state_dict requires matching shapes —
# confirm the checkpoint "GATV2_1_model.pt" was trained with this size.
model = GATModelWrapper(in_channels = node_features.shape[-1], hidden_channels = node_features.shape[-1], num_layers=1, out_channels=num_classes, v2=True)
model.load_state_dict(torch.load(file_path+filename+"/"+"GATV2_1_model.pt"))
model.eval()

- Plot graph with average training stats
- Save node embeddings for each run
- Save training stats for each run
- Save test accuracy for each run


# FastRP

In [79]:
class FastRPEmbeddingWrapper(nn.Module):
  """Linear classifier applied to precomputed embeddings: input_dim features -> num_classes scores."""

  def __init__(self, input_dim, num_classes):
      super().__init__()
      self.linear = nn.Linear(in_features=input_dim, out_features=num_classes)

  def forward(self, x):
      """Return the class scores for each row of x."""
      return self.linear(x)

In [75]:
# Copied from https://github.com/GTmac/FastRP/blob/master/fastrp.py
# projection method: choose from Gaussian and Sparse
# input matrix: choose from adjacency and transition matrix
# alpha adjusts the weighting of nodes according to their degree
def fastrp_projection(A, seed, q=3, dim=128, projection_method='gaussian', input_matrix='adj', alpha=None):
    """Randomly project the graph matrix, then propagate the projection q-1 more times.

    Args:
      A: N x N adjacency matrix; presumably a scipy sparse matrix — TODO confirm against caller.
      seed: passed as random_state to the sklearn random projection.
      q: number of matrices returned.
      dim: n_components of the random projection.
      projection_method: 'gaussian' or 'sparse'.
      input_matrix: 'adj' uses A as-is; 'trans' divides each row of A by its row sum.
      alpha: if not None, the fitted projection matrix is right-multiplied by
        diag(rowsum(A) ** alpha) before it is applied.
    Returns:
      List [U_1, ..., U_q] with U_1 = transformer.transform(M) and U_i = M @ U_{i-1}.
    Raises:
      AssertionError: for an unknown input_matrix or projection_method.
    """
    assert input_matrix == 'adj' or input_matrix == 'trans'
    assert projection_method == 'gaussian' or projection_method == 'sparse'
    
    N = A.shape[0]
    if input_matrix == 'adj':
        M = A
    else:
        # Change csc_matrix.sum(A) to A.sum as this caused bugs
        # NOTE(review): a row whose sum is 0 makes 1.0 / A.sum(axis=1) divide by zero.
        normalizer = spdiags(np.squeeze(1.0 / A.sum(axis=1) ), 0, N, N)
        M = normalizer @ A
    # Gaussian projection matrix
    if projection_method == 'gaussian':
        transformer = random_projection.GaussianRandomProjection(n_components=dim, random_state=seed)
    # Sparse projection matrix
    else:
        transformer = random_projection.SparseRandomProjection(n_components=dim, random_state=seed)
    # presumably fit() returns the transformer itself, so editing Y.components_
    # below changes what transformer.transform uses — TODO confirm
    Y = transformer.fit(M)
    # Random projection for A
    if alpha is not None:
        # Change csc_matrix.sum(A) to A.sum as this caused bugs
        Y.components_ = Y.components_ @ spdiags( \
                        np.squeeze(np.power(A.sum(axis=1), alpha)), 0, N, N)
    cur_U = transformer.transform(M)
    U_list = [cur_U]
    
    # each further entry is the previous one multiplied by M once more
    for i in range(2, q + 1):
        cur_U = M @ cur_U
        U_list.append(cur_U)
    return U_list

# When weights is None, concatenate instead of linearly combines the embeddings from different powers of A
def fastrp_merge(U_list, weights, normalization=False):
    """Combine the per-power projections into one embedding matrix.

    Args:
      U_list: list of projection matrices (as returned by fastrp_projection).
      weights: one weight per matrix, or None to concatenate along axis 1 instead.
      normalization: if True, L2-normalise each matrix row-wise before merging.
    Returns:
      weights is None: the column-wise concatenation (not rescaled).
      otherwise: the weighted sum passed through sklearn's `scale`.
    """
    # NOTE(review): only an exact csc_matrix first element triggers densification;
    # other sparse formats are passed through unchanged — confirm this is intended.
    dense_U_list = [_U.todense() for _U in U_list] if type(U_list[0]) == csc_matrix else U_list
    _U_list = [normalize(_U, norm='l2', axis=1) for _U in dense_U_list] if normalization else dense_U_list

    if weights is None:
        return np.concatenate(_U_list, axis=1)
    # accumulate the weighted sum; zip stops at the shorter of (_U_list, weights)
    U = np.zeros_like(_U_list[0])
    for cur_U, weight in zip(_U_list, weights):
        U += cur_U * weight
    # U = scale(U.todense())
    # U = normalize(U.todense(), norm='l2', axis=1)
    return scale(np.asarray(U.todense())) if type(U) == csr_matrix else scale(np.asarray(U))

# A is always the adjacency matrix
# the choice between adj matrix and trans matrix is decided in the conf
def fastrp_wrapper(A, conf, seed):
    """Compute FastRP embeddings for adjacency matrix A from a config dict.

    conf must provide 'weights', 'dim', 'projection_method', 'input_matrix',
    'alpha' and 'normalization'. The number of propagation steps is
    len(conf['weights']), so 'weights' must be a sized sequence here.
    """
    weights = conf['weights']
    projections = fastrp_projection(
        A,
        seed,
        q=len(weights),
        dim=conf['dim'],
        projection_method=conf['projection_method'],
        input_matrix=conf['input_matrix'],
        alpha=conf['alpha'],
    )
    return fastrp_merge(projections, weights, conf['normalization'])

In [76]:
# Code adpated from L45 practical notebook
def train_embedding_classifier(X, y, mask, model, optimiser, device):
    """Run one optimisation step of the embedding classifier and return the loss.

    Args:
      X: embeddings forwarded to model(X).
      y: target labels for the rows selected by `mask`.
      mask: selects the training rows of the model output.
      model: callable returning class scores.
      optimiser: optimiser over the model parameters.
      device: device all tensors are moved to before the step.
    Returns:
      The loss tensor of this step, detached from the graph (`loss.data`).
    """
    model.train()
    # move every tensor to the target device
    X, y, mask = (t.to(device) for t in (X, y, mask))
    # forward pass, loss on the masked rows only, then one optimiser step
    optimiser.zero_grad()
    scores = model(X)
    loss = F.cross_entropy(scores[mask], y)
    loss.backward()
    optimiser.step()
    return loss.data

def evaluate_embedding_classifier(X, y, mask, model, num_classes, device):
    """Evaluate the embedding classifier on the rows selected by `mask`.

    Args:
      X: embeddings forwarded to model(X).
      y: true labels of the rows selected by `mask`.
      mask: selects the rows of the model output to score.
      model: callable returning class scores.
      num_classes: total number of classes; fixes the length of the
        per-class accuracy vector.
      device: device all tensors are moved to.
    Returns:
      (accuracy, per_class_accuracy) where accuracy is a 0-dim tensor in
      percent and per_class_accuracy has length num_classes (NaN for classes
      with no sample in this split).
    """
    model.eval()
    # Put data on device
    X = X.to(device)
    y = y.to(device)
    mask = mask.to(device)
    # Evaluate
    with torch.no_grad():
      y_out = model(X)
    y_hat = y_out[mask].max(1)[1]
    is_correct = y_hat.eq(y)
    accuracy = 100.0 * (is_correct.sum() / len(y))

    # Per-class accuracy. Both counts are padded to num_classes so the division
    # is well-defined even when the highest class ids do not occur in this split.
    per_class_counts = torch.bincount(y[is_correct], minlength=num_classes).to(torch.float)
    total_per_class = torch.bincount(y, minlength=num_classes)
    per_class_accuracy = torch.div(per_class_counts, total_per_class)

    return accuracy, per_class_accuracy
    
# Training loop
def train_eval_loop_embedding_classifier(
    model,
    embeddings,
    train_y,
    train_mask,
    valid_y,
    valid_mask,
    test_y,
    test_mask,
    num_classes,
    seed,
    filename,
    device,
    Cora,
):
    """Train a classifier on fixed embeddings, evaluate it and persist the results.

    The model is optimised with Adam (learning rate ``LR``) for
    ``NUM_EPOCHS_CORA`` epochs when ``Cora`` is truthy and ``NUM_EPOCHS_ARVIX``
    epochs otherwise. After every epoch the train and validation splits are
    evaluated; a summary is printed every 10th epoch and on the last one.
    Once training is done the test split is evaluated, and the statistics,
    final results and model weights are written out.

    Args:
        model: classifier to train.
        embeddings: precomputed node embeddings used as the model input.
        train_y, valid_y, test_y: labels of the respective split.
        train_mask, valid_mask, test_mask: selectors of the respective split.
        num_classes: number of classes, forwarded to the evaluation.
        seed: seed of this run; only used to name the saved artefacts.
        filename: prefix of the saved artefacts.
        device: device on which training and evaluation run.
        Cora: selects which epoch-count constant is used.

    Returns:
        The statistics object accumulated by ``update_stats`` over all epochs.
    """
    optimiser = optim.Adam(model.parameters(), lr=LR)
    stats = None
    # The epoch budget depends on the dataset flag
    num_epochs = NUM_EPOCHS_CORA if Cora else NUM_EPOCHS_ARVIX
    final_epoch = num_epochs - 1

    for epoch in range(num_epochs):
        train_loss = train_embedding_classifier(embeddings, train_y, train_mask, model, optimiser, device)
        # Accuracy on the train and validation splits after this epoch's update
        train_acc, train_class_acc = evaluate_embedding_classifier(
            embeddings, train_y, train_mask, model, num_classes, device)
        valid_acc, valid_class_acc = evaluate_embedding_classifier(
            embeddings, valid_y, valid_mask, model, num_classes, device)

        # Report every 10th epoch and always on the last one
        report_now = epoch % 10 == 0 or epoch == final_epoch
        if report_now:
            print(f"Epoch {epoch} with train loss: {train_loss:.3f} train accuracy: {train_acc:.3f} validation accuracy: {valid_acc:.3f}")
            print("Per class train accuracy: ", train_class_acc)
            print("Per class val accuracy: ", valid_class_acc)

        # Keep the accuracies of every epoch for the final plot
        stats = update_stats(stats, {'train_acc': train_acc, 'val_acc': valid_acc, 'epoch': epoch})

    # Final performance on the held-out test split
    test_acc, test_class_acc = evaluate_embedding_classifier(
        embeddings, test_y, test_mask, model, num_classes, device)
    print(f"Our final test accuracy for the GNN is: {test_acc:.3f}")
    print("Final per class accuracy on test set: ", test_class_acc)

    # The embeddings are an input of this loop rather than something the model
    # produces, so the ones passed in are stored alongside the training stats.
    run_tag = filename + "_" + str(seed)
    save_training_info(stats, embeddings, run_tag)
    # Per-class accuracies for train/validation come from the last epoch
    save_final_results([seed, test_acc, test_class_acc, train_class_acc, valid_class_acc], filename)
    # Keep the trained weights in case further inference is needed later
    torch.save(model.state_dict(), file_path + run_tag + "_model.pt")
    return stats

In [80]:
# Use parameters from example in https://github.com/GTmac/FastRP/blob/master/fast-random-projection-blogcatalog.ipynb
# except that the input matrix is the adjacency matrix by default and alpha is a fixed value (it is not tuned here)
input_matrix = 'adj'
alpha = -0.67
conf = {
        'projection_method': 'sparse',
        'input_matrix': input_matrix,
        'weights': [0.0, 0.0, 1.0, 6.67],
        'normalization': True,
        'dim': HIDDEN_DIM,
        'alpha': alpha,
        'C': 0.1
    }

num_nodes = node_features.shape[0]

# Convert the edge list to a scipy sparse adjacency matrix
adj_matrix = to_scipy_sparse_matrix(edge_indices)
# Whether we are using the adjacency or transition matrix
if input_matrix == 'trans':
  # Node degrees are the row sums of the adjacency matrix; taking them from
  # adj_matrix guarantees that the sizes of D and A agree
  degrees = np.asarray(adj_matrix.sum(axis=1)).ravel()
  # Invert only the non-zero degrees: isolated nodes get an all-zero row
  # instead of making the degree matrix singular
  inverse_degrees = np.zeros(degrees.shape[0], dtype=float)
  has_edges = degrees > 0
  inverse_degrees[has_edges] = 1.0 / degrees[has_edges]
  diagonal_inverse_degree = sp.sparse.spdiags(inverse_degrees, 0, degrees.shape[0], degrees.shape[0])
  # Transition matrix = D^-1 A. This has to be a matrix product; an
  # element-wise multiply with a diagonal matrix would drop every off-diagonal entry
  transition_matrix = diagonal_inverse_degree @ adj_matrix
  print("Using transition matrix")
else:
  transition_matrix = adj_matrix
  print("Using adjacency matrix")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for seed in seeds:
  set_seeds(seed)

  # The embeddings are recomputed for every seed
  embeddings = fastrp_wrapper(transition_matrix, conf, seed)
  # convert to tensor
  embeddings = torch.from_numpy(embeddings)

  # Create the model
  model = FastRPEmbeddingWrapper(HIDDEN_DIM, num_classes)
  model = model.to(device)

  # Run training loop
  print("TRAINING WITH SEED: ", str(seed))
  train_stats_cora = train_eval_loop_embedding_classifier(model, embeddings, train_y, train_mask,
                                             valid_y, valid_mask, test_y, test_mask, num_classes, seed, filename+"/"+filename, device, is_cora)
  # Print out graphs if not using GPU
  if device == torch.device('cpu'):
    plot_stats(train_stats_cora, name=filename)

Using adjacency matrix
SETTING SEEDS TO:  4193977854




TRAINING WITH SEED:  4193977854
Epoch 0 with train loss: 2.867 train accuracy: 21.586 validation accuracy: 21.020
Per class train accuracy:  tensor([0.0778, 0.0830, 0.1024, 0.0856, 0.2584, 0.1186, 0.1441, 0.6841, 0.2860,
        0.0000, 0.0139, 0.1665, 0.6508, 0.2334, 0.5619], device='cuda:0')
Per class val accuracy:  tensor([0.1338, 0.0761, 0.0805, 0.0465, 0.2796, 0.0888, 0.1622, 0.6919, 0.1935,
        0.0417, 0.0035, 0.1376, 0.6429, 0.2382, 0.6400], device='cuda:0')
Epoch 9 with train loss: 1.087 train accuracy: 72.628 validation accuracy: 71.129
Per class train accuracy:  tensor([0.4245, 0.6751, 0.7537, 0.5525, 0.8062, 0.5970, 0.4685, 0.7852, 0.5269,
        0.3714, 0.8811, 0.7998, 0.8214, 0.7465, 0.9181], device='cuda:0')
Per class val accuracy:  tensor([0.4789, 0.5326, 0.7073, 0.4302, 0.7742, 0.5535, 0.4189, 0.7946, 0.4645,
        0.4167, 0.8789, 0.7862, 0.8571, 0.7751, 0.9086], device='cuda:0')
Our final test accuracy for the GNN is: 72.549
Final per class accuracy on test set:



TRAINING WITH SEED:  1863727779
Epoch 0 with train loss: 2.839 train accuracy: 22.341 validation accuracy: 21.619
Per class train accuracy:  tensor([0.1627, 0.2491, 0.4358, 0.0389, 0.2560, 0.0913, 0.2748, 0.0343, 0.2602,
        0.0143, 0.1663, 0.3060, 0.0119, 0.1350, 0.7257], device='cuda:0')
Per class val accuracy:  tensor([0.2254, 0.2609, 0.4220, 0.0116, 0.2545, 0.0752, 0.2027, 0.0162, 0.2194,
        0.0833, 0.1661, 0.3194, 0.0000, 0.1427, 0.6229], device='cuda:0')
Epoch 9 with train loss: 1.054 train accuracy: 73.137 validation accuracy: 71.238
Per class train accuracy:  tensor([0.4693, 0.6029, 0.7203, 0.4903, 0.7943, 0.6388, 0.6171, 0.7798, 0.5419,
        0.5429, 0.8718, 0.7801, 0.8135, 0.7694, 0.9124], device='cuda:0')
Per class val accuracy:  tensor([0.4577, 0.5000, 0.6878, 0.3721, 0.7706, 0.5763, 0.5811, 0.8054, 0.4774,
        0.5833, 0.8616, 0.7813, 0.8452, 0.7799, 0.8971], device='cuda:0')
Our final test accuracy for the GNN is: 73.039
Final per class accuracy on test set:



TRAINING WITH SEED:  170173784
Epoch 0 with train loss: 2.802 train accuracy: 27.117 validation accuracy: 25.164
Per class train accuracy:  tensor([0.2241, 0.4332, 0.3862, 0.1556, 0.2500, 0.1498, 0.1216, 0.2004, 0.3075,
        0.0714, 0.6189, 0.2141, 0.1349, 0.2056, 0.4152], device='cuda:0')
Per class val accuracy:  tensor([0.2394, 0.4348, 0.3683, 0.1279, 0.2222, 0.1207, 0.1757, 0.2270, 0.2516,
        0.0417, 0.5952, 0.1990, 0.1071, 0.1778, 0.3886], device='cuda:0')
Epoch 9 with train loss: 1.109 train accuracy: 71.909 validation accuracy: 71.483
Per class train accuracy:  tensor([0.4929, 0.6065, 0.7366, 0.5914, 0.7560, 0.5954, 0.5811, 0.8267, 0.6344,
        0.5571, 0.8753, 0.7703, 0.8333, 0.7066, 0.9029], device='cuda:0')
Per class val accuracy:  tensor([0.5493, 0.5000, 0.7146, 0.5116, 0.7491, 0.5718, 0.5135, 0.8541, 0.5935,
        0.5000, 0.9100, 0.7961, 0.8690, 0.7050, 0.9029], device='cuda:0')
Our final test accuracy for the GNN is: 72.331
Final per class accuracy on test set: 



TRAINING WITH SEED:  2342954646
Epoch 0 with train loss: 2.885 train accuracy: 19.276 validation accuracy: 19.329
Per class train accuracy:  tensor([0.0283, 0.1083, 0.1797, 0.1245, 0.1627, 0.1688, 0.2297, 0.0054, 0.1204,
        0.1857, 0.3776, 0.2568, 0.3690, 0.2056, 0.1905], device='cuda:0')
Per class val accuracy:  tensor([0.0352, 0.0761, 0.1780, 0.0814, 0.1720, 0.2118, 0.1892, 0.0054, 0.1419,
        0.2083, 0.3910, 0.2359, 0.3810, 0.1983, 0.1657], device='cuda:0')
Epoch 9 with train loss: 1.091 train accuracy: 72.555 validation accuracy: 70.311
Per class train accuracy:  tensor([0.4458, 0.6282, 0.7138, 0.5564, 0.8002, 0.5901, 0.5360, 0.7852, 0.6172,
        0.4857, 0.8938, 0.7687, 0.8333, 0.7549, 0.9105], device='cuda:0')
Per class val accuracy:  tensor([0.4718, 0.5109, 0.6951, 0.4767, 0.7814, 0.5421, 0.4189, 0.8054, 0.5355,
        0.3333, 0.8997, 0.7740, 0.8571, 0.7340, 0.9029], device='cuda:0')
Our final test accuracy for the GNN is: 71.078
Final per class accuracy on test set:



TRAINING WITH SEED:  116846604
Epoch 0 with train loss: 2.858 train accuracy: 21.086 validation accuracy: 21.374
Per class train accuracy:  tensor([0.1014, 0.2274, 0.3472, 0.0584, 0.0945, 0.2008, 0.0811, 0.0469, 0.3742,
        0.0286, 0.1628, 0.3421, 0.0873, 0.1689, 0.3962], device='cuda:0')
Per class val accuracy:  tensor([0.0915, 0.1957, 0.3512, 0.0698, 0.1075, 0.2050, 0.0541, 0.0649, 0.3032,
        0.0417, 0.1349, 0.3538, 0.0714, 0.1983, 0.3771], device='cuda:0')
Epoch 9 with train loss: 1.079 train accuracy: 73.156 validation accuracy: 71.401
Per class train accuracy:  tensor([0.3892, 0.6137, 0.7593, 0.5603, 0.7608, 0.6441, 0.5045, 0.8195, 0.5978,
        0.3857, 0.8741, 0.8056, 0.8571, 0.7416, 0.9143], device='cuda:0')
Per class val accuracy:  tensor([0.4507, 0.4457, 0.7463, 0.4419, 0.7204, 0.6128, 0.5000, 0.8216, 0.4839,
        0.2500, 0.8754, 0.8084, 0.8929, 0.7437, 0.9029], device='cuda:0')
Our final test accuracy for the GNN is: 72.849
Final per class accuracy on test set: 



TRAINING WITH SEED:  2105922959
Epoch 0 with train loss: 2.825 train accuracy: 22.269 validation accuracy: 21.129
Per class train accuracy:  tensor([0.0660, 0.3682, 0.1366, 0.2101, 0.1938, 0.3209, 0.0901, 0.6083, 0.0323,
        0.3000, 0.2910, 0.3700, 0.0833, 0.1495, 0.0457], device='cuda:0')
Per class val accuracy:  tensor([0.0563, 0.3587, 0.1073, 0.2674, 0.1935, 0.2802, 0.0946, 0.5351, 0.0581,
        0.2500, 0.2837, 0.3661, 0.0476, 0.1511, 0.0514], device='cuda:0')
Epoch 9 with train loss: 1.086 train accuracy: 72.928 validation accuracy: 72.165
Per class train accuracy:  tensor([0.4434, 0.6390, 0.7463, 0.6070, 0.7907, 0.6433, 0.4369, 0.7906, 0.6387,
        0.5857, 0.8741, 0.8056, 0.8333, 0.7122, 0.9181], device='cuda:0')
Per class val accuracy:  tensor([0.5563, 0.5652, 0.7122, 0.5233, 0.7599, 0.5968, 0.3919, 0.7838, 0.5419,
        0.5000, 0.8997, 0.8206, 0.8810, 0.7376, 0.8971], device='cuda:0')
Our final test accuracy for the GNN is: 72.576
Final per class accuracy on test set:



TRAINING WITH SEED:  2739899259
Epoch 0 with train loss: 2.829 train accuracy: 21.923 validation accuracy: 19.193
Per class train accuracy:  tensor([0.1675, 0.3574, 0.2293, 0.0973, 0.4701, 0.2342, 0.1216, 0.3249, 0.4065,
        0.0714, 0.1998, 0.1854, 0.2341, 0.1060, 0.2095], device='cuda:0')
Per class val accuracy:  tensor([0.1127, 0.3152, 0.1951, 0.0581, 0.4444, 0.2164, 0.0946, 0.2595, 0.3032,
        0.0000, 0.1869, 0.1351, 0.2262, 0.1112, 0.1886], device='cuda:0')
Epoch 9 with train loss: 1.072 train accuracy: 72.437 validation accuracy: 69.847
Per class train accuracy:  tensor([0.4670, 0.6534, 0.7634, 0.5409, 0.7859, 0.6213, 0.4820, 0.8105, 0.5720,
        0.4000, 0.8591, 0.7941, 0.8175, 0.7158, 0.9295], device='cuda:0')
Per class val accuracy:  tensor([0.4577, 0.5435, 0.7561, 0.3953, 0.7455, 0.5558, 0.4054, 0.7892, 0.4645,
        0.3333, 0.8927, 0.7961, 0.8690, 0.7062, 0.8914], device='cuda:0')
Our final test accuracy for the GNN is: 71.786
Final per class accuracy on test set:



TRAINING WITH SEED:  1024258131
Epoch 0 with train loss: 2.849 train accuracy: 21.477 validation accuracy: 21.619
Per class train accuracy:  tensor([0.0778, 0.1372, 0.4049, 0.1051, 0.3804, 0.0791, 0.1036, 0.2780, 0.0968,
        0.0000, 0.5658, 0.0509, 0.3294, 0.1471, 0.2305], device='cuda:0')
Per class val accuracy:  tensor([0.1268, 0.0870, 0.3902, 0.1047, 0.3226, 0.0729, 0.0541, 0.2649, 0.1032,
        0.0833, 0.5848, 0.0614, 0.3929, 0.1572, 0.2743], device='cuda:0')
Epoch 9 with train loss: 1.069 train accuracy: 71.991 validation accuracy: 70.802
Per class train accuracy:  tensor([0.3726, 0.5993, 0.7650, 0.5681, 0.7919, 0.6023, 0.4910, 0.7690, 0.6258,
        0.1429, 0.8972, 0.7572, 0.8135, 0.7364, 0.9162], device='cuda:0')
Per class val accuracy:  tensor([0.4366, 0.4891, 0.7317, 0.5465, 0.7348, 0.5353, 0.4730, 0.7892, 0.5935,
        0.1250, 0.9066, 0.7912, 0.8571, 0.7388, 0.9143], device='cuda:0')
Our final test accuracy for the GNN is: 71.841
Final per class accuracy on test set:



TRAINING WITH SEED:  806299656
Epoch 0 with train loss: 2.892 train accuracy: 19.995 validation accuracy: 20.883
Per class train accuracy:  tensor([0.1132, 0.1913, 0.0642, 0.0895, 0.4545, 0.2053, 0.2658, 0.3357, 0.2925,
        0.0143, 0.1085, 0.3470, 0.2659, 0.1362, 0.0781], device='cuda:0')
Per class val accuracy:  tensor([0.1197, 0.1630, 0.0854, 0.0930, 0.4695, 0.2164, 0.2432, 0.3514, 0.2903,
        0.0833, 0.0830, 0.3784, 0.2857, 0.1403, 0.0971], device='cuda:0')
Epoch 9 with train loss: 1.085 train accuracy: 72.364 validation accuracy: 71.919
Per class train accuracy:  tensor([0.4410, 0.6065, 0.7049, 0.6070, 0.7584, 0.6365, 0.4910, 0.7924, 0.6043,
        0.5000, 0.8799, 0.7949, 0.8333, 0.7328, 0.9200], device='cuda:0')
Per class val accuracy:  tensor([0.5070, 0.5217, 0.7146, 0.5349, 0.7204, 0.6196, 0.4459, 0.7730, 0.5226,
        0.2917, 0.9135, 0.8108, 0.8333, 0.7473, 0.9143], device='cuda:0')
Our final test accuracy for the GNN is: 72.712
Final per class accuracy on test set: 



TRAINING WITH SEED:  880019963
Epoch 0 with train loss: 2.969 train accuracy: 16.010 validation accuracy: 15.840
Per class train accuracy:  tensor([0.0637, 0.0686, 0.0789, 0.1984, 0.3373, 0.1711, 0.1081, 0.3646, 0.0495,
        0.0857, 0.0358, 0.1091, 0.1151, 0.0919, 0.7295], device='cuda:0')
Per class val accuracy:  tensor([0.1127, 0.0652, 0.0756, 0.1860, 0.3226, 0.1777, 0.0811, 0.3135, 0.0645,
        0.0417, 0.0450, 0.0934, 0.0833, 0.1064, 0.7029], device='cuda:0')
Epoch 9 with train loss: 1.135 train accuracy: 70.563 validation accuracy: 69.466
Per class train accuracy:  tensor([0.4764, 0.6065, 0.7447, 0.5370, 0.7835, 0.6198, 0.4505, 0.7960, 0.6215,
        0.2143, 0.8684, 0.7219, 0.8214, 0.6860, 0.9086], device='cuda:0')
Per class val accuracy:  tensor([0.4789, 0.4891, 0.7195, 0.4651, 0.7527, 0.5786, 0.4324, 0.7784, 0.5355,
        0.3333, 0.8789, 0.7248, 0.8810, 0.7110, 0.9029], device='cuda:0')
Our final test accuracy for the GNN is: 70.724
Final per class accuracy on test set: 



TRAINING WITH SEED:  1818027900
Epoch 0 with train loss: 2.839 train accuracy: 21.286 validation accuracy: 21.947
Per class train accuracy:  tensor([0.0943, 0.0866, 0.2024, 0.0700, 0.4856, 0.1452, 0.1351, 0.2708, 0.1699,
        0.3000, 0.2286, 0.2240, 0.1429, 0.1310, 0.5714], device='cuda:0')
Per class val accuracy:  tensor([0.1127, 0.1304, 0.1927, 0.1047, 0.4373, 0.1754, 0.0676, 0.2811, 0.1613,
        0.2083, 0.2284, 0.2310, 0.1429, 0.1439, 0.6400], device='cuda:0')
Epoch 9 with train loss: 1.079 train accuracy: 72.110 validation accuracy: 70.256
Per class train accuracy:  tensor([0.3892, 0.6173, 0.7553, 0.5603, 0.7835, 0.6221, 0.4640, 0.7690, 0.6151,
        0.4857, 0.8764, 0.7957, 0.8095, 0.7179, 0.9181], device='cuda:0')
Per class val accuracy:  tensor([0.3803, 0.5217, 0.7146, 0.4302, 0.7276, 0.5558, 0.4459, 0.7892, 0.5677,
        0.5417, 0.8962, 0.8133, 0.8571, 0.7267, 0.8857], device='cuda:0')
Our final test accuracy for the GNN is: 72.576
Final per class accuracy on test set:



TRAINING WITH SEED:  2135956485
Epoch 0 with train loss: 2.914 train accuracy: 19.967 validation accuracy: 19.438
Per class train accuracy:  tensor([0.0991, 0.2924, 0.2707, 0.1089, 0.1292, 0.1939, 0.1622, 0.3682, 0.3161,
        0.1429, 0.3684, 0.1001, 0.0714, 0.0556, 0.6743], device='cuda:0')
Per class val accuracy:  tensor([0.1408, 0.2826, 0.2463, 0.0930, 0.1111, 0.2050, 0.1351, 0.3838, 0.2839,
        0.0000, 0.3702, 0.0983, 0.1667, 0.0472, 0.6400], device='cuda:0')
Epoch 9 with train loss: 1.105 train accuracy: 72.601 validation accuracy: 71.047
Per class train accuracy:  tensor([0.4670, 0.6426, 0.7756, 0.5447, 0.7584, 0.6814, 0.5090, 0.7960, 0.6409,
        0.5571, 0.8753, 0.8113, 0.8532, 0.6634, 0.9181], device='cuda:0')
Per class val accuracy:  tensor([0.4930, 0.5543, 0.7561, 0.4070, 0.7276, 0.6492, 0.5676, 0.8000, 0.5419,
        0.3750, 0.8893, 0.7936, 0.8810, 0.6723, 0.9086], device='cuda:0')
Our final test accuracy for the GNN is: 71.514
Final per class accuracy on test set:



TRAINING WITH SEED:  3710910636
Epoch 0 with train loss: 2.872 train accuracy: 19.967 validation accuracy: 18.839
Per class train accuracy:  tensor([0.0448, 0.1877, 0.0163, 0.1946, 0.1794, 0.2281, 0.0450, 0.6570, 0.1656,
        0.1000, 0.3637, 0.3363, 0.2381, 0.1197, 0.1219], device='cuda:0')
Per class val accuracy:  tensor([0.0493, 0.1739, 0.0195, 0.1395, 0.1505, 0.1913, 0.0946, 0.5838, 0.0968,
        0.0417, 0.3875, 0.3538, 0.2738, 0.1197, 0.0743], device='cuda:0')
Epoch 9 with train loss: 1.077 train accuracy: 73.447 validation accuracy: 72.519
Per class train accuracy:  tensor([0.4599, 0.5993, 0.7016, 0.5447, 0.7644, 0.6350, 0.4730, 0.7870, 0.6473,
        0.5143, 0.8741, 0.8056, 0.8254, 0.7759, 0.9257], device='cuda:0')
Per class val accuracy:  tensor([0.4930, 0.5326, 0.7073, 0.4186, 0.7491, 0.5718, 0.4865, 0.8108, 0.5677,
        0.5417, 0.9031, 0.8206, 0.8571, 0.7787, 0.8971], device='cuda:0')
Our final test accuracy for the GNN is: 73.312
Final per class accuracy on test set:



TRAINING WITH SEED:  1517964140
Epoch 0 with train loss: 2.844 train accuracy: 22.996 validation accuracy: 22.710
Per class train accuracy:  tensor([0.2217, 0.0542, 0.2797, 0.0078, 0.0837, 0.3163, 0.1081, 0.2112, 0.2409,
        0.1857, 0.5012, 0.1838, 0.3968, 0.0988, 0.6057], device='cuda:0')
Per class val accuracy:  tensor([0.2113, 0.0217, 0.2561, 0.0233, 0.1111, 0.3030, 0.0811, 0.2216, 0.2903,
        0.2917, 0.4913, 0.2064, 0.3333, 0.0907, 0.5829], device='cuda:0')
Epoch 9 with train loss: 1.090 train accuracy: 73.001 validation accuracy: 70.911
Per class train accuracy:  tensor([0.5094, 0.6606, 0.7626, 0.3930, 0.7464, 0.6882, 0.5180, 0.8087, 0.6172,
        0.5000, 0.8845, 0.7564, 0.8770, 0.7207, 0.9067], device='cuda:0')
Per class val accuracy:  tensor([0.5352, 0.5435, 0.6976, 0.3140, 0.7168, 0.6355, 0.5270, 0.7946, 0.5677,
        0.4583, 0.8927, 0.7813, 0.8929, 0.7122, 0.9029], device='cuda:0')
Our final test accuracy for the GNN is: 73.883
Final per class accuracy on test set:



TRAINING WITH SEED:  4083009686
Epoch 0 with train loss: 2.916 train accuracy: 20.176 validation accuracy: 19.984
Per class train accuracy:  tensor([0.1486, 0.0758, 0.0992, 0.0428, 0.2093, 0.1635, 0.1982, 0.5614, 0.0043,
        0.0143, 0.2217, 0.3560, 0.0000, 0.1520, 0.4762], device='cuda:0')
Per class val accuracy:  tensor([0.1831, 0.0652, 0.1024, 0.1163, 0.2330, 0.1162, 0.2027, 0.5514, 0.0065,
        0.0000, 0.2422, 0.3268, 0.0000, 0.1536, 0.4857], device='cuda:0')
Epoch 9 with train loss: 1.059 train accuracy: 73.410 validation accuracy: 72.274
Per class train accuracy:  tensor([0.5165, 0.6209, 0.7626, 0.5175, 0.7835, 0.6760, 0.5631, 0.7852, 0.5376,
        0.4429, 0.8776, 0.7966, 0.8214, 0.7291, 0.9067], device='cuda:0')
Per class val accuracy:  tensor([0.5775, 0.5543, 0.7317, 0.4070, 0.7276, 0.6287, 0.5000, 0.8000, 0.4645,
        0.2500, 0.9135, 0.8182, 0.8810, 0.7473, 0.8686], device='cuda:0')
Our final test accuracy for the GNN is: 73.230
Final per class accuracy on test set:



TRAINING WITH SEED:  2455059856
Epoch 0 with train loss: 2.914 train accuracy: 20.504 validation accuracy: 20.802
Per class train accuracy:  tensor([0.1156, 0.2202, 0.1585, 0.1440, 0.2727, 0.1506, 0.0766, 0.0578, 0.1806,
        0.0000, 0.4804, 0.0697, 0.0913, 0.1721, 0.7657], device='cuda:0')
Per class val accuracy:  tensor([0.0986, 0.2065, 0.1488, 0.1744, 0.2652, 0.1526, 0.1081, 0.0865, 0.1419,
        0.0417, 0.5190, 0.0688, 0.0714, 0.1935, 0.6971], device='cuda:0')
Epoch 9 with train loss: 1.064 train accuracy: 73.610 validation accuracy: 71.701
Per class train accuracy:  tensor([0.3373, 0.5704, 0.7577, 0.5564, 0.8361, 0.6738, 0.4640, 0.8159, 0.5957,
        0.4857, 0.8868, 0.8072, 0.8651, 0.7316, 0.9143], device='cuda:0')
Per class val accuracy:  tensor([0.3451, 0.4891, 0.7195, 0.4535, 0.7814, 0.6059, 0.3919, 0.8378, 0.5290,
        0.3333, 0.8997, 0.8034, 0.8929, 0.7533, 0.9086], device='cuda:0')
Our final test accuracy for the GNN is: 73.039
Final per class accuracy on test set:



TRAINING WITH SEED:  400225693
Epoch 0 with train loss: 2.908 train accuracy: 18.821 validation accuracy: 18.593
Per class train accuracy:  tensor([0.0495, 0.1264, 0.2610, 0.2646, 0.0502, 0.2395, 0.0360, 0.1859, 0.1763,
        0.0000, 0.5624, 0.1378, 0.0714, 0.1403, 0.1010], device='cuda:0')
Per class val accuracy:  tensor([0.0563, 0.1739, 0.2341, 0.3256, 0.0394, 0.1891, 0.0405, 0.2162, 0.2000,
        0.0417, 0.5606, 0.1130, 0.0357, 0.1632, 0.1086], device='cuda:0')
Epoch 9 with train loss: 1.071 train accuracy: 73.228 validation accuracy: 71.838
Per class train accuracy:  tensor([0.4198, 0.5776, 0.7049, 0.5564, 0.7787, 0.6601, 0.5045, 0.7996, 0.6409,
        0.3286, 0.8949, 0.7957, 0.8770, 0.7525, 0.9029], device='cuda:0')
Per class val accuracy:  tensor([0.4507, 0.5000, 0.7000, 0.5581, 0.7348, 0.6150, 0.4865, 0.8324, 0.5355,
        0.2917, 0.9100, 0.7715, 0.9048, 0.7545, 0.9029], device='cuda:0')
Our final test accuracy for the GNN is: 73.747
Final per class accuracy on test set: 



TRAINING WITH SEED:  89475662
Epoch 0 with train loss: 2.810 train accuracy: 25.280 validation accuracy: 25.273
Per class train accuracy:  tensor([0.2406, 0.3141, 0.1902, 0.0078, 0.6065, 0.3582, 0.1216, 0.1245, 0.0731,
        0.1000, 0.3788, 0.2018, 0.6468, 0.1463, 0.2648], device='cuda:0')
Per class val accuracy:  tensor([0.2606, 0.3043, 0.1854, 0.0000, 0.5950, 0.3280, 0.0811, 0.1459, 0.0774,
        0.2500, 0.3633, 0.2064, 0.6548, 0.1608, 0.2743], device='cuda:0')
Epoch 9 with train loss: 1.044 train accuracy: 73.638 validation accuracy: 71.429
Per class train accuracy:  tensor([0.5047, 0.6679, 0.7163, 0.5097, 0.8146, 0.6935, 0.5000, 0.7870, 0.5656,
        0.6143, 0.8834, 0.7744, 0.8413, 0.7384, 0.9238], device='cuda:0')
Per class val accuracy:  tensor([0.5704, 0.5217, 0.7049, 0.3372, 0.7491, 0.6082, 0.4865, 0.7892, 0.4710,
        0.4583, 0.8927, 0.7887, 0.8690, 0.7521, 0.8971], device='cuda:0')
Our final test accuracy for the GNN is: 73.829
Final per class accuracy on test set:  



TRAINING WITH SEED:  361232447
Epoch 0 with train loss: 2.803 train accuracy: 25.107 validation accuracy: 24.782
Per class train accuracy:  tensor([0.0731, 0.1191, 0.2764, 0.1556, 0.2572, 0.2563, 0.0225, 0.6877, 0.0753,
        0.1000, 0.3349, 0.2847, 0.7302, 0.1354, 0.3410], device='cuda:0')
Per class val accuracy:  tensor([0.0775, 0.0652, 0.2878, 0.1395, 0.2151, 0.2437, 0.0405, 0.6649, 0.0774,
        0.0833, 0.3114, 0.3317, 0.7738, 0.1282, 0.3371], device='cuda:0')
Epoch 9 with train loss: 1.059 train accuracy: 72.928 validation accuracy: 72.110
Per class train accuracy:  tensor([0.3514, 0.5884, 0.7610, 0.5875, 0.7835, 0.6365, 0.4324, 0.8069, 0.6022,
        0.2714, 0.8649, 0.7998, 0.8373, 0.7521, 0.9200], device='cuda:0')
Per class val accuracy:  tensor([0.4225, 0.4891, 0.7488, 0.5116, 0.7455, 0.6128, 0.4324, 0.8054, 0.5548,
        0.3333, 0.8754, 0.8059, 0.8690, 0.7533, 0.9143], device='cuda:0')
Our final test accuracy for the GNN is: 73.257
Final per class accuracy on test set: 



TRAINING WITH SEED:  3647665043
Epoch 0 with train loss: 2.915 train accuracy: 21.186 validation accuracy: 20.965
Per class train accuracy:  tensor([0.0755, 0.2166, 0.4846, 0.1012, 0.0215, 0.1163, 0.0721, 0.4639, 0.0753,
        0.0571, 0.0704, 0.1731, 0.5873, 0.1463, 0.6648], device='cuda:0')
Per class val accuracy:  tensor([0.0775, 0.1957, 0.4341, 0.1279, 0.0323, 0.1503, 0.0946, 0.4811, 0.1097,
        0.0833, 0.0588, 0.1720, 0.6310, 0.1342, 0.6286], device='cuda:0')
Epoch 9 with train loss: 1.108 train accuracy: 71.891 validation accuracy: 71.183
Per class train accuracy:  tensor([0.3231, 0.5776, 0.7528, 0.6148, 0.7560, 0.6327, 0.4414, 0.7798, 0.5849,
        0.3429, 0.8984, 0.8039, 0.8532, 0.7166, 0.9162], device='cuda:0')
Per class val accuracy:  tensor([0.3944, 0.5109, 0.7488, 0.5000, 0.7276, 0.6036, 0.4459, 0.7838, 0.5290,
        0.3750, 0.9100, 0.7862, 0.8810, 0.7376, 0.8800], device='cuda:0')
Our final test accuracy for the GNN is: 72.277
Final per class accuracy on test set:



TRAINING WITH SEED:  1221215631
Epoch 0 with train loss: 2.859 train accuracy: 19.858 validation accuracy: 19.275
Per class train accuracy:  tensor([0.0401, 0.3394, 0.1528, 0.2062, 0.3313, 0.1498, 0.0450, 0.3249, 0.0710,
        0.0143, 0.4018, 0.2494, 0.2143, 0.1592, 0.0610], device='cuda:0')
Per class val accuracy:  tensor([0.0423, 0.3587, 0.1390, 0.1744, 0.3262, 0.1845, 0.0405, 0.3514, 0.0258,
        0.0000, 0.3841, 0.2555, 0.2619, 0.1330, 0.0286], device='cuda:0')
Epoch 9 with train loss: 1.064 train accuracy: 72.983 validation accuracy: 71.101
Per class train accuracy:  tensor([0.3561, 0.6137, 0.7846, 0.5798, 0.8002, 0.6190, 0.4324, 0.7996, 0.5656,
        0.4000, 0.8949, 0.7998, 0.8254, 0.7428, 0.9029], device='cuda:0')
Per class val accuracy:  tensor([0.4577, 0.5326, 0.7561, 0.5233, 0.7599, 0.5923, 0.3919, 0.8000, 0.5226,
        0.2083, 0.9031, 0.8108, 0.8690, 0.7050, 0.8971], device='cuda:0')
Our final test accuracy for the GNN is: 73.094
Final per class accuracy on test set:



TRAINING WITH SEED:  2036056847
Epoch 0 with train loss: 2.934 train accuracy: 15.010 validation accuracy: 15.185
Per class train accuracy:  tensor([0.1014, 0.3610, 0.2138, 0.1284, 0.0467, 0.1080, 0.1667, 0.1083, 0.1075,
        0.0571, 0.3441, 0.1247, 0.3095, 0.1044, 0.1752], device='cuda:0')
Per class val accuracy:  tensor([0.1056, 0.3913, 0.2585, 0.0465, 0.0573, 0.1185, 0.1486, 0.1081, 0.0710,
        0.0417, 0.3080, 0.1548, 0.2500, 0.1028, 0.1543], device='cuda:0')
Epoch 9 with train loss: 1.094 train accuracy: 72.701 validation accuracy: 70.911
Per class train accuracy:  tensor([0.3703, 0.6462, 0.7683, 0.5486, 0.7464, 0.6677, 0.5541, 0.7762, 0.5785,
        0.4429, 0.8649, 0.7555, 0.8056, 0.7537, 0.8990], device='cuda:0')
Per class val accuracy:  tensor([0.4296, 0.5435, 0.7366, 0.4186, 0.7276, 0.6150, 0.4324, 0.7622, 0.4774,
        0.5417, 0.8824, 0.7543, 0.8571, 0.7630, 0.8800], device='cuda:0')
Our final test accuracy for the GNN is: 72.658
Final per class accuracy on test set:



TRAINING WITH SEED:  1860537279
Epoch 0 with train loss: 2.811 train accuracy: 23.697 validation accuracy: 22.356
Per class train accuracy:  tensor([0.1297, 0.3574, 0.3447, 0.0428, 0.2703, 0.2464, 0.1306, 0.1643, 0.4645,
        0.1714, 0.1189, 0.1009, 0.4762, 0.2418, 0.3276], device='cuda:0')
Per class val accuracy:  tensor([0.1690, 0.3478, 0.2756, 0.0581, 0.2688, 0.2141, 0.0676, 0.1676, 0.3226,
        0.1250, 0.1661, 0.0835, 0.5476, 0.2418, 0.3429], device='cuda:0')
Epoch 9 with train loss: 1.041 train accuracy: 74.229 validation accuracy: 72.219
Per class train accuracy:  tensor([0.4198, 0.5921, 0.7886, 0.5486, 0.8158, 0.6525, 0.5225, 0.8141, 0.6043,
        0.5571, 0.8499, 0.7736, 0.8294, 0.7686, 0.9238], device='cuda:0')
Per class val accuracy:  tensor([0.4859, 0.5109, 0.7610, 0.4535, 0.7706, 0.6241, 0.4459, 0.8216, 0.5290,
        0.4583, 0.8547, 0.7518, 0.8690, 0.7678, 0.8800], device='cuda:0')
Our final test accuracy for the GNN is: 73.529
Final per class accuracy on test set:



TRAINING WITH SEED:  516507873
Epoch 0 with train loss: 2.759 train accuracy: 24.861 validation accuracy: 24.591
Per class train accuracy:  tensor([0.0613, 0.6137, 0.2959, 0.0856, 0.2572, 0.3985, 0.0721, 0.3718, 0.1484,
        0.1857, 0.3164, 0.1616, 0.3968, 0.1798, 0.1733], device='cuda:0')
Per class val accuracy:  tensor([0.0352, 0.5761, 0.2683, 0.0930, 0.2796, 0.3326, 0.0541, 0.3459, 0.2129,
        0.1667, 0.3875, 0.1572, 0.3929, 0.1862, 0.1943], device='cuda:0')
Epoch 9 with train loss: 1.053 train accuracy: 72.828 validation accuracy: 71.265
Per class train accuracy:  tensor([0.4245, 0.6137, 0.7577, 0.5370, 0.7847, 0.6958, 0.3829, 0.7816, 0.6581,
        0.4571, 0.8788, 0.7539, 0.8135, 0.7251, 0.9048], device='cuda:0')
Per class val accuracy:  tensor([0.5070, 0.4783, 0.7220, 0.4535, 0.7133, 0.6583, 0.4054, 0.7946, 0.6258,
        0.2917, 0.8893, 0.7690, 0.8810, 0.7170, 0.8971], device='cuda:0')
Our final test accuracy for the GNN is: 73.856
Final per class accuracy on test set: 



TRAINING WITH SEED:  3692371949
Epoch 0 with train loss: 2.856 train accuracy: 22.960 validation accuracy: 23.146
Per class train accuracy:  tensor([0.0802, 0.4007, 0.2488, 0.0973, 0.0574, 0.2236, 0.0450, 0.0271, 0.0753,
        0.0143, 0.4122, 0.3224, 0.5476, 0.1697, 0.6400], device='cuda:0')
Per class val accuracy:  tensor([0.1056, 0.4022, 0.2463, 0.1163, 0.0681, 0.2460, 0.1081, 0.0324, 0.0645,
        0.0000, 0.3737, 0.3391, 0.5357, 0.1644, 0.6171], device='cuda:0')
Epoch 9 with train loss: 1.048 train accuracy: 73.465 validation accuracy: 72.056
Per class train accuracy:  tensor([0.4410, 0.5668, 0.7480, 0.5409, 0.7799, 0.7065, 0.5045, 0.8087, 0.6172,
        0.4429, 0.8764, 0.7186, 0.8333, 0.7642, 0.9010], device='cuda:0')
Per class val accuracy:  tensor([0.4789, 0.5652, 0.7171, 0.4767, 0.7419, 0.6629, 0.4324, 0.7838, 0.5419,
        0.3750, 0.8858, 0.7248, 0.8690, 0.7775, 0.8743], device='cuda:0')
Our final test accuracy for the GNN is: 74.428
Final per class accuracy on test set:



TRAINING WITH SEED:  3300171104
Epoch 0 with train loss: 2.963 train accuracy: 16.128 validation accuracy: 16.685
Per class train accuracy:  tensor([0.1085, 0.1913, 0.0740, 0.2179, 0.2153, 0.0677, 0.1486, 0.3899, 0.1183,
        0.0143, 0.1882, 0.1116, 0.2937, 0.1209, 0.5333], device='cuda:0')
Per class val accuracy:  tensor([0.1408, 0.2826, 0.1024, 0.2558, 0.2043, 0.0569, 0.1486, 0.3838, 0.1548,
        0.1667, 0.2215, 0.0958, 0.3333, 0.1149, 0.4800], device='cuda:0')
Epoch 9 with train loss: 1.093 train accuracy: 72.428 validation accuracy: 71.647
Per class train accuracy:  tensor([0.4245, 0.6173, 0.7431, 0.6070, 0.7883, 0.6555, 0.5360, 0.7563, 0.6000,
        0.4143, 0.8811, 0.7941, 0.8690, 0.7037, 0.9105], device='cuda:0')
Per class val accuracy:  tensor([0.4507, 0.5326, 0.7341, 0.4884, 0.7312, 0.6355, 0.4865, 0.7892, 0.6129,
        0.2917, 0.8927, 0.8108, 0.8690, 0.7122, 0.8857], device='cuda:0')
Our final test accuracy for the GNN is: 72.141
Final per class accuracy on test set:



TRAINING WITH SEED:  2794978777
Epoch 0 with train loss: 2.775 train accuracy: 26.453 validation accuracy: 26.145
Per class train accuracy:  tensor([0.0896, 0.5090, 0.2187, 0.1984, 0.1603, 0.1787, 0.0946, 0.6480, 0.1011,
        0.0286, 0.5162, 0.2707, 0.6944, 0.1685, 0.4590], device='cuda:0')
Per class val accuracy:  tensor([0.0986, 0.4891, 0.1854, 0.1512, 0.1541, 0.1549, 0.0270, 0.6270, 0.0903,
        0.0000, 0.5606, 0.2604, 0.7143, 0.1850, 0.4971], device='cuda:0')
Epoch 9 with train loss: 1.095 train accuracy: 71.673 validation accuracy: 70.311
Per class train accuracy:  tensor([0.3892, 0.6462, 0.7228, 0.5681, 0.7847, 0.6312, 0.4730, 0.8177, 0.5591,
        0.3429, 0.8661, 0.7801, 0.8214, 0.7170, 0.9238], device='cuda:0')
Per class val accuracy:  tensor([0.4085, 0.5652, 0.7195, 0.5233, 0.7491, 0.5763, 0.4189, 0.8054, 0.4645,
        0.2917, 0.8789, 0.8108, 0.8571, 0.7231, 0.8800], device='cuda:0')
Our final test accuracy for the GNN is: 70.752
Final per class accuracy on test set:



TRAINING WITH SEED:  3303475786
Epoch 0 with train loss: 2.883 train accuracy: 21.432 validation accuracy: 20.447
Per class train accuracy:  tensor([0.1415, 0.0469, 0.1976, 0.1440, 0.3170, 0.2662, 0.0946, 0.1047, 0.0344,
        0.2714, 0.4815, 0.2330, 0.0000, 0.1217, 0.5162], device='cuda:0')
Per class val accuracy:  tensor([0.1479, 0.0652, 0.1732, 0.1163, 0.2688, 0.2574, 0.0405, 0.1351, 0.0387,
        0.1250, 0.5225, 0.2211, 0.0000, 0.1125, 0.4743], device='cuda:0')
Epoch 9 with train loss: 1.063 train accuracy: 73.510 validation accuracy: 71.565
Per class train accuracy:  tensor([0.4292, 0.6498, 0.8000, 0.5486, 0.7656, 0.6646, 0.3874, 0.7780, 0.6151,
        0.5571, 0.8845, 0.7974, 0.7976, 0.7328, 0.9162], device='cuda:0')
Per class val accuracy:  tensor([0.4789, 0.5543, 0.7659, 0.5233, 0.7384, 0.6173, 0.3649, 0.7784, 0.5161,
        0.2500, 0.8962, 0.7838, 0.8571, 0.7304, 0.9086], device='cuda:0')
Our final test accuracy for the GNN is: 72.958
Final per class accuracy on test set:



TRAINING WITH SEED:  2952735006
Epoch 0 with train loss: 2.938 train accuracy: 18.130 validation accuracy: 17.121
Per class train accuracy:  tensor([0.0731, 0.5415, 0.0634, 0.1751, 0.2667, 0.1970, 0.1622, 0.0686, 0.2624,
        0.0286, 0.1224, 0.0861, 0.6667, 0.1024, 0.7162], device='cuda:0')
Per class val accuracy:  tensor([0.0845, 0.4239, 0.0659, 0.1744, 0.2760, 0.1526, 0.1622, 0.0865, 0.2194,
        0.0000, 0.1176, 0.1007, 0.7381, 0.0859, 0.6914], device='cuda:0')
Epoch 9 with train loss: 1.133 train accuracy: 71.546 validation accuracy: 70.256
Per class train accuracy:  tensor([0.4080, 0.6282, 0.6976, 0.6187, 0.8134, 0.6829, 0.4144, 0.7834, 0.5871,
        0.5571, 0.8695, 0.7646, 0.8333, 0.6888, 0.9162], device='cuda:0')
Per class val accuracy:  tensor([0.4718, 0.5217, 0.7073, 0.5116, 0.7563, 0.6446, 0.3649, 0.8054, 0.4903,
        0.5000, 0.8685, 0.7715, 0.8690, 0.6941, 0.9029], device='cuda:0')
Our final test accuracy for the GNN is: 72.440
Final per class accuracy on test set:



# Node2Vec

In [None]:
from torch_geometric.nn import Node2Vec
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

data_name = "Arxiv"

# Select the graph, the split masks, the class count and the output name for
# the chosen dataset. Every branch must define `filename`, because it decides
# where all results of this cell are written.
if data_name == "Cora":
  data = cora_data
  num_classes = 7
  # CHANGE: To name of model being tested
  filename = "Node2Vec_Cora"
  train_mask = data.train_mask
  valid_mask = data.val_mask
  test_mask = data.test_mask
elif data_name == "Coauthor":
  data = cs_data
  num_classes = 15
  filename = "Node2Vec_Coauthor_CS"
  train_mask = train_mask_cs
  valid_mask = val_mask_cs
  test_mask = test_mask_cs
elif data_name == "Arxiv":
  data = arxiv_data
  neighbour_dataset = arxiv_data
  num_classes = 40
  is_cora = False
  # This branch used to leave `filename` unset, so the cell either failed with
  # a NameError on a fresh kernel or wrote its results under whatever name an
  # earlier cell had left behind.
  filename = "Node2Vec_Arxiv"
  # train_idx / valid_idx / test_idx are defined in an earlier cell
  # (presumably the OGB split indices — TODO confirm).
  train_mask = train_idx
  valid_mask = valid_idx
  test_mask = test_idx
else:
  raise ValueError(f"Unknown data_name: {data_name!r}")

# Get the edge indices, node features and per-split labels for our model
edge_indices = data.edge_index
node_features = data.x
train_y = data.y[train_mask]
valid_y = data.y[valid_mask]
test_y = data.y[test_mask]

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# use 30 seeds which have been randomly generated using seed_list = [np.random.randint(4294967296 - 1) for i in range(30)]
seeds = [4193977854, 1863727779, 170173784, 2342954646, 116846604, 2105922959, 2739899259, 1024258131, 806299656, 880019963, 1818027900, 2135956485, 3710910636, 1517964140, 4083009686, 2455059856, 400225693, 89475662, 361232447, 3647665043, 1221215631, 2036056847, 1860537279, 516507873, 3692371949, 3300171104, 2794978777, 3303475786, 2952735006, 572297925]

# create folder for saving all model info into if it does not exist already
os.makedirs(file_path + filename + "/", exist_ok=True)

# From here on `filename` is the "<model>/<model>" prefix used for every saved file.
filename = filename + "/" + filename

for seed in seeds:
  # set_seeds is defined elsewhere in the notebook (presumably seeds the
  # Python / NumPy / torch RNGs — TODO confirm).
  set_seeds(seed)

  # Hyper-parameters of the Node2Vec model for this run.
  node2vec_config = dict(
      embedding_size=128,
      walk_length=20,
      context_size=10,
      walks_per_node=10,
      num_negative_samples=1,
      p=1,
      q=1,
      sparse=True,
      out_channels=num_classes,
  )

  # Create the model on the target device, together with its walk loader.
  model = Node2VecWrapper(data.edge_index.to(device), **node2vec_config).to(device)
  loader = model.loader(batch_size=128, shuffle=True, num_workers=0)

  # SparseAdam is used alongside sparse=True above (assumes the wrapper
  # produces sparse embedding gradients — verify against Node2VecWrapper).
  optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)

  def train():
    """Run one pass over `loader` and return the mean loss per batch.

    Uses the enclosing cell's `model`, `loader`, `optimizer` and `device`.
    """
    model.train()
    batch_losses = []
    for positive_walks, negative_walks in loader:
      optimizer.zero_grad()
      batch_loss = model.loss(positive_walks.to(device), negative_walks.to(device))
      batch_loss.backward()
      optimizer.step()
      batch_losses.append(batch_loss.item())
    # Average over the number of batches the loader reports.
    return sum(batch_losses) / len(loader)

  @torch.no_grad()
  def find_model_acc(model, train_z, train_y, test_z, test_y, solver: str = 'lbfgs', multi_class: str = 'auto', *args, **kwargs):
    """Score predictions for one split: overall and per-class accuracy.

    Args:
      model: object exposing `test(train_z, train_y, test_z, test_y, ...)`;
        its return value is used as the predicted labels for `test_z`
        (presumably it fits a classifier on the train embeddings — the
        wrapper is defined elsewhere, TODO confirm).
      train_z, train_y: embeddings and labels forwarded to `model.test`.
      test_z, test_y: embeddings and true labels (tensor) of the split scored.
      solver, multi_class, *args, **kwargs: forwarded unchanged to `model.test`.

    Returns:
      (acc, per_class_acc): `acc` is the overall accuracy; `per_class_acc` is
      an array of length `num_classes` where entry i is the recall of class i,
      or NaN when class i has no true samples in this split.
    """
    pred_y = model.test(train_z, train_y, test_z, test_y, solver=solver, multi_class=multi_class, *args, **kwargs)
    true_y = test_y.detach().cpu().numpy()
    acc = accuracy_score(true_y, pred_y)

    # Pin the label set so row i always refers to class i. Without `labels`
    # the matrix only covers classes present in this split/prediction, which
    # misaligns the per-class results between splits and seeds.
    matrix = confusion_matrix(true_y, pred_y, labels=np.arange(num_classes))
    class_totals = matrix.sum(axis=1)

    # Divide only where the class has samples; empty classes stay NaN
    # instead of triggering a division-by-zero warning.
    per_class_acc = np.divide(matrix.diagonal(), class_totals,
                              out=np.full(num_classes, np.nan),
                              where=class_totals > 0)
    return acc, per_class_acc

  @torch.no_grad()
  def test():
    """Score the current embeddings on the train, validation and test splits.

    Each split is scored with `find_model_acc`, always passing the train-split
    embeddings and labels as the fitting data.

    Returns:
      A 7-tuple: (embeddings, acc_train, per_class_train_acc, acc_val,
      per_class_val_acc, acc_test, per_class_test_acc).
    """
    model.eval()

    # The first output of the wrapper is not needed here.
    _, embeddings = model()

    split_scores = []
    # Order matters: train, then validation, then test.
    for split_mask in (train_mask, valid_mask, test_mask):
      split_acc, split_per_class_acc = find_model_acc(
          model, embeddings[train_mask], data.y[train_mask],
          embeddings[split_mask], data.y[split_mask])
      split_scores.append(split_acc)
      split_scores.append(split_per_class_acc)

    return (embeddings, *split_scores)

  # Train for a fixed number of epochs, evaluating after every epoch.
  training_stats = None
  for epoch in range(10):
    loss = train()
    (node_embeddings,
     acc_train, per_class_train_acc,
     acc_val, per_class_val_acc,
     acc_test, per_class_test_acc) = test()
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Acc_train: {acc_train:.4f}, Acc_val: {acc_val:.4f}, Acc_test: {acc_test:.4f}')
    print('Per class train accuracy: ', per_class_train_acc)
    epoch_stats = dict(train_acc=acc_train, val_acc=acc_val, test_acc=acc_test, epoch=epoch)
    training_stats = update_stats(training_stats, epoch_stats)

  # Everything below uses the values from the last epoch of this seed's run.
  run_tag = f"{filename}_{seed}"

  # Save the per-epoch stats and the final node embeddings
  save_training_info(training_stats, node_embeddings, run_tag)

  # Save final results
  final_results_list = [seed, acc_test, per_class_test_acc, per_class_train_acc, per_class_val_acc]
  save_final_results(final_results_list, filename)

  # Save final model weights in case we want to do further inference later
  torch.save(model.state_dict(), f"{file_path}{run_tag}_model.pt")

  plot_stats(training_stats, name=filename)

# Similarity tests

https://github.com/SGDE2020/embedding_stability/blob/master/similarity_tests/similarity_tests.py