In [1]:
import numpy as np

import torch
from sklearn.metrics import mean_squared_error
from torch_geometric.loader import DataLoader

from utils import set_all_seeds

from exploration.dataset import PyGAcademicGraph

# Fix the seed (4) before any data is loaded. set_all_seeds comes from the project's
# `utils` module; presumably it seeds every RNG in use — TODO confirm which libraries it covers.
set_all_seeds(seed=4)

In [2]:
# One dataset sample per batch.
batch_size = 1

# train dataset
train_dataset = PyGAcademicGraph(split="train", setting="transductive")
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)

# val dataset
val_dataset = PyGAcademicGraph(split="val", setting="transductive")
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# test dataset
test_dataset = PyGAcademicGraph(split="test", setting="transductive")
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# number of features: second dimension of the first training sample's x
n_features = train_dataset[0].x.shape[1]

# Summary of what was loaded. The val line reports val_dataset (it previously
# printed the test dataset's length under the "val" label).
print("Number of samples in the train dataset: ", len(train_dataset))
print("Number of samples in the val dataset: ", len(val_dataset))
print("Number of samples in the test dataset: ", len(test_dataset))
print("Output of one sample from the train dataset: ", train_dataset[0])
print("Edge_index :")
print(train_dataset[0].edge_index)
print("Number of features per node: ", n_features)

Number of samples in the train dataset:  9
Number of samples in the val dataset:  9
Number of samples in the test dataset:  9
Output of one sample from the train dataset:  Data(edge_index=[2, 36936], y=[359], x=[359, 8], edge_attr=[36936, 3], domain='Academia', train_mask=[359], val_mask=[359], test_mask=[359], mask=[359])
Edge_index :
tensor([[  0,   0,   0,  ..., 357, 358, 358],
        [  1,  89, 121,  ..., 352,  89, 358]])
Number of features per node:  8


In [3]:
# Calculate the average target on the training set. Only the entries of `y`
# selected by each batch's `mask` contribute to the sum and to the count.
total_sum = 0
total_num = 0
for train_batch in train_dataloader:
    mask = train_batch.mask
    masked_targets = train_batch.y[mask]
    total_sum += torch.sum(masked_targets)
    # Count the targets that were actually selected rather than re-deriving the
    # count from `mask == True`, so the sum and the count always agree.
    total_num += masked_targets.numel()

# Fail loudly instead of silently producing a NaN average (or an opaque
# ZeroDivisionError) when nothing was selected.
if total_num == 0:
    raise ValueError("No masked training targets found; cannot compute the average.")

average = (total_sum / total_num).item()

In [4]:
def evaluate_average(average, dataloader):
    """Score a constant predictor with mean squared error.

    For every batch, the labels ``batch.y[batch.mask]`` are collected and paired
    with predictions that are all equal to ``average``.

    Args:
        average: The constant value predicted for every selected label.
        dataloader: Iterable of batches; each batch must expose ``y`` (a tensor)
            and ``mask`` (used to index ``y``).

    Returns:
        The value of ``mean_squared_error(labels, predictions)`` over all
        selected labels from all batches.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    all_predictions, all_labels = [], []

    for batch in dataloader:
        labels = batch.y[batch.mask].detach().cpu().numpy()
        all_labels.append(labels)
        # Size the predictions from the labels that were actually selected, so
        # the two arrays cannot disagree in length whatever kind of mask is used.
        all_predictions.append(np.full(labels.shape, average, dtype=np.float64))

    if not all_labels:
        # np.concatenate would raise a less helpful ValueError on an empty list.
        raise ValueError("evaluate_average received a dataloader with no batches.")

    all_predictions = np.concatenate(all_predictions)
    all_labels = np.concatenate(all_labels)
    return mean_squared_error(all_labels, all_predictions)

In [5]:

# Score the constant-average predictor on the test dataloader and report it
# with four decimal places.
test_report_template = "Average : MSE on the test set: {:.4f}"
MSE_test = evaluate_average(average, dataloader=test_dataloader)
print(test_report_template.format(MSE_test))

Average : MSE on the test set: 0.0381
