In [None]:
import torch

# Derive the torch / CUDA tags that select the matching prebuilt wheels for the
# PyTorch Geometric companion packages installed in the next cell.
TORCH_version = torch.__version__
# Drop the local build suffix (everything after '+'), keeping only the release part.
TORCH = TORCH_version.split('+')[0]
CUDA_version = torch.version.cuda
# torch.version.cuda is None on CPU-only builds; calling .replace on it would
# raise AttributeError, so fall back to the "cpu" tag in that case.
CUDA = "cpu" if CUDA_version is None else "cu" + CUDA_version.replace('.', '')

In [None]:
%%capture
!pip install torch-scatter==latest+{CUDA}     -f https://pytorch-geometric.com/whl/torch-{TORCH}.html
!pip install torch-sparse==latest+{CUDA}      -f https://pytorch-geometric.com/whl/torch-{TORCH}.html
!pip install torch-cluster==latest+{CUDA}     -f https://pytorch-geometric.com/whl/torch-{TORCH}.html
!pip install torch-spline-conv==latest+{CUDA} -f https://pytorch-geometric.com/whl/torch-{TORCH}.html
!pip install torch-geometric

In [None]:
import numpy as np
import pandas as pd
from plotnine import ggplot, geom_line, aes, xlab, theme, element_blank, ggtitle
import scipy.sparse as sparse
from sklearn.model_selection import KFold
import torch
import torch.nn as nn

from gcn.model import TwoLayerGCN, GCN, TwoLayerChebNet
from gcn.trainer import Trainer, RunConfig
from gcn.utils import Dataset, load_data, set_labels

In [None]:
def set_seed(seed=1):
    """Seed the NumPy and PyTorch random number generators.

    Important for reproducibility: call this before any step that draws random
    numbers.

    Args:
        seed: Integer seed applied to NumPy, to PyTorch and, when CUDA is
            available, to the current CUDA device's generator.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed(seed)

In [None]:
# Training hyper-parameters shared by every run in this notebook. No batch size
# is configured because the whole set is used in each iteration.
run_config = RunConfig(
    output_dir="/content/gcn-training/",
    num_epochs=200,
    learning_rate=0.1,
    weight_decay=5e-4,
)

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
def _evaluate_model_on_dataset(dataset, build_model, use_laplacian, iter):
    """Train and test freshly built models on ``dataset`` and print the accuracy.

    Shared implementation behind ``evaluate_gcn_on_dataset`` and
    ``evaluate_chebnet_on_dataset``, which differ only in the model they build
    and in which graph matrix they hand to the trainer.

    The random seeds are reset once per call (before the data is loaded), not
    once per run.

    Args:
        dataset: Dataset identifier forwarded to ``load_data``; its ``name``
            attribute appears in the printed report.
        build_model: Callable ``(input_size, num_classes) -> model`` invoked
            once per run to create an untrained model.
        use_laplacian: If true, the Laplacian matrix returned by ``load_data``
            is passed to the trainer; otherwise the adjacency matrix is.
        iter: Number of independent train/evaluate runs; must be at least 1.
            (The name shadows the builtin but is kept to match the public
            functions' keyword.)

    Raises:
        ValueError: If ``iter`` is smaller than 1. With no runs the mean and
            standard deviation would be NaN.
    """
    # Validate first so a bad argument fails before any data is loaded.
    if iter < 1:
        raise ValueError(f"iter must be at least 1, got {iter}")

    set_seed()
    (features, _, train_labels, val_labels, test_labels, adjacency_matrix,
     laplacian_matrix, num_classes) = load_data(dataset)
    graph_matrix = laplacian_matrix if use_laplacian else adjacency_matrix

    accuracies = []
    for _ in range(iter):
        # A new model and trainer per run, so every run starts untrained.
        trainer = Trainer(build_model(features.size(1), num_classes))
        trainer.train(features, train_labels, val_labels, graph_matrix, device, run_config, log=False)

        # Only the second value returned by evaluate (the accuracy) is used.
        _, accuracy = trainer.evaluate(features, test_labels, graph_matrix, device)
        accuracies.append(accuracy)
    print(f"\nPerformance on {dataset.name}:\n- test accuracy = {np.mean(accuracies):.3f} +- {np.std(accuracies):.3f}\n")


def evaluate_gcn_on_dataset(dataset: Dataset, iter=1):
    """Train a ``TwoLayerGCN`` on ``dataset`` and print its test accuracy.

    Each run builds a model with 16 hidden units and dropout 0.5, trains it
    with the notebook-level ``run_config`` on ``device`` using the adjacency
    matrix, and evaluates it on the test labels. The mean and standard
    deviation of the accuracy over all runs are printed.

    Args:
        dataset: Dataset to load via ``load_data``.
        iter: Number of train/evaluate runs (default 1, must be >= 1).

    Raises:
        ValueError: If ``iter`` is smaller than 1.
    """
    def build_model(input_size, num_classes):
        return TwoLayerGCN(
            input_size=input_size,
            hidden_size=16,
            output_size=num_classes,
            dropout=0.5
        )

    _evaluate_model_on_dataset(dataset, build_model, use_laplacian=False, iter=iter)


def evaluate_chebnet_on_dataset(dataset: Dataset, k=2, iter=1):
    """Train a ``TwoLayerChebNet`` on ``dataset`` and print its test accuracy.

    Same procedure as ``evaluate_gcn_on_dataset``, except that the model is a
    ``TwoLayerChebNet`` built with parameter ``k`` and the trainer receives the
    Laplacian matrix instead of the adjacency matrix.

    Args:
        dataset: Dataset to load via ``load_data``.
        k: Value forwarded as the ``k`` argument of ``TwoLayerChebNet``.
        iter: Number of train/evaluate runs (default 1, must be >= 1).

    Raises:
        ValueError: If ``iter`` is smaller than 1.
    """
    def build_model(input_size, num_classes):
        return TwoLayerChebNet(
            input_size=input_size,
            hidden_size=16,
            output_size=num_classes,
            dropout=0.5,
            k=k
        )

    _evaluate_model_on_dataset(dataset, build_model, use_laplacian=True, iter=iter)

In [None]:
# Report GCN test accuracy on each benchmark dataset, one run per dataset.
# Pass iter=100 to get uncertainty reported.
for benchmark_dataset in (Dataset.Cora, Dataset.CiteSeer, Dataset.PubMed):
    evaluate_gcn_on_dataset(benchmark_dataset)