In [1]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# change the working directory to the Drive root
%cd /content/drive/My\ Drive

/content/drive/My Drive


In [3]:
import torch

import numpy as np
import scipy.sparse as sparse


def accuracy(output, labels):
    y_pred = output.max(1)[1].type_as(labels)
    correct = y_pred.eq(labels).double()
    correct = correct.sum()
    return correct / len(labels)


def prepare_dataset(labels, num_classes, config):
    """ Splits the loaded dataset into train/validation/test sets. """
    if not config.follow_paper:
        # Follow train/val/test indices as in the official implementation
        # on GitHub: https://github.com/tkipf/pygcn
        train_set = range(140)
        validation_set = range(200, 500)
        test_set = range(500, 1500)
    else:
        # https://arxiv.org/pdf/1609.02907.pdf
        # The original paper proposes that the training set is composed
        # out of 20 samples per class -> 140 samples, but the indices
        # above (range(140)) do not contain 20 samples per class
        # The remaining val/test indices were selected empirically
        classes = [ind for ind in range(num_classes)]
        train_set = []

        # Construct train set (indices) out of 20 samples per each class
        for class_label in classes:
            target_indices = torch.nonzero(labels == class_label, as_tuple=False).tolist()
            train_set += [ind[0] for ind in target_indices[:config.train_size_per_class]]

        # Extract the remaining samples
        validation_test_set = [ind for ind in range(len(labels)) if ind not in train_set]
        # Split the remaining samples into validation/test set
        validation_set = validation_test_set[:config.validation_size]
        test_set = validation_test_set[config.validation_size:config.validation_size+config.test_size]

    return train_set, validation_set, test_set


def enumerate_labels(labels):
    """ Converts the labels from the original
        string form to the integer [0:MaxLabels-1]
    """
    unique = list(set(labels))
    labels = np.array([unique.index(label) for label in labels])
    return labels


def normalize_adjacency(adj):
    """ Normalizes the adjacency matrix according to the
        paper by Kipf et al.
        https://arxiv.org/pdf/1609.02907.pdf
    """
    adj = adj + sparse.eye(adj.shape[0])

    node_degrees = np.array(adj.sum(1))
    node_degrees = np.power(node_degrees, -0.5).flatten()
    node_degrees[np.isinf(node_degrees)] = 0.0
    node_degrees[np.isnan(node_degrees)] = 0.0
    degree_matrix = sparse.diags(node_degrees, dtype=np.float32)

    adj = degree_matrix @ adj @ degree_matrix
    return adj


def convert_scipy_to_torch_sparse(matrix):
    matrix_helper_coo = matrix.tocoo().astype('float32')
    data = torch.FloatTensor(matrix_helper_coo.data)
    rows = torch.LongTensor(matrix_helper_coo.row)
    cols = torch.LongTensor(matrix_helper_coo.col)
    indices = torch.vstack([rows, cols])

    shape = torch.Size(matrix_helper_coo.shape)
    matrix = torch.sparse.FloatTensor(indices, data, shape)
    return matrix


def load_data(config):
    """ Loads the graph data and stores them using
        efficient sparse matrices approach.
    """
    print("Loading Cora dataset...")
    ###############################
    # Loading Graph Nodes Data
    ###############################
    raw_nodes_data = np.genfromtxt(config.nodes_path, dtype="str")
    raw_node_ids = raw_nodes_data[:, 0].astype('int32')  # unique identifier of each node
    raw_node_labels = raw_nodes_data[:, -1]
    labels_enumerated = enumerate_labels(raw_node_labels)  # target labels as integers
    node_features = sparse.csr_matrix(raw_nodes_data[:, 1:-1], dtype="float32")

    ################################
    # Loading Graph Structure Data
    ################################
    ids_ordered = {raw_id: order for order, raw_id in enumerate(raw_node_ids)}
    raw_edges_data = np.genfromtxt(config.edges_path, dtype="int32")
    edges_ordered = np.array(list(map(ids_ordered.get, raw_edges_data.flatten())),
                             dtype='int32').reshape(raw_edges_data.shape)
    ####################
    # ADJACENCY MATRIX
    ####################
    adj = sparse.coo_matrix((np.ones(edges_ordered.shape[0]), (edges_ordered[:, 0], edges_ordered[:, 1])),
                            shape=(labels_enumerated.shape[0], labels_enumerated.shape[0]),
                            dtype=np.float32)
    # Make the adjacency matrix symmetric
    adj = adj + adj.T.multiply(adj.T > adj)
    adj = normalize_adjacency(adj)

    ####################################
    # Adapt the data to PyTorch format
    ####################################
    features = torch.FloatTensor(node_features.toarray())
    labels = torch.LongTensor(labels_enumerated)
    adj = convert_scipy_to_torch_sparse(adj)

    print("Dataset loaded.")

    return features, labels, adj, edges_ordered

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class GCNLayer(nn.Module):
    def __init__(self, in_features, out_features, use_bias=True):
        super(GCNLayer, self).__init__()
        self.weight = nn.Parameter(torch.FloatTensor(torch.zeros(size=(in_features, out_features))))
        if use_bias:
            self.bias = nn.Parameter(torch.FloatTensor(torch.zeros(size=(out_features,))))
        else:
            self.register_parameter('bias', None)

        self.initialize_weights()

    def initialize_weights(self):
        nn.init.xavier_uniform_(self.weight)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, x, adj):
        x = x @ self.weight
        if self.bias is not None:
            x += self.bias

        return torch.sparse.mm(adj, x)


class GCN(nn.Module):
    def __init__(self, node_features, hidden_dim, num_classes, dropout, use_bias=True):
        super(GCN, self).__init__()
        self.gcn_1 = GCNLayer(node_features, hidden_dim, use_bias)
        self.gcn_2 = GCNLayer(hidden_dim, num_classes, use_bias)
        self.dropout = nn.Dropout(p=dropout)

    def initialize_weights(self):
        self.gcn_1.initialize_weights()
        self.gcn_2.initialize_weights()

    def forward(self, x, adj):
        x = F.relu(self.gcn_1(x, adj))
        x = self.dropout(x)
        x = self.gcn_2(x, adj)
        return x

In [46]:
import torch
import torch.nn as nn

import numpy as np
import time


def training_loop(model, features, labels, adj, train_set_ind, val_set_ind, config):
    if config.cuda:
        model.cuda()
        adj = adj.cuda()
        features = features.cuda()
        labels = labels.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr,
                                 weight_decay=config.weight_decay)
    criterion = nn.CrossEntropyLoss()

    validation_acc = []
    validation_loss = []

    if config.use_early_stopping:
        last_min_val_loss = float('inf')
        patience_counter = 0
        stopped_early = False

    t_start = time.time()
    for epoch in range(config.epochs):
        optimizer.zero_grad()
        model.train()

        y_pred = model(features, adj)
        train_loss = criterion(y_pred[train_set_ind], labels[train_set_ind])
        train_acc = accuracy(y_pred[train_set_ind], labels[train_set_ind])
        train_loss.backward()
        optimizer.step()

        with torch.no_grad():
            model.eval()
            val_loss = criterion(y_pred[val_set_ind], labels[val_set_ind])
            val_acc = accuracy(y_pred[val_set_ind], labels[val_set_ind])

            validation_loss.append(val_loss.item())
            validation_acc.append(val_acc)

            if config.use_early_stopping:
                if val_loss < last_min_val_loss:
                    last_min_val_loss = val_loss
                    patience_counter = 0
                else:
                    patience_counter += 1
                    if patience_counter == config.patience:
                        stopped_early = True
                        t_end = time.time()

        #if not config.multiple_runs:
        print(" | ".join([f"Epoch: {epoch:4d}", f"Train loss: {train_loss.item():.4f}",
                          f"Train acc: {train_acc:.4f}",
                          f"Val loss: {val_loss.item():.4f}",
                          f"Val acc: {val_acc:.4f}"]))

        if config.use_early_stopping and stopped_early:
            break

    if (not config.multiple_runs) and config.use_early_stopping and stopped_early:
        print(f"EARLY STOPPING condition met. Stopped at epoch: {epoch}.")
    else:
        t_end = time.time()

    if not config.multiple_runs:
        print(f"Total training time: {t_end-t_start:.4f} seconds")

    return validation_acc, validation_loss


def evaluate_on_test(model, features, labels, adj, test_ind, config):
    if config.cuda:
        model.cuda()
        adj = adj.cuda()
        features = features.cuda()
        labels = labels.cuda()

    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        model.eval()
        y_pred = model(features, adj)
        test_loss = criterion(y_pred[test_ind], labels[test_ind])
        test_acc = accuracy(y_pred[test_ind], labels[test_ind])

    print()
    print(f"Test loss: {test_loss:.4f}  |  Test acc: {test_acc:.4f}")
    if not config.multiple_runs:
        return y_pred
    else:
        return test_acc.item(), test_loss.item()


def multiple_runs(model, features, labels, adj, indices, config, training_loop, evaluate_on_test):
    train_set_ind, val_set_ind, test_set_ind = indices
    acc = []
    loss = []

    t1 = time.time()
    for i in range(config.num_of_runs):
        print("Run:", i+1)
        model.initialize_weights()
        training_loop(model, features, labels, adj,
                      train_set_ind, val_set_ind, config)

        acc_curr, loss_curr = evaluate_on_test(model, features, labels,
                                               adj, test_set_ind, config)
        acc.append(acc_curr)
        loss.append(loss_curr)

    print()
    print(f"ACC:  mean: {np.mean(acc):.4f} | std: {np.std(acc):.4f}")
    print(f"LOSS: mean: {np.mean(loss):.4f} | std: {np.std(loss):.4f}")
    print(f"Total training time: {time.time()-t1:.4f} seconds")
    return loss, acc, np.mean(loss), np.std(loss), np.mean(acc), np.std(acc)

In [None]:
# DO NOT RUN THIS
import torch
from argparse import ArgumentParser

use_cuda = torch.cuda.is_available()

parser = ArgumentParser()
parser.add_argument("--cuda", type=bool, default=use_cuda)
parser.add_argument("--nodes_path", type=str, default="../data/cora.content")
parser.add_argument("--edges_path", type=str, default="../data/cora.cites")
parser.add_argument("--hidden_dim", type=int, default=16)
parser.add_argument("--dropout", type=float, default=0.5)
parser.add_argument("--use_bias", type=bool, default=True)
parser.add_argument("--train_size_per_class", type=int, default=20)
parser.add_argument("--validation_size", type=int, default=500)
parser.add_argument("--test_size", type=int, default=1000)
parser.add_argument("--lr", type=float, default=1e-2)
parser.add_argument("--weight_decay", type=float, default=5e-3)
parser.add_argument("--patience", type=int, default=10)
parser.add_argument("--epochs", type=int, default=200)
parser.add_argument("--use_early_stopping", type=bool, default=True)
parser.add_argument("--multiple_runs", type=bool, default=False)
parser.add_argument("--num_of_runs", type=int, default=100)
parser.add_argument("--follow_paper", type=bool, default=True)

config = parser.parse_args()
if __name__ == "__main__":
    features, labels, adj, edges = load_data(config)
    NUM_CLASSES = int(labels.max().item() + 1)

    train_set_ind, val_set_ind, test_set_ind = prepare_dataset(labels, NUM_CLASSES, config)

    model = GCN(features.shape[1], config.hidden_dim,
                NUM_CLASSES, config.dropout, config.use_bias)

    if not config.multiple_runs:
        print("Started training with 1 run.",
              f"Early stopping: {'Yes' if config.use_early_stopping else 'No'}")
        val_acc, val_loss = training_loop(model, features, labels, adj, train_set_ind, val_set_ind, config)
        out_features = evaluate_on_test(model, features, labels, adj, test_set_ind, config)

    else:
        print(f"Started training with {config.num_of_runs} runs.",
              f"Early stopping: {'Yes' if config.use_early_stopping else 'No'}")
        multiple_runs(model, features, labels, adj,
                      [train_set_ind, val_set_ind, test_set_ind],
                      config, training_loop, evaluate_on_test)

In [138]:
import torch

use_cuda = torch.cuda.is_available()

class Config:
    def __init__(self, cuda=use_cuda, nodes_path="./datasets/cora/cora.content", edges_path="./datasets/cora/cora.cites",
                 hidden_dim=16, dropout=0.5, use_bias=True, train_size_per_class=20, validation_size=500,
                 test_size=1000, lr=1e-2, weight_decay=5e-3, patience=100, epochs=200, use_early_stopping=True,
                 multiple_runs=True, num_of_runs=10, follow_paper=True):
        self.cuda = cuda
        self.nodes_path = nodes_path
        self.edges_path = edges_path
        self.hidden_dim = hidden_dim
        self.dropout = dropout
        self.use_bias = use_bias
        self.train_size_per_class = train_size_per_class
        self.validation_size = validation_size
        self.test_size = test_size
        self.lr = lr
        self.weight_decay = weight_decay
        self.patience = patience
        self.epochs = epochs
        self.use_early_stopping = use_early_stopping
        self.multiple_runs = multiple_runs
        self.num_of_runs = num_of_runs
        self.follow_paper = follow_paper

config = Config()

features, labels, adj, edges = load_data(config)
NUM_CLASSES = int(labels.max().item() + 1)

train_set_ind, val_set_ind, test_set_ind = prepare_dataset(labels, NUM_CLASSES, config)

model = GCN(features.shape[1], config.hidden_dim, NUM_CLASSES, config.dropout, config.use_bias)

if not config.multiple_runs:
    print("Started training with 1 run.", f"Early stopping: {'Yes' if config.use_early_stopping else 'No'}")
    val_acc, val_loss = training_loop(model, features, labels, adj, train_set_ind, val_set_ind, config)
    out_features = evaluate_on_test(model, features, labels, adj, test_set_ind, config)
else:
    print(f"Started training with {config.num_of_runs} runs.", f"Early stopping: {'Yes' if config.use_early_stopping else 'No'}")
    loss_cora, acc_cora, loss_mean_cora, loss_std_cora, acc_mean_cora, acc_std_cora = multiple_runs(model, features, labels, adj, [train_set_ind, val_set_ind, test_set_ind], config, training_loop, evaluate_on_test)

Loading Cora dataset...
Dataset loaded.
Started training with 10 runs. Early stopping: Yes
Run: 1
Epoch:    0 | Train loss: 1.9500 | Train acc: 0.1214 | Val loss: 1.9530 | Val acc: 0.0940
Epoch:    1 | Train loss: 1.8676 | Train acc: 0.3286 | Val loss: 1.9203 | Val acc: 0.1420
Epoch:    2 | Train loss: 1.7667 | Train acc: 0.3429 | Val loss: 1.8703 | Val acc: 0.1620
Epoch:    3 | Train loss: 1.6751 | Train acc: 0.4500 | Val loss: 1.8239 | Val acc: 0.2260
Epoch:    4 | Train loss: 1.5582 | Train acc: 0.5857 | Val loss: 1.7547 | Val acc: 0.2760
Epoch:    5 | Train loss: 1.4493 | Train acc: 0.6571 | Val loss: 1.6671 | Val acc: 0.3720
Epoch:    6 | Train loss: 1.3365 | Train acc: 0.7143 | Val loss: 1.5844 | Val acc: 0.4640
Epoch:    7 | Train loss: 1.2294 | Train acc: 0.7714 | Val loss: 1.5439 | Val acc: 0.4980
Epoch:    8 | Train loss: 1.1347 | Train acc: 0.7929 | Val loss: 1.4736 | Val acc: 0.5480
Epoch:    9 | Train loss: 1.0646 | Train acc: 0.8429 | Val loss: 1.3858 | Val acc: 0.6360
Ep

In [139]:
print(f'GCN test loss:\t\t{loss_mean_cora:.4f} +/- {loss_std_cora:.4f}')
print(f'GCN test accuracy: \t{acc_mean_cora:.4f} +/- {acc_std_cora:.4f}')

GCN test loss:		0.6502 +/- 0.0128
GCN test accuracy: 	0.8074 +/- 0.0079


In [140]:
loss_cora.append(loss_mean_cora)
loss_cora.append(loss_std_cora)
acc_cora.append(acc_mean_cora)
acc_cora.append(acc_std_cora)

In [145]:
from IPython.display import display, HTML
import pandas as pd

df = pd.DataFrame({'Run # / Stat' : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 'Mean', 'Standard Deviation'],
                   'GCN Cora Test Loss' : loss_cora ,
                   'GCN Cora Test Accuracy' : acc_cora})
#display(df)
display(HTML(df.to_html(index=False)))

Run # / Stat,GCN Cora Test Loss,GCN Cora Test Accuracy
1,0.633784,0.826
2,0.658846,0.793
3,0.675436,0.801
4,0.644365,0.809
5,0.651475,0.805
6,0.654251,0.811
7,0.655056,0.808
8,0.658631,0.805
9,0.630789,0.807
10,0.639013,0.809


In [22]:
from IPython.display import display, HTML
import pandas as pd

df = pd.DataFrame({'Iteration #' : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 'Mean', 'Standard Deviation'],
                   'Test Loss' : [ 0.0619, 0.0638, 0.0563, 0.0717, 0.0623, 0.0619, 0.0622, 0.0632, 0.0618, 0.0639, loss_mean.item(), loss_std.item() ],
                   'Test Accuracy' : [0.9765, 0.9735, 0.9773, 0.9724, 0.9751, 0.9761, 0.9745, 0.9770, 0.9740, 0.9751, score_mean.item(), score_std.item()]})
#display(df)
display(HTML(df.to_html(index=False)))

Iteration #,Test Loss,Test Accuracy
1,0.0619,0.9765
2,0.0638,0.9735
3,0.0563,0.9773
4,0.0717,0.9724
5,0.0623,0.9751
6,0.0619,0.9761
7,0.0622,0.9745
8,0.0632,0.977
9,0.0618,0.974
10,0.0639,0.9751


In [81]:
def preprocess_index(cites_data, content_data):

    # Get unique IDs from both files
    content_ids = np.unique(content_data[:, 0])
    cites_ids = np.unique(np.concatenate((cites_data[:, 0], cites_data[:, 1])))

    # Create ID mappings
    content_id_mapping = dict(zip(content_ids, range(len(content_ids))))
    cites_id_mapping = {id: content_id_mapping.get(id, len(content_id_mapping) + idx) for idx, id in enumerate(np.setdiff1d(cites_ids, content_ids))}
    len_diff = len(cites_id_mapping)
    cites_id_mapping.update(content_id_mapping)

    # Update IDs in .cites file
    cites_data[:, 0] = [cites_id_mapping[id] for id in cites_data[:, 0]]
    cites_data[:, 1] = [cites_id_mapping[id] for id in cites_data[:, 1]]

    # Update IDs in .content file
    content_data[:, 0] = [content_id_mapping[id] for id in content_data[:, 0]]
    # Add len_diff rows to content_data
    additional_rows = np.zeros((len_diff, content_data.shape[1]),  dtype=content_data.dtype)
    additional_rows[:, 1:-1] = np.array([['0'] * (content_data.shape[1] - 2)] * len_diff)
    additional_rows[:, 0] = np.arange(len(content_ids), len(content_ids) + len_diff)
    additional_rows[:, -1] = np.array(['NONE'] * len_diff)

    new_content_data = np.vstack((content_data, additional_rows))

    return new_content_data, cites_data.astype(np.int64)

def load_citeseer(config):
    """ Loads the graph data and stores them using
        efficient sparse matrices approach.
    """
    print("Loading Citeseer dataset...")
    ###############################
    # Loading Graph Nodes Data
    ###############################
    raw_nodes_data = np.genfromtxt(config.nodes_path, dtype="str")
    raw_edges_data = np.genfromtxt(config.edges_path, dtype="str")

    raw_nodes_data, raw_edges_data = preprocess_index(raw_edges_data, raw_nodes_data)

    raw_node_ids = raw_nodes_data[:, 0].astype('int32')  # unique identifier of each node
    raw_node_labels = raw_nodes_data[:, -1]
    labels_enumerated = enumerate_labels(raw_node_labels)  # target labels as integers
    node_features = sparse.csr_matrix(raw_nodes_data[:, 1:-1], dtype="float32")

    ################################
    # Loading Graph Structure Data
    ################################
    ids_ordered = {raw_id: order for order, raw_id in enumerate(raw_node_ids)}

    edges_ordered = np.array(list(map(ids_ordered.get, raw_edges_data.flatten())),
                             dtype='int32').reshape(raw_edges_data.shape)
    ####################
    # ADJACENCY MATRIX
    ####################
    adj = sparse.coo_matrix((np.ones(edges_ordered.shape[0]), (edges_ordered[:, 0], edges_ordered[:, 1])),
                            shape=(labels_enumerated.shape[0], labels_enumerated.shape[0]),
                            dtype=np.float32)
    # Make the adjacency matrix symmetric
    adj = adj + adj.T.multiply(adj.T > adj)
    adj = normalize_adjacency(adj)

    ####################################
    # Adapt the data to PyTorch format
    ####################################
    features = torch.FloatTensor(node_features.toarray())
    labels = torch.LongTensor(labels_enumerated)
    adj = convert_scipy_to_torch_sparse(adj)

    print("Dataset loaded.")

    return features, labels, adj, edges_ordered

In [106]:
def prepare_dataset_citeseer(labels, num_classes, config):
    """ Splits the loaded dataset into train/validation/test sets. """
    if not config.follow_paper:
        # Follow train/val/test indices as in the official implementation
        # on GitHub: https://github.com/tkipf/pygcn
        # train_set = range(120)
        # validation_set = range(200, 500)
        # test_set = range(500, 1500)

        idx = torch.randperm(len(labels))
        test_set, validation_set, train_set = idx[:1200], idx[1200:1600], idx[1600:]
    else:
        # https://arxiv.org/pdf/1609.02907.pdf
        # The original paper proposes that the training set is composed
        # out of 20 samples per class -> 140 samples, but the indices
        # above (range(140)) do not contain 20 samples per class
        # The remaining val/test indices were selected empirically
        classes = [ind for ind in range(num_classes)]
        train_set = []

        # Construct train set (indices) out of 20 samples per each class
        for class_label in classes:
            target_indices = torch.nonzero(labels == class_label, as_tuple=False).tolist()
            train_set += [ind[0] for ind in target_indices[:config.train_size_per_class]]

        # Extract the remaining samples
        validation_test_set = [ind for ind in range(len(labels)) if ind not in train_set]
        # Split the remaining samples into validation/test set
        validation_set = validation_test_set[:config.validation_size]
        test_set = validation_test_set[config.validation_size:config.validation_size+config.test_size]

    return train_set, validation_set, test_set

In [108]:
import torch
torch.manual_seed(9)
use_cuda = torch.cuda.is_available()

class Config:
    def __init__(self,
                 cuda=use_cuda,
                 nodes_path="./datasets/citeseer/citeseer.content",
                 edges_path="./datasets/citeseer/citeseer.cites",
                 hidden_dim=64,
                 dropout=0.6,
                 use_bias=True,
                 train_size_per_class=20,
                 validation_size=400,
                 test_size=1000,
                 lr= 0.005, # 1e-2,
                 weight_decay=5e-4, # 5e-3,
                 patience=100,
                 epochs=200,
                 use_early_stopping=True,
                 multiple_runs=True,
                 num_of_runs=10,
                 follow_paper=False):
        self.cuda = cuda
        self.nodes_path = nodes_path
        self.edges_path = edges_path
        self.hidden_dim = hidden_dim
        self.dropout = dropout
        self.use_bias = use_bias
        self.train_size_per_class = train_size_per_class
        self.validation_size = validation_size
        self.test_size = test_size
        self.lr = lr
        self.weight_decay = weight_decay
        self.patience = patience
        self.epochs = epochs
        self.use_early_stopping = use_early_stopping
        self.multiple_runs = multiple_runs
        self.num_of_runs = num_of_runs
        self.follow_paper = follow_paper

config = Config()


In [109]:
features, labels, adj, edges = load_citeseer(config)
NUM_CLASSES = int(labels.max().item() + 1)


Loading Citeseer dataset...
Dataset loaded.


In [110]:
train_set_ind, val_set_ind, test_set_ind = prepare_dataset_citeseer(labels, NUM_CLASSES, config)

model = GCN(features.shape[1], config.hidden_dim, NUM_CLASSES, config.dropout, config.use_bias)

In [116]:
if not config.multiple_runs:
    print("Started training with 1 run.", f"Early stopping: {'Yes' if config.use_early_stopping else 'No'}")
    val_acc, val_loss = training_loop(model, features, labels, adj, train_set_ind, val_set_ind, config)
    out_features = evaluate_on_test(model, features, labels, adj, test_set_ind, config)
else:
    print(f"Started training with {config.num_of_runs} runs.", f"Early stopping: {'Yes' if config.use_early_stopping else 'No'}")
    loss_citeseer, acc_citeseer, loss_mean_citeseer, loss_std_citeseer, acc_mean_citeseer, acc_std_citeseer = multiple_runs(model, features, labels, adj, [train_set_ind, val_set_ind, test_set_ind], config, training_loop, evaluate_on_test)

Started training with 10 runs. Early stopping: Yes
Run: 1
Epoch:    0 | Train loss: 1.9494 | Train acc: 0.1459 | Val loss: 1.9538 | Val acc: 0.1600
Epoch:    1 | Train loss: 1.7583 | Train acc: 0.5223 | Val loss: 1.7860 | Val acc: 0.4925
Epoch:    2 | Train loss: 1.5677 | Train acc: 0.5987 | Val loss: 1.6249 | Val acc: 0.5275
Epoch:    3 | Train loss: 1.3672 | Train acc: 0.6862 | Val loss: 1.4593 | Val acc: 0.5925
Epoch:    4 | Train loss: 1.1871 | Train acc: 0.7238 | Val loss: 1.3057 | Val acc: 0.6525
Epoch:    5 | Train loss: 1.0171 | Train acc: 0.7609 | Val loss: 1.1639 | Val acc: 0.6900
Epoch:    6 | Train loss: 0.9005 | Train acc: 0.7869 | Val loss: 1.0774 | Val acc: 0.7125
Epoch:    7 | Train loss: 0.8017 | Train acc: 0.7985 | Val loss: 0.9956 | Val acc: 0.7450
Epoch:    8 | Train loss: 0.7114 | Train acc: 0.8049 | Val loss: 0.9264 | Val acc: 0.7375
Epoch:    9 | Train loss: 0.6681 | Train acc: 0.8188 | Val loss: 0.8731 | Val acc: 0.7425
Epoch:   10 | Train loss: 0.6039 | Train a

In [124]:
print(f'GCN Citeseer test loss:\t\t{loss_mean_citeseer:.4f} +/- {loss_std_citeseer:.4f}')
print(f'GCN Citeseer test accuracy: \t{acc_mean_citeseer:.4f} +/- {acc_std_citeseer:.4f}')

GCN Citeseer test loss:		1.1484 +/- 0.0065
GCN Citeseer test accuracy: 	0.7087 +/- 0.0026


In [118]:
loss_citeseer.append(loss_mean_citeseer)
loss_citeseer.append(loss_std_citeseer)
acc_citeseer.append(acc_mean_citeseer)
acc_citeseer.append(acc_std_citeseer)

In [144]:
from IPython.display import display, HTML
import pandas as pd

df = pd.DataFrame({'Run # / Stat' : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 'Mean', 'Standard Deviation'],
                   'GCN Citeseer Test Loss' : loss_citeseer ,
                   'GCN Citeseer Test Accuracy' : acc_citeseer})
#display(df)
display(HTML(df.to_html(index=False)))

Run # / Stat,GCN Citeseer Test Loss,GCN Citeseer Test Accuracy
1,1.143853,0.710833
2,1.141904,0.705833
3,1.150969,0.711667
4,1.14758,0.7075
5,1.142655,0.708333
6,1.147345,0.71
7,1.1402,0.708333
8,1.162366,0.703333
9,1.154088,0.7125
10,1.153365,0.709167
