# Install packages
You may need to restart the runtime after installing new packages.

In [None]:
# Print the installed PyTorch version; the wheel index URL in the install cell below is specific to a torch/CUDA build.
!python -c "import torch; print(torch.__version__)"

In [None]:
!pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.13.0+cu116.html
!pip install wandb -Uq

# Import packages

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
import torch_geometric

In [None]:
# Select the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

In [None]:
import wandb

# Log in to Weights & Biases up front; the experiment cell later opens a run with wandb.init.
wandb.login()

# Configuration / Hyper-parameters

In [None]:
# All tunable settings for one experiment. The dict is handed to wandb.init as the
# run config, and the training functions read it back through attribute access.
exp_config = {
    'model': 'GAT',          # architecture name: 'GCN', 'GAT', 'GATv2', 'AGNN' or 'MLP'
    'skip': None,            # truthy -> skip connections in GCN/GAT (both assert n_layers >= 3)
    'dataset': 'Cora',       # 'PPI', a WebKB name ('Cornell'/'Texas'/'Wisconsin'), else a Planetoid name
    'hid_dim': 8,            # hidden width (per attention head for GAT layers)
    'n_layers': 2,           # number of layers handed to the model constructor
    'dropout_ratio': 0.6,    # dropout probability used inside the models
    'in_heads': 8,           # attention heads of the non-final GAT layers
    'out_heads': 1,          # attention heads of the final GAT layer
    'self_loop': True,       # forwarded to the GAT layers as add_self_loops
    'Beta': None,            # forwarded to AGNNConv as requires_grad
    'epochs': 1000,          # maximum number of training epochs
    'batch_size': None,      # batch size of the PPI DataLoaders
    'max_patience': 100,     # epochs without a >1% validation-loss improvement before stopping
    'optimizer': 'Adam',     # attribute name looked up on torch.optim
    'lr': 0.005,             # learning rate
    'weight_decay': 5e-4,    # weight decay passed to the optimizer
}

# The run name doubles as the source of the wandb tags (it is split on '_').
run_name = f"{exp_config['dataset']}_{exp_config['model']}"
print(run_name)

# Datasets

In [None]:
# Load the dataset named in exp_config['dataset']. Three cases are handled:
#   * 'PPI'                             -> separate train/val/test datasets, each wrapped in a DataLoader
#   * 'Cornell' / 'Texas' / 'Wisconsin' -> WebKB with the NormalizeFeatures transform
#   * anything else                     -> passed to Planetoid as the dataset name, "public" split
if exp_config['dataset'] == 'PPI':
    train_dataset = torch_geometric.datasets.PPI(root='./', split='train')
    val_dataset = torch_geometric.datasets.PPI(root='./', split='val')
    test_dataset = torch_geometric.datasets.PPI(root='./', split='test')

    print('Train: ', train_dataset.data)
    print('Val: ', val_dataset.data)
    print('Test: ', test_dataset.data)

    # NOTE(review): exp_config['batch_size'] is None in the default config; it presumably
    # has to be set to an integer before selecting PPI — TODO confirm.
    # Only the training loader shuffles.
    train_loader = torch_geometric.loader.DataLoader(
        train_dataset,
        batch_size=exp_config['batch_size'],
        shuffle=True,
        pin_memory=True,
        num_workers=2,
    )
    val_loader = torch_geometric.loader.DataLoader(
        val_dataset,
        batch_size=exp_config['batch_size'],
        shuffle=False,
        pin_memory=True,
        num_workers=2,
    )
    test_loader = torch_geometric.loader.DataLoader(
        test_dataset,
        batch_size=exp_config['batch_size'],
        shuffle=False,
        pin_memory=True,
        num_workers=2,
    )
elif exp_config['dataset'] in ('Cornell', 'Texas', 'Wisconsin'):
    dataset = torch_geometric.datasets.WebKB(
        root='./',
        name=exp_config['dataset'],
        transform=torch_geometric.transforms.NormalizeFeatures(),
    )
    print(dataset[0])
else:
    dataset = torch_geometric.datasets.Planetoid(
        root='./',
        name=exp_config['dataset'],
        split='public',
        transform=torch_geometric.transforms.NormalizeFeatures(),
    )
    print(dataset.data)
    # Report how many nodes each of the three masks selects.
    print('Training nodes:', dataset.data.train_mask.sum().item())
    print('Validation nodes:', dataset.data.val_mask.sum().item())
    print('Testing nodes:', dataset.data.test_mask.sum().item())

# Models

In [None]:
from torch_geometric.nn.models.basic_gnn import GATv2Conv
from torch_geometric.nn import GCNConv

class GCN(nn.Module):
    """Graph convolutional network built from ``GCNConv`` layers.

    The structure depends on ``n_layers``:
      * ``0``  -> a single ``nn.Linear`` (the adjacency argument is not used),
      * ``1``  -> a single ``GCNConv`` from input features to class scores,
      * else   -> ``[GCNConv, ReLU, Dropout]`` for every layer but the last,
                  followed by one ``GCNConv`` producing the class scores.

    Args:
        input_dim: Number of input features per node.
        hid_dim: Width of the hidden GCNConv layers.
        n_classes: Number of output scores per node.
        n_layers: Number of layers (see above).
        dropout_ratio: Probability passed to every ``nn.Dropout``.
        skip: When truthy, each hidden GCNConv output (before ReLU) has the
            previous hidden GCNConv output added to it. Requires ``n_layers >= 3``.
    """

    def __init__(self, input_dim, hid_dim, n_classes, n_layers, dropout_ratio, skip):
        super().__init__()
        self.input_dim = input_dim
        self.hid_dim = hid_dim
        self.n_classes = n_classes
        self.n_layers = n_layers
        self.dropout_ratio = dropout_ratio
        self.skip = skip

        if self.skip:
            assert self.n_layers >= 3

        if self.n_layers == 0:
            self.net = nn.Linear(self.input_dim, self.n_classes)
            return
        if self.n_layers == 1:
            self.net = GCNConv(self.input_dim, self.n_classes)
            return

        self.net = nn.ModuleList()
        final_idx = self.n_layers - 1
        for depth in range(self.n_layers):
            # Only the first convolution sees the raw input width.
            fan_in = self.input_dim if depth == 0 else self.hid_dim
            if depth == final_idx:
                self.net.append(GCNConv(fan_in, self.n_classes))
            else:
                self.net.extend([
                    GCNConv(fan_in, self.hid_dim),
                    nn.ReLU(),
                    nn.Dropout(self.dropout_ratio),
                ])

    def forward(self, X, A):
        """Return per-node class scores for features ``X`` and edge index ``A``."""
        if self.n_layers == 0:
            return self.net(X)
        if self.n_layers == 1:
            return self.net(X, A)

        residual = None
        for idx, layer in enumerate(self.net[:-1]):
            if idx % 3 != 0:
                # Positions 1 and 2 of every triple are ReLU and Dropout.
                X = layer(X)
                continue

            # Position 0 of every triple is a GCNConv.
            X = layer(X, A)
            if self.skip:
                if idx != 0:
                    X = X + residual
                residual = torch.clone(X)

        # Final GCNConv acts as the classifier; no skip connection is applied here.
        return self.net[-1](X, A)


from torch_geometric.nn import GATConv, GATv2Conv

class GAT(nn.Module):
    """Graph attention network built from ``GATConv`` or ``GATv2Conv`` layers.

    The structure depends on ``n_layers``:
      * ``0``  -> a single ``nn.Linear`` (the adjacency argument is not used),
      * ``1``  -> a single attention layer with ``out_heads`` heads and ``concat=False``,
      * else   -> ``[Dropout, conv(in_heads), ELU]`` for every layer but the last,
                  followed by ``[Dropout, conv(out_heads, concat=False)]``.

    Hidden layers output ``hid_dim * in_heads`` features, which is the input
    width of the layer that follows them.

    Args:
        layer_type: ``'GAT'`` for ``GATConv`` or ``'GATv2'`` for ``GATv2Conv``.
        input_dim: Number of input features per node.
        hid_dim: Output width per attention head of the hidden layers.
        n_classes: Number of output scores per node.
        n_layers: Number of layers (see above).
        in_heads: Attention heads of the hidden layers.
        out_heads: Attention heads of the final layer.
        self_loop: Forwarded to every attention layer as ``add_self_loops``.
        dropout_ratio: Used both for the ``nn.Dropout`` modules and as the
            ``dropout`` argument of every attention layer.
        skip: When truthy, each hidden attention output (before ELU) has the
            previous hidden attention output added to it. Requires ``n_layers >= 3``.

    Raises:
        ValueError: If ``layer_type`` is not ``'GAT'`` or ``'GATv2'``. Previously an
            unknown value left ``self.gconv`` unset and only failed later with an
            AttributeError when the first attention layer was built.
    """

    def __init__(self, layer_type, input_dim, hid_dim, n_classes, n_layers, in_heads, out_heads, self_loop, dropout_ratio, skip):
        super().__init__()
        # Validate before touching torch_geometric so a typo fails with a clear message.
        conv_names = {'GAT': 'GATConv', 'GATv2': 'GATv2Conv'}
        if layer_type not in conv_names:
            raise ValueError(
                'Unknown layer_type {!r}; expected one of {}'.format(layer_type, sorted(conv_names))
            )
        self.gconv = getattr(torch_geometric.nn, conv_names[layer_type])
        self.input_dim = input_dim
        self.hid_dim = hid_dim
        self.n_classes = n_classes
        self.n_layers = n_layers
        self.in_heads = in_heads
        self.out_heads = out_heads
        self.self_loop = self_loop
        self.dropout_ratio = dropout_ratio
        self.skip = skip

        if self.skip:
            assert self.n_layers >= 3

        if self.n_layers == 0:
            self.net = nn.Linear(self.input_dim, self.n_classes)
        elif self.n_layers == 1:
            self.net = self.gconv(self.input_dim, self.n_classes, self.out_heads, concat=False, dropout=self.dropout_ratio, add_self_loops=self.self_loop)
        else:
            self.net = nn.ModuleList()
            for i in range(self.n_layers):
                if i == 0:
                    self.net.append(nn.Dropout(self.dropout_ratio))
                    self.net.append(self.gconv(self.input_dim, self.hid_dim, self.in_heads, dropout=self.dropout_ratio, add_self_loops=self.self_loop))
                    self.net.append(nn.ELU())
                elif i == self.n_layers - 1:
                    self.net.append(nn.Dropout(self.dropout_ratio))
                    self.net.append(self.gconv(self.hid_dim*self.in_heads, self.n_classes, self.out_heads, concat=False, dropout=self.dropout_ratio, add_self_loops=self.self_loop))
                else:
                    self.net.append(nn.Dropout(self.dropout_ratio))
                    # Heads of the previous layer are concatenated, hence hid_dim * in_heads inputs.
                    self.net.append(self.gconv(self.hid_dim*self.in_heads, self.hid_dim, self.in_heads, dropout=self.dropout_ratio, add_self_loops=self.self_loop))
                    self.net.append(nn.ELU())

    def forward(self, X, A):
        """Return per-node class scores for features ``X`` and edge index ``A``."""
        if self.n_layers == 0:
            X = self.net(X)
        elif self.n_layers == 1:
            X = self.net(X, A)
        else:
            # Everything except the final attention layer; that one is applied
            # separately below so it never takes part in the skip connections.
            for i, layer in enumerate(self.net[:-1]):
                if isinstance(layer, self.gconv):
                    X = layer(X, A)
                    if self.skip:
                        # Index 1 is the first attention layer (index 0 is Dropout),
                        # so there is no earlier output to add yet.
                        if i != 1:
                            X = X + prev
                        prev = torch.clone(X)
                else:
                    # Dropout or ELU: no graph structure needed.
                    X = layer(X)

            X = self.net[-1](X, A)
        return X


from torch_geometric.nn import AGNNConv

class AGNN(nn.Module):
    """Attention-based GNN: Linear -> ReLU -> Dropout -> ``n_layers`` x AGNNConv -> Linear.

    Args:
        input_dim: Number of input features per node.
        hid_dim: Output width of the first linear layer and input width of the last.
        n_classes: Number of output scores per node.
        n_layers: Number of ``AGNNConv`` propagation layers.
        dropout_ratio: Probability of the single ``nn.Dropout`` after the first linear layer.
        beta: Forwarded to every ``AGNNConv`` as ``requires_grad``.
    """

    def __init__(self, input_dim, hid_dim, n_classes, n_layers, dropout_ratio, beta):
        super().__init__()
        self.input_dim = input_dim
        self.hid_dim = hid_dim
        self.n_classes = n_classes
        self.n_layers = n_layers
        self.dropout_ratio = dropout_ratio
        self.beta = beta

        # Assemble the stack in execution order, then register it in one go.
        stack = [
            nn.Linear(self.input_dim, self.hid_dim),
            nn.ReLU(),
            nn.Dropout(self.dropout_ratio),
        ]
        stack += [AGNNConv(requires_grad=self.beta) for _ in range(self.n_layers)]
        stack.append(nn.Linear(self.hid_dim, self.n_classes))
        self.net = nn.ModuleList(stack)

    def forward(self, X, A):
        """Return per-node class scores for features ``X`` and edge index ``A``."""
        for module in self.net:
            # Only the propagation layers take the edge index.
            X = module(X, A) if isinstance(module, AGNNConv) else module(X)
        return X


class MLP(nn.Module):
    """Plain multi-layer perceptron baseline that ignores the graph structure.

    With ``n_layers == 1`` the network is a single ``nn.Linear``. Otherwise every
    layer but the last is ``[Linear, ReLU, Dropout]`` and the last is a ``Linear``
    producing the class scores.

    Args:
        input_dim: Number of input features per node.
        hid_dim: Width of the hidden linear layers.
        n_classes: Number of output scores per node.
        n_layers: Number of linear layers.
        dropout_ratio: Probability passed to every ``nn.Dropout``.
    """

    def __init__(self, input_dim, hid_dim, n_classes, n_layers, dropout_ratio):
        super().__init__()
        self.input_dim = input_dim
        self.hid_dim = hid_dim
        self.n_classes = n_classes
        self.n_layers = n_layers
        self.dropout_ratio = dropout_ratio

        self.net = nn.ModuleList()
        if self.n_layers == 1:
            self.net.append(nn.Linear(self.input_dim, self.n_classes))
            return

        final_idx = self.n_layers - 1
        for depth in range(self.n_layers):
            # Only the first linear layer sees the raw input width.
            fan_in = self.input_dim if depth == 0 else self.hid_dim
            if depth == final_idx:
                self.net.append(nn.Linear(fan_in, self.n_classes))
            else:
                self.net.extend([
                    nn.Linear(fan_in, self.hid_dim),
                    nn.ReLU(),
                    nn.Dropout(self.dropout_ratio),
                ])

    def forward(self, X, A):
        """Return per-node class scores; ``A`` is accepted only to match the GNN call signature."""
        out = X
        for module in self.net:
            out = module(out)
        return out

# Training, validation, and testing loops

In [None]:
def train_eval_transductive(config):
    """Train one model full-batch on a single graph, with early stopping on validation loss.

    Reads the notebook-level ``dataset`` and ``device``. Every epoch the whole graph
    is passed through the model; the loss is computed on the nodes selected by
    ``train_mask`` and the validation metrics on those selected by ``val_mask``.

    Args:
        config: Attribute-style configuration (e.g. ``wandb.config``) providing
            ``model``, ``hid_dim``, ``n_layers``, ``dropout_ratio``, ``optimizer``,
            ``lr``, ``weight_decay``, ``epochs`` and ``max_patience``, plus the
            model-specific fields ``skip``, ``in_heads``, ``out_heads``,
            ``self_loop`` and ``Beta`` where the chosen model uses them.

    Returns:
        The model in the state reached at the last executed epoch. Weights are
        NOT rolled back to the epoch with the best validation score.

    Raises:
        ValueError: If ``config.model`` is not a supported architecture name.
    """
    # Index the dataset once; every dataset[0] access re-applies the transform.
    graph = dataset[0]
    X = graph.x.to(device)
    A = graph.edge_index.to(device)
    y = graph.y.to(device)
    train_mask = graph.train_mask.to(device)
    val_mask = graph.val_mask.to(device)
    y_train = y[train_mask]
    y_val = y[val_mask]

    if config.model == 'GCN':
        model = GCN(input_dim=dataset.num_features,
                    hid_dim=config.hid_dim,
                    n_classes=dataset.num_classes,
                    n_layers=config.n_layers,
                    dropout_ratio=config.dropout_ratio,
                    skip=config.skip)
    elif config.model == 'GAT' or config.model == 'GATv2':
        model = GAT(layer_type=config.model,
                    input_dim=dataset.num_features,
                    hid_dim=config.hid_dim,
                    n_classes=dataset.num_classes,
                    n_layers=config.n_layers,
                    in_heads=config.in_heads,
                    out_heads=config.out_heads,
                    self_loop=config.self_loop,
                    dropout_ratio=config.dropout_ratio,
                    skip=config.skip)
    elif config.model == 'AGNN':
        model = AGNN(input_dim=dataset.num_features,
                     hid_dim=config.hid_dim,
                     n_classes=dataset.num_classes,
                     n_layers=config.n_layers,
                     dropout_ratio=config.dropout_ratio,
                     beta=config.Beta)
    elif config.model == 'MLP':
        model = MLP(input_dim=dataset.num_features,
                    hid_dim=config.hid_dim,
                    n_classes=dataset.num_classes,
                    n_layers=config.n_layers,
                    dropout_ratio=config.dropout_ratio)
    else:
        # Previously an unknown name fell through and crashed with UnboundLocalError.
        raise ValueError('Unknown model {!r}'.format(config.model))
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    # Take the optimizer name from the config passed in (not the global exp_config),
    # consistent with how lr and weight_decay are read.
    optimizer = getattr(torch.optim, config.optimizer)(params=model.parameters(),
                                                       lr=config.lr,
                                                       weight_decay=config.weight_decay)

    print('Training and validating {}'.format(config.model))
    best_val_acc = 0
    best_val_loss = 1e10
    patience = 0
    for epoch in tqdm(range(config.epochs)):
        # ---- one full-batch optimisation step ----
        model.train()
        optimizer.zero_grad()

        output = model(X, A)
        output_train = output[train_mask]

        train_loss = criterion(output_train, y_train)
        train_loss.backward()
        optimizer.step()

        # Training accuracy is only consumed by the (currently disabled) wandb logging below.
        _, pred_train = torch.max(output_train, 1)
        train_acc = y_train.eq(pred_train).sum() / len(y_train)

        # ---- validation with dropout disabled ----
        model.eval()
        with torch.no_grad():
            output = model(X, A)
            output_val = output[val_mask]

            val_loss = criterion(output_val, y_val)
            _, pred_val = torch.max(output_val, 1)
            val_acc = y_val.eq(pred_val).sum() / len(y_val)

            # wandb.log({'train loss':train_loss, 'train accuracy':train_acc,
            #        'val loss':val_loss, 'val accuracy':val_acc})

            if val_acc > best_val_acc:
                best_val_acc = val_acc
                # wandb.run.summary['best val accuracy'] = best_val_acc
                # wandb.run.summary['best epoch'] = epoch

            # Early stopping: the validation loss must improve by more than 1%
            # to reset the patience counter.
            if val_loss < (best_val_loss * 0.99):
                best_val_loss = val_loss
                patience = 0
            else:
                patience += 1
                if patience == config.max_patience:
                    print('Early stopping at Epoch {}'.format(epoch))
                    return model

    return model


def test_transductive(model):
    """Evaluate ``model`` on the test nodes of the notebook-level single-graph ``dataset``.

    Args:
        model: A module called as ``model(X, A)`` with node features and edge index.

    Returns:
        A zero-dimensional tensor holding the fraction of test nodes classified correctly.
    """
    print('Testing model')
    graph = dataset[0]
    features = graph.x.to(device)
    edges = graph.edge_index.to(device)
    labels = graph.y.to(device)
    test_mask = graph.test_mask.to(device)
    y_test = labels[test_mask]

    model.eval()
    with torch.no_grad():
        scores_test = model(features, edges)[test_mask]
        # Predicted class = index of the largest score per node.
        pred = torch.max(scores_test, 1)[1]
        n_correct = y_test.eq(pred).sum()
        acc = n_correct / len(y_test)
        # wandb.run.summary['test accuracy'] = acc
    return acc

In [None]:
from sklearn.metrics import f1_score

def train_eval_inductive(config):
    """Train one model on batched graphs, with early stopping on validation loss.

    Reads the notebook-level ``train_dataset``, ``train_loader``, ``val_loader`` and
    ``device``. The loss is ``BCEWithLogitsLoss``; predictions are obtained by
    rounding the sigmoid of the model output and scored with micro-averaged F1.
    Per-epoch means of loss and F1 are logged to the active wandb run.

    Args:
        config: Attribute-style configuration (e.g. ``wandb.config``) providing
            ``model``, ``hid_dim``, ``n_layers``, ``dropout_ratio``, ``optimizer``,
            ``lr``, ``weight_decay``, ``epochs`` and ``max_patience``, plus the
            model-specific fields ``skip``, ``in_heads``, ``out_heads``,
            ``self_loop`` and ``Beta`` where the chosen model uses them.

    Returns:
        The model in the state reached at the last executed epoch. Weights are
        NOT rolled back to the epoch with the best validation score.

    Raises:
        ValueError: If ``config.model`` is not a supported architecture name.
    """
    sig = nn.Sigmoid()

    if config.model == 'GCN':
        model = GCN(input_dim=train_dataset.num_features,
                    hid_dim=config.hid_dim,
                    n_classes=train_dataset.num_classes,
                    n_layers=config.n_layers,
                    dropout_ratio=config.dropout_ratio,
                    skip=config.skip)
    elif config.model == 'GAT' or config.model == 'GATv2':
        model = GAT(layer_type=config.model,
                    input_dim=train_dataset.num_features,
                    hid_dim=config.hid_dim,
                    n_classes=train_dataset.num_classes,
                    n_layers=config.n_layers,
                    in_heads=config.in_heads,
                    out_heads=config.out_heads,
                    self_loop=config.self_loop,
                    dropout_ratio=config.dropout_ratio,
                    skip=config.skip)
    elif config.model == 'AGNN':
        model = AGNN(input_dim=train_dataset.num_features,
                     hid_dim=config.hid_dim,
                     n_classes=train_dataset.num_classes,
                     n_layers=config.n_layers,
                     dropout_ratio=config.dropout_ratio,
                     beta=config.Beta)
    elif config.model == 'MLP':
        model = MLP(input_dim=train_dataset.num_features,
                    hid_dim=config.hid_dim,
                    n_classes=train_dataset.num_classes,
                    n_layers=config.n_layers,
                    dropout_ratio=config.dropout_ratio)
    else:
        # Previously an unknown name fell through and crashed with UnboundLocalError.
        raise ValueError('Unknown model {!r}'.format(config.model))
    model = model.to(device)

    criterion = nn.BCEWithLogitsLoss()
    # Take the optimizer name from the config passed in (not the global exp_config),
    # consistent with how lr and weight_decay are read.
    optimizer = getattr(torch.optim, config.optimizer)(params=model.parameters(),
                                                       lr=config.lr,
                                                       weight_decay=config.weight_decay)

    print('Training and validating {}'.format(config.model))
    best_val_f1 = 0
    best_val_loss = 1e10
    patience = 0

    for epoch in tqdm(range(config.epochs)):
        train_loss = []
        train_f1 = []

        # ---- training pass: one optimizer step per batch ----
        model.train()
        for data in train_loader:
            data = data.to(device)
            x = data.x
            A = data.edge_index
            y = data.y

            optimizer.zero_grad()
            output = model(x, A)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())

            # Threshold the sigmoid at 0.5 (via rounding) to get hard predictions.
            preds = torch.round(sig(output))
            preds = preds.detach().to('cpu').numpy()
            train_f1.append(f1_score(y.to('cpu').numpy(), preds, average='micro'))

        # ---- validation pass with dropout disabled ----
        model.eval()
        val_loss = []
        val_f1 = []
        with torch.no_grad():
            for data in val_loader:
                data = data.to(device)
                x = data.x
                A = data.edge_index
                y = data.y

                output = model(x, A)
                loss = criterion(output, y)
                val_loss.append(loss.item())

                preds = torch.round(sig(output))
                preds = preds.to('cpu').numpy()
                val_f1.append(f1_score(y.to('cpu').numpy(), preds, average='micro'))

        # Collapse the per-batch lists into per-epoch means (unweighted across batches).
        train_loss = torch.tensor(train_loss).mean()
        train_f1 = torch.tensor(train_f1).mean()
        val_loss = torch.tensor(val_loss).mean()
        val_f1 = torch.tensor(val_f1).mean()

        wandb.log({'train loss':train_loss, 'train f1':train_f1,
                   'val loss':val_loss, 'val f1':val_f1})

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            wandb.run.summary['best val f1'] = best_val_f1
            wandb.run.summary['best epoch'] = epoch

        # Early stopping: the validation loss must improve by more than 1%
        # to reset the patience counter.
        if val_loss < (best_val_loss * 0.99):
            best_val_loss = val_loss
            patience = 0
        else:
            patience += 1
            if patience == config.max_patience:
                print('Early stopping at Epoch {}'.format(epoch))
                return model

    return model

def test_inductive(model):
    """Evaluate ``model`` on every batch of the notebook-level ``test_loader``.

    Args:
        model: A module called as ``model(x, edge_index)``.

    Returns:
        Python float: the mean over batches of the micro-averaged F1 score, where
        predictions are the rounded sigmoid of the model output.
    """
    print('Testing model')

    model.eval()
    batch_scores = []
    with torch.no_grad():
        for batch in tqdm(test_loader):
            batch = batch.to(device)
            logits = model(batch.x, batch.edge_index)
            # Hard 0/1 predictions, moved to NumPy for scikit-learn.
            hard_preds = torch.round(torch.sigmoid(logits)).to('cpu').numpy()
            targets = batch.y.to('cpu').numpy()
            batch_scores.append(f1_score(targets, hard_preds, average='micro'))

    mean_f1 = torch.tensor(batch_scores).mean().item()
    # wandb.run.summary['test f1'] = mean_f1
    return mean_f1

In [None]:
from sklearn.model_selection import train_test_split

def train_eval_hetero(config):
    """Train and test a fresh model on 10 random 80/20 node splits of a single graph.

    Reads the notebook-level ``dataset`` and ``device``. For split ``i`` the node
    indices are partitioned with ``train_test_split(..., test_size=0.2,
    random_state=i)``; a new model is trained full-batch for ``config.epochs``
    epochs (no validation set, no early stopping) and then scored on the held-out
    nodes.

    Args:
        config: Attribute-style configuration (e.g. ``wandb.config``) providing
            ``model``, ``hid_dim``, ``n_layers``, ``dropout_ratio``, ``optimizer``,
            ``lr``, ``weight_decay`` and ``epochs``, plus the model-specific fields
            ``skip``, ``in_heads``, ``out_heads``, ``self_loop`` and ``Beta`` where
            the chosen model uses them.

    Returns:
        List of 10 Python floats: the test accuracy of each split.

    Raises:
        ValueError: If ``config.model`` is not a supported architecture name.
    """
    # Index the dataset once; every dataset[0] access re-applies the transform.
    graph = dataset[0]
    X = graph.x.to(device)
    A = graph.edge_index.to(device)
    y = graph.y.to(device)
    n_nodes = len(graph.x)

    test_results = []
    print('Training and validating {}'.format(config.model))
    for i in tqdm(range(10)):
        # Despite the names, these are arrays of node indices, not boolean masks.
        train_mask, test_mask = train_test_split(np.arange(n_nodes), test_size=0.2, random_state=i)
        y_train = y[train_mask]
        y_test = y[test_mask]

        if config.model == 'GCN':
            model = GCN(input_dim=dataset.num_features,
                        hid_dim=config.hid_dim,
                        n_classes=dataset.num_classes,
                        n_layers=config.n_layers,
                        dropout_ratio=config.dropout_ratio,
                        skip=config.skip)
        elif config.model == 'GAT' or config.model == 'GATv2':
            model = GAT(layer_type=config.model,
                        input_dim=dataset.num_features,
                        hid_dim=config.hid_dim,
                        n_classes=dataset.num_classes,
                        n_layers=config.n_layers,
                        in_heads=config.in_heads,
                        out_heads=config.out_heads,
                        self_loop=config.self_loop,
                        dropout_ratio=config.dropout_ratio,
                        skip=config.skip)
        elif config.model == 'AGNN':
            model = AGNN(input_dim=dataset.num_features,
                         hid_dim=config.hid_dim,
                         n_classes=dataset.num_classes,
                         n_layers=config.n_layers,
                         dropout_ratio=config.dropout_ratio,
                         beta=config.Beta)
        elif config.model == 'MLP':
            model = MLP(input_dim=dataset.num_features,
                        hid_dim=config.hid_dim,
                        n_classes=dataset.num_classes,
                        n_layers=config.n_layers,
                        dropout_ratio=config.dropout_ratio)
        else:
            # Previously an unknown name fell through and crashed with UnboundLocalError.
            raise ValueError('Unknown model {!r}'.format(config.model))
        model = model.to(device)

        criterion = nn.CrossEntropyLoss()
        # Take the optimizer name from the config passed in (not the global exp_config),
        # consistent with how lr and weight_decay are read.
        optimizer = getattr(torch.optim, config.optimizer)(params=model.parameters(),
                                                           lr=config.lr,
                                                           weight_decay=config.weight_decay)

        for epoch in range(config.epochs):
            model.train()
            optimizer.zero_grad()

            output = model(X, A)
            output_train = output[train_mask]

            train_loss = criterion(output_train, y_train)
            train_loss.backward()
            optimizer.step()

            _, pred_train = torch.max(output_train, 1)
            train_acc = (y_train.eq(pred_train).sum() / len(y_train)).item()

            # Progress report every 100 epochs.
            if epoch % 100 == 0:
                print('{}.\tEPOCH:{}\tTRAIN ACC:{:.4}'.format(i, epoch, train_acc))

        # Score the held-out nodes of this split with dropout disabled.
        model.eval()
        with torch.no_grad():
            output = model(X, A)
            output_test = output[test_mask]
            _, pred_test = torch.max(output_test, 1)
            test_acc = (y_test.eq(pred_test).sum() / len(y_test)).item()
            test_results.append(test_acc)
            print('TEST ACC:{:.4}'.format(test_acc))
        print()
    return test_results

# Run experiment

In [None]:
# Number of independent train/test repetitions for the PPI and Planetoid branches.
# The WebKB branch does not use it: train_eval_hetero runs its own fixed loop of 10 splits.
n_runs = 10
# One wandb run wraps all repetitions; the tags are the dataset and model parts of run_name.
with wandb.init(project='GRL', name=run_name, config=exp_config, tags=run_name.split('_')):
    config = wandb.config
    test_results = []
    if config.dataset == 'PPI':
        # Batched multi-graph setting: each repetition yields a mean micro-F1.
        for _ in range(n_runs):
            best_model = train_eval_inductive(config)
            test_results.append(test_inductive(best_model))
    elif config.dataset in ['Cornell', 'Texas', 'Wisconsin']:
        # Random-split setting: the function returns the list of per-split test accuracies.
        test_results = train_eval_hetero(config)
    else:
        # Single-graph setting with the dataset's own masks: each repetition yields a test accuracy.
        for _ in range(n_runs):
            best_model = train_eval_transductive(config)
            test_results.append(test_transductive(best_model))

    # Aggregate the per-repetition scores into the run summary.
    test_results = torch.tensor(test_results)
    wandb.run.summary['mean test'] = test_results.mean().item()
    wandb.run.summary['std test'] = test_results.std().item()

# Code for running experiments with different depths:
# with wandb.init(project='GRL', name=run_name, config=exp_config, tags=run_name.split('_')):
#     config = wandb.config
#     layers = np.arange(1,11)
#     n_runs = 5
#     results_by_depth = []
#     for n_layers in layers:
#         config.n_layers = n_layers
#         if n_layers >= 3:
#             config.skip = True
#         else:
#             config.skip = False

#         test_results = []
#         if config.dataset == 'PPI':
#             for _ in range(n_runs):
#                 best_model = train_eval_inductive(config)
#                 test_results.append(test_inductive(best_model))
#         else:
#             for _ in range(n_runs):
#                 best_model = train_eval_transductive(config)
#                 test_results.append(test_transductive(best_model))

#         test_results = torch.tensor(test_results)
#         results_by_depth.append(test_results.mean().item())
#     wandb.run.summary['test_results_by_depth'] = results_by_depth