In [1]:
import sys
print(sys.version)

3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]


In [2]:
import torch
print(torch.__version__)

2.1.0+cu118


In [3]:
!pip install torch-scatter -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install torch-cluster -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install git+https://github.com/pyg-team/pytorch_geometric.git
!pip install ogb  # for datasets

Looking in links: https://data.pyg.org/whl/torch-2.1.0+cu118.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu118/torch_scatter-2.1.2%2Bpt21cu118-cp310-cp310-linux_x86_64.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m103.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2+pt21cu118
Looking in links: https://data.pyg.org/whl/torch-2.1.0+cu118.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu118/torch_sparse-0.6.18%2Bpt21cu118-cp310-cp310-linux_x86_64.whl (4.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m49.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.18+pt21cu118
Looking in links: https://data.pyg.org/whl/torch-2.1.0+cu118.html
Collecting torch-cluster
  Downloading https://data.

In [4]:
from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GATConv
import torch.nn.functional as F
import torch.nn as nn
from tqdm import tqdm
import numpy as np

In [5]:
cora_dataset = Planetoid('/tmp/cora', 'cora')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [6]:
cora_data = cora_dataset[0]

In [7]:
# For debug use only
num_nodes = cora_data.num_nodes
print('cora has {} nodes'.format(num_nodes))

num_edges = cora_data.num_edges
print('cora has {} edges'.format(num_edges))

cora has 2708 nodes
cora has 10556 edges


In [8]:
# For debug use only
print(cora_data)
print(cora_data.x.device)

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
cpu


In [9]:
# For debug use only
cora_x_train = cora_data.x[cora_data.train_mask]
cora_x_val = cora_data.x[cora_data.val_mask]
cora_x_test = cora_data.x[cora_data.test_mask]

print("number of nodes in cora train set,", cora_x_train.shape[0])
print("number of nodes in cora val set,", cora_x_val.shape[0])
print("number of nodes in cora test set,", cora_x_test.shape[0])

number of nodes in cora train set, 140
number of nodes in cora val set, 500
number of nodes in cora test set, 1000


In [10]:
# For debug use only
print(cora_data.y)
print(cora_data.y.shape)
s = set()
histogram = np.zeros(7)
for label in cora_data.y:
    s.add(label.item())
    histogram[label.item()]+=1
print(s)
print(histogram)

tensor([3, 4, 4,  ..., 3, 3, 3])
torch.Size([2708])
{0, 1, 2, 3, 4, 5, 6}
[351. 217. 418. 818. 426. 298. 180.]


In [11]:
# For debug use only
print(cora_data.x.shape)
print(cora_data.x[170:180])
print(cora_data.num_features)
print(cora_data.num_nodes)
print(cora_data.num_node_types)
print(type(cora_data))

torch.Size([2708, 1433])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
1433
2708
1
<class 'torch_geometric.data.data.Data'>


In [167]:
# Define the GAT model
class GAT(torch.nn.Module):
    # hidden channels will be the embedding dimension for each attention head
    # after applying the first GAT layer.
    def __init__(self, in_channels, hidden_channels,
                 num_heads, dropout_rate, num_classes):
        super().__init__()

        self.dropout_rate = dropout_rate

        self.hidden_channels = hidden_channels
        self.num_heads = num_heads
        self.num_classes = num_classes

        self.conv1 = GATConv(in_channels, hidden_channels, heads=num_heads,
                                dropout=dropout_rate)
        self.conv2 = GATConv(hidden_channels*num_heads, num_classes,
                                dropout=dropout_rate, concat=False)

    def forward(self, x, edge_index):
        out = F.dropout(x, p=self.dropout_rate, training=self.training)
        out = self.conv1(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads
        out = F.elu(out)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.conv2(out, edge_index)
        assert out.shape[-1] == self.num_classes
        return out

In [13]:
def train(model, data, optimizer, loss_fn):
    model.train()
    optimizer.zero_grad()

    pred = model(data.x, data.edge_index)
    loss = loss_fn(pred[data.train_mask], data.y[data.train_mask])

    loss.backward()
    optimizer.step()

    return loss

In [14]:
@torch.no_grad()
def evaluate(model, data, test_mask, loss_fn):
    accuracy_list = [0.0, 0.0]
    loss_list = [0.0, 0.0]
    model.eval()

    logits = model(data.x, data.edge_index)
    pred = logits.argmax(dim=-1)

    for i, mask in enumerate([data.train_mask, test_mask]):
        accuracy_list[i] = pred[mask].eq(data.y[mask]).float().mean().item()
        loss_list[i] = loss_fn(logits[mask], data.y[mask]).item()

    return accuracy_list, loss_list

In [98]:
def summarize(model, detailed=False):
    num_params = 0
    print(f"Model Summary: {type(model).__name__}\n")
    for name, param in model.named_parameters():
        if detailed: print(name, param.size())
        num_params += param.numel()
    print(f"\nTotal number of params: {num_params}")

In [194]:
def train_model(model, data, optimizer, loss_fn, save_name, num_epochs=1000,
                log_freq=50, patience=100, logging=True):
    print(f"Using {device}, model: {next(model.parameters()).device}, data: {data.x.device}")

    # Early stopping initialization
    best_val_loss = float('inf')
    best_val_acc = 0.0
    patience_counter = 0
    best_epoch = 0
    best_model_state = None

    # Evaluate before training
    if logging:
        acc_list, loss_list = evaluate(model, data, data.val_mask, loss_fn)
        print("Before training: ")
        print(f"Train Acc: {acc_list[0]:.4f}, Train Loss: {loss_list[0]:.4f}, Val Acc: {acc_list[1]:.4f}, Val Loss: {loss_list[1]:.4f}\n")

    # Start training
    for epoch in tqdm(range(num_epochs), desc="Training Epochs"):
        loss = train(model, data, optimizer, loss_fn)
        acc_list, loss_list = evaluate(model, data,
                                       data.val_mask, loss_fn)

        # Update early stopping criteria
        val_loss = loss_list[1]
        val_acc = acc_list[1]
        # if val_loss < best_val_loss or val_acc > best_val_acc:
        if val_acc > best_val_acc:
            best_val_loss = min(best_val_loss, val_loss)
            best_val_acc = max(best_val_acc, val_acc)
            patience_counter = 0
            best_epoch = epoch
            torch.save(model.state_dict(), save_name)
        else:
            patience_counter += 1

        # Check if patience limit is reached
        if patience_counter >= patience:
            if logging: print(f"\nEarly stopping triggered at epoch {epoch + 1}")
            break

        # Logging
        if logging and ((epoch % log_freq == 0) or (epoch + 1 == num_epochs)):
            print(f"Epoch: {epoch+1}, Loss: {loss:.4f}")
            print(f"    Eval: Train Acc: {acc_list[0]:.4f}, Train Loss: {loss_list[0]:.4f}, Val Acc: {acc_list[1]:.4f}, Val Loss: {loss_list[1]:.4f}")

    if best_model_state is not None:
        model.load_state_dict(best_model_state["state_dict"])

    print(f"\nTraining completed.\nBest Validation at Epoch: {best_epoch + 1}\nBest Val Acc: {best_val_acc:.4f}, Best Val Loss: {best_val_loss:.4f}\n")

In [17]:
# device = torch.device("cpu")
# device = torch.device("cuda")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_heads = 8
dropout_rate = 0.4
emb_dim1 = 8
lr = 0.005

cora_num_classes = len(cora_data.y.unique())
assert cora_num_classes == 7

In [18]:
num_epochs = 1000
log_freq = 50

cora_model = GAT(cora_data.num_features, emb_dim1, num_heads, dropout_rate,
            cora_num_classes).to(device)
cora_data.to(device)

lambda_l2 = 0.001
cora_optimizer = torch.optim.Adam(cora_model.parameters(), lr=lr, weight_decay=lambda_l2)
loss_fn = nn.CrossEntropyLoss()

In [195]:
train_model(cora_model, cora_data, cora_optimizer, loss_fn, "cora_model_01.pth", num_epochs, log_freq, patience=100)
cora_model.load_state_dict(torch.load("cora_model_01.pth", map_location=device))
cora_model = cora_model.to(device)

Using cuda, model: cuda:0, data: cuda:0
Before training: 
Train Acc: 1.0000, Train Loss: 0.0147, Val Acc: 0.7840, Val Loss: 0.7338



Training Epochs:   1%|          | 11/1000 [00:00<00:09, 109.54it/s]

Epoch: 1, Loss: 0.0941
    Eval: Train Acc: 1.0000, Train Loss: 0.0147, Val Acc: 0.7820, Val Loss: 0.7357


Training Epochs:   7%|▋         | 69/1000 [00:00<00:08, 110.86it/s]

Epoch: 51, Loss: 0.1693
    Eval: Train Acc: 1.0000, Train Loss: 0.0072, Val Acc: 0.7720, Val Loss: 0.7609


Training Epochs:  12%|█▏        | 117/1000 [00:01<00:07, 115.50it/s]

Epoch: 101, Loss: 0.1009
    Eval: Train Acc: 1.0000, Train Loss: 0.0044, Val Acc: 0.7760, Val Loss: 0.7779


Training Epochs:  13%|█▎        | 128/1000 [00:01<00:07, 110.96it/s]


Early stopping triggered at epoch 129

Training completed.
Best Validation at Epoch: 29
Best Val Acc: 0.7860, Best Val Loss: 0.7357






In [196]:
# Evaluate after training
acc_list, loss_list = evaluate(cora_model, cora_data, cora_data.val_mask, loss_fn)
print("After training: ")
print(f"Train Acc: {acc_list[0]:.4f}, Train Loss: {loss_list[0]:.4f}, Val Acc: {acc_list[1]:.4f}, Val Loss: {loss_list[1]:.4f}\n")

After training: 
Train Acc: 1.0000, Train Loss: 0.0104, Val Acc: 0.7860, Val Loss: 0.7396



In [21]:
cite_dataset = Planetoid('/tmp/Citeseer', 'Citeseer')
cite_data = cite_dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


In [22]:
# For debug use only
print(f"Number of nodes: {cite_data.num_nodes}")
print(f"Number of features per node: {cite_data.num_node_features}")
print(f"Number of edges: {cite_data.num_edges}")
print(f"Number of undirectd edge: {cite_data.edge_index.shape[1]//2}")
print(f"Number of classes: {len(cite_data.y.unique())}")

Number of nodes: 3327
Number of features per node: 3703
Number of edges: 9104
Number of undirectd edge: 4552
Number of classes: 6


In [23]:
# For debug use only
cite_x_train = cite_data.x[cite_data.train_mask]
cite_x_val = cite_data.x[cite_data.val_mask]
cite_x_test = cite_data.x[cite_data.test_mask]

print("number of nodes in cora train set,", cite_x_train.shape[0])
print("number of nodes in cora val set,", cite_x_val.shape[0])
print("number of nodes in cora test set,", cite_x_test.shape[0])

number of nodes in cora train set, 120
number of nodes in cora val set, 500
number of nodes in cora test set, 1000


In [197]:
cite_num_classes = len(cite_data.y.unique())
assert cite_num_classes == 6

cite_model = GAT(cite_data.num_features, emb_dim1, num_heads, dropout_rate,
            cite_num_classes).to(device)
cite_data.to(device)

# Create optimizer for citeseer
cite_optimizer = torch.optim.Adam(cite_model.parameters(), lr=lr, weight_decay=lambda_l2)

# Train citeseer model
train_model(cite_model, cite_data, cite_optimizer, loss_fn, "cite_model_01.pth", num_epochs=1000, log_freq=10, patience=100)
cite_model.load_state_dict(torch.load("cite_model_01.pth", map_location=device))
cite_model = cite_model.to(device)

# Evaluate after training
acc_list, loss_list = evaluate(cite_model, cite_data, cite_data.val_mask, loss_fn)
print("\nAfter training: ")
print(f"Train Acc: {acc_list[0]:.4f}, Train Loss: {loss_list[0]:.4f}, Val Acc: {acc_list[1]:.4f}, Val Loss: {loss_list[1]:.4f}\n")

Using cuda, model: cuda:0, data: cuda:0
Before training: 
Train Acc: 0.0833, Train Loss: 1.8252, Val Acc: 0.0860, Val Loss: 1.8241



Training Epochs:   1%|          | 6/1000 [00:00<00:17, 58.30it/s]

Epoch: 1, Loss: 1.8358
    Eval: Train Acc: 0.7667, Train Loss: 1.5407, Val Acc: 0.4460, Val Loss: 1.6842
Epoch: 11, Loss: 0.4654
    Eval: Train Acc: 0.9667, Train Loss: 0.2025, Val Acc: 0.6600, Val Loss: 1.0457


Training Epochs:   3%|▎         | 30/1000 [00:00<00:13, 72.77it/s]

Epoch: 21, Loss: 0.2882
    Eval: Train Acc: 0.9917, Train Loss: 0.0744, Val Acc: 0.6440, Val Loss: 1.0655
Epoch: 31, Loss: 0.2762
    Eval: Train Acc: 1.0000, Train Loss: 0.0389, Val Acc: 0.6420, Val Loss: 1.1047


Training Epochs:   5%|▌         | 54/1000 [00:00<00:12, 74.03it/s]

Epoch: 41, Loss: 0.2638
    Eval: Train Acc: 1.0000, Train Loss: 0.0286, Val Acc: 0.6680, Val Loss: 1.0693
Epoch: 51, Loss: 0.1983
    Eval: Train Acc: 1.0000, Train Loss: 0.0192, Val Acc: 0.6580, Val Loss: 1.1032


Training Epochs:   7%|▋         | 74/1000 [00:00<00:10, 84.53it/s]

Epoch: 61, Loss: 0.2864
    Eval: Train Acc: 1.0000, Train Loss: 0.0150, Val Acc: 0.6700, Val Loss: 1.1038
Epoch: 71, Loss: 0.2173
    Eval: Train Acc: 1.0000, Train Loss: 0.0127, Val Acc: 0.6620, Val Loss: 1.0981


Training Epochs:   9%|▉         | 94/1000 [00:01<00:09, 92.12it/s]

Epoch: 81, Loss: 0.2014
    Eval: Train Acc: 1.0000, Train Loss: 0.0117, Val Acc: 0.6640, Val Loss: 1.0933
Epoch: 91, Loss: 0.2310
    Eval: Train Acc: 1.0000, Train Loss: 0.0109, Val Acc: 0.6640, Val Loss: 1.0949


Training Epochs:  10%|█         | 105/1000 [00:01<00:11, 81.36it/s]

Epoch: 101, Loss: 0.2199
    Eval: Train Acc: 1.0000, Train Loss: 0.0094, Val Acc: 0.6740, Val Loss: 1.0916

Early stopping triggered at epoch 106

Training completed.
Best Validation at Epoch: 6
Best Val Acc: 0.7000, Best Val Loss: 1.2128


After training: 
Train Acc: 0.9417, Train Loss: 0.5382, Val Acc: 0.7000, Val Loss: 1.2128






In [25]:
def val_stats(models, data, val_mask, loss_fn):
    accuracies = []
    losses = []

    for model in models:
        acc, loss = evaluate(model, data, val_mask, loss_fn)
        accuracies.append(acc[1])
        losses.append(loss[1])

    # Convert to tensors
    accuracies_tensor = torch.tensor(accuracies)
    losses_tensor = torch.tensor(losses)

    # Calculate mean and standard deviation
    avg_accuracy = torch.mean(accuracies_tensor).item()
    std_accuracy = torch.std(accuracies_tensor).item()
    avg_loss = torch.mean(losses_tensor).item()
    std_loss = torch.std(losses_tensor).item()

    return avg_accuracy, std_accuracy, avg_loss, std_loss, accuracies_tensor, losses_tensor

In [27]:
cora_val_stats = val_stats([cora_model], cora_data, cora_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cora_val_stats
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4e}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4e}")

Average Accuracy: 0.7840, Std of Accuracy: nan, Average Loss: 0.7338, Std of Loss: nan


In [28]:
def create_models(Model_Class, num_runs, data):
    models = []

    for i in range(num_runs):
        print(f"Run: {i+1}")
        # Create a new GAT model instance
        model = Model_Class(data.num_features, emb_dim1, num_heads, dropout_rate,
                    len(data.y.unique())).to(device)

        # Create a new optimizer instance
        optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                     weight_decay=lambda_l2)

        # Train the model
        train_model(model, data, optimizer, loss_fn, "temp_model.pth", num_epochs,
                    log_freq, patience=100, logging=False)

        # Load the best model state
        model.load_state_dict(torch.load("temp_model.pth", map_location=device))

        # Append the trained model to the list
        models.append(model)

    return models

In [168]:
num_runs = 10
cora_models = create_models(GAT, num_runs, cora_data)

cora_val_stats = val_stats(cora_models, cora_data, cora_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cora_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▍        | 138/1000 [00:01<00:08, 105.27it/s]



Training completed.
Best Validation at Epoch: 39
Best Val Acc: 0.7940, Best Val Loss: 0.6539

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  15%|█▌        | 154/1000 [00:01<00:07, 106.33it/s]



Training completed.
Best Validation at Epoch: 55
Best Val Acc: 0.7980, Best Val Loss: 0.7189

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 130/1000 [00:01<00:08, 106.08it/s]



Training completed.
Best Validation at Epoch: 31
Best Val Acc: 0.7920, Best Val Loss: 0.7151

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 129/1000 [00:01<00:07, 109.51it/s]



Training completed.
Best Validation at Epoch: 30
Best Val Acc: 0.7920, Best Val Loss: 0.7124

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  24%|██▍       | 241/1000 [00:02<00:07, 108.43it/s]



Training completed.
Best Validation at Epoch: 142
Best Val Acc: 0.7860, Best Val Loss: 0.7337

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  20%|█▉        | 198/1000 [00:02<00:08, 93.41it/s]



Training completed.
Best Validation at Epoch: 99
Best Val Acc: 0.7960, Best Val Loss: 0.7067

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  19%|█▉        | 192/1000 [00:02<00:09, 84.94it/s]



Training completed.
Best Validation at Epoch: 93
Best Val Acc: 0.8000, Best Val Loss: 0.7304

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 128/1000 [00:01<00:08, 106.52it/s]



Training completed.
Best Validation at Epoch: 29
Best Val Acc: 0.7880, Best Val Loss: 0.6972

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 126/1000 [00:01<00:08, 107.10it/s]



Training completed.
Best Validation at Epoch: 27
Best Val Acc: 0.8040, Best Val Loss: 0.6577

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  28%|██▊       | 277/1000 [00:02<00:06, 109.36it/s]


Training completed.
Best Validation at Epoch: 178
Best Val Acc: 0.7880, Best Val Loss: 0.7574

Number of Runs: 10
Average Accuracy: 0.7848, Std of Accuracy: 0.0069, Average Loss: 0.7155, Std of Loss: 0.0393





In [169]:
num_runs = 10
cite_models = create_models(GAT, num_runs, cite_data)

cite_val_stats = val_stats(cite_models, cite_data, cite_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cite_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  36%|███▌      | 357/1000 [00:03<00:06, 97.93it/s] 



Training completed.
Best Validation at Epoch: 258
Best Val Acc: 0.7040, Best Val Loss: 1.0252

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▍        | 140/1000 [00:01<00:08, 97.34it/s] 



Training completed.
Best Validation at Epoch: 41
Best Val Acc: 0.7060, Best Val Loss: 0.9663

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 114/1000 [00:01<00:09, 98.36it/s] 



Training completed.
Best Validation at Epoch: 15
Best Val Acc: 0.6940, Best Val Loss: 0.9630

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  28%|██▊       | 277/1000 [00:03<00:08, 89.73it/s]



Training completed.
Best Validation at Epoch: 178
Best Val Acc: 0.7080, Best Val Loss: 1.0114

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 114/1000 [00:01<00:12, 71.08it/s]



Training completed.
Best Validation at Epoch: 15
Best Val Acc: 0.7160, Best Val Loss: 0.9269

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:02<00:17, 50.60it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.6940, Best Val Loss: 0.9585

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:01<00:09, 98.41it/s] 



Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.6840, Best Val Loss: 1.0044

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  20%|█▉        | 195/1000 [00:02<00:09, 85.88it/s]



Training completed.
Best Validation at Epoch: 96
Best Val Acc: 0.6940, Best Val Loss: 0.9928

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:02<00:16, 52.80it/s]



Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.6920, Best Val Loss: 0.9998

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:02<00:20, 42.55it/s]


Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.6960, Best Val Loss: 0.9876

Number of Runs: 10
Average Accuracy: 0.6862, Std of Accuracy: 0.0178, Average Loss: 1.0079, Std of Loss: 0.0565





In [170]:
summarize(cora_models[0])
summarize(cite_models[0])

Model Summary: GAT


Total number of params: 92373
Model Summary: GAT


Total number of params: 237586


In [33]:
emb_dim1 = 8

In [162]:
# Define the GAT model
class GAT1(torch.nn.Module):
    # hidden channels will be the embedding dimension for each attention head
    # after applying the first GAT layer.
    def __init__(self, in_channels, hidden_channels,
                 num_heads, dropout_rate, num_classes):
        super().__init__()

        self.dropout_rate = dropout_rate

        self.hidden_channels = hidden_channels
        self.num_heads = num_heads

        self.conv1 = GATConv(in_channels, hidden_channels, heads=num_heads,
                                dropout=dropout_rate)
        self.conv2 = GATConv(hidden_channels*num_heads, hidden_channels,
                             heads=num_heads, dropout=dropout_rate)
        self.conv3 = GATConv(hidden_channels*num_heads, num_classes,
                                dropout=dropout_rate, concat=False)

    def forward(self, x, edge_index):
        out = F.dropout(x, p=self.dropout_rate, training=self.training)
        out = self.conv1(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads
        out = F.elu(out)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.conv2(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads
        out = F.elu(out)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.conv3(out, edge_index)
        return out

In [163]:
num_runs = 10
cora_models = create_models(GAT1, num_runs, cora_data)

cora_val_stats = val_stats(cora_models, cora_data, cora_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cora_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 115/1000 [00:01<00:11, 79.00it/s]



Training completed.
Best Validation at Epoch: 16
Best Val Acc: 0.8060, Best Val Loss: 0.6831

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 114/1000 [00:01<00:11, 80.38it/s]



Training completed.
Best Validation at Epoch: 15
Best Val Acc: 0.7980, Best Val Loss: 0.7350

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 115/1000 [00:01<00:10, 80.51it/s]



Training completed.
Best Validation at Epoch: 16
Best Val Acc: 0.8060, Best Val Loss: 0.6600

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 114/1000 [00:01<00:10, 81.45it/s]



Training completed.
Best Validation at Epoch: 15
Best Val Acc: 0.7980, Best Val Loss: 0.6993

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 115/1000 [00:01<00:12, 69.86it/s]



Training completed.
Best Validation at Epoch: 16
Best Val Acc: 0.8080, Best Val Loss: 0.6344

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 117/1000 [00:01<00:13, 63.20it/s]



Training completed.
Best Validation at Epoch: 18
Best Val Acc: 0.7960, Best Val Loss: 0.7173

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 115/1000 [00:01<00:12, 70.89it/s]



Training completed.
Best Validation at Epoch: 16
Best Val Acc: 0.7860, Best Val Loss: 0.6700

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 115/1000 [00:01<00:10, 81.15it/s]



Training completed.
Best Validation at Epoch: 16
Best Val Acc: 0.8020, Best Val Loss: 0.7030

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  15%|█▌        | 153/1000 [00:01<00:10, 81.71it/s]



Training completed.
Best Validation at Epoch: 54
Best Val Acc: 0.8060, Best Val Loss: 0.6670

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:01<00:10, 83.44it/s]


Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.7820, Best Val Loss: 0.7685

Number of Runs: 10
Average Accuracy: 0.7910, Std of Accuracy: 0.0113, Average Loss: 0.7179, Std of Loss: 0.0770





In [164]:
num_runs = 10
cite_models = create_models(GAT1, num_runs, cite_data)

cite_val_stats = val_stats(cite_models, cite_data, cite_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cite_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:11, 75.36it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6840, Best Val Loss: 0.9787

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:11, 75.01it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6960, Best Val Loss: 0.9329

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 109/1000 [00:01<00:11, 75.40it/s]



Training completed.
Best Validation at Epoch: 10
Best Val Acc: 0.6720, Best Val Loss: 0.9904

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 109/1000 [00:01<00:13, 67.79it/s]



Training completed.
Best Validation at Epoch: 10
Best Val Acc: 0.6980, Best Val Loss: 0.9232

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:15, 59.10it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7140, Best Val Loss: 0.9808

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 109/1000 [00:01<00:13, 63.66it/s]



Training completed.
Best Validation at Epoch: 10
Best Val Acc: 0.7100, Best Val Loss: 0.9640

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:11, 76.53it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6720, Best Val Loss: 1.0372

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:11, 75.79it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7020, Best Val Loss: 0.9963

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 109/1000 [00:01<00:11, 75.69it/s]



Training completed.
Best Validation at Epoch: 10
Best Val Acc: 0.7020, Best Val Loss: 0.9458

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:11, 76.47it/s]


Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6860, Best Val Loss: 1.0221

Number of Runs: 10
Average Accuracy: 0.6830, Std of Accuracy: 0.0175, Average Loss: 0.9771, Std of Loss: 0.0368





In [165]:
summarize(cora_models[0])
print()
summarize(cite_models[0])

Model Summary: GAT1


Total number of params: 96661

Model Summary: GAT1


Total number of params: 241874


In [158]:
# Define the GAT model
from torch.nn import TransformerEncoderLayer

# d_model = 64

class GAT2(torch.nn.Module):
    # hidden channels will be the embedding dimension for each attention head
    # after applying the first GAT layer.
    def __init__(self, in_channels, hidden_channels,
                 num_heads, dropout_rate, num_classes):
        super().__init__()

        self.dropout_rate = dropout_rate

        self.hidden_channels = hidden_channels
        self.num_heads = num_heads
        self.num_classes = num_classes

        self.conv1 = GATConv(in_channels, hidden_channels, heads=num_heads,
                                dropout=dropout_rate)

        self.encoder1 = TransformerEncoderLayer(
            d_model=hidden_channels*num_heads, nhead=num_heads,
            dropout=dropout_rate, batch_first=True)

        self.conv2 = GATConv(hidden_channels*num_heads, num_classes,
                                dropout=dropout_rate, concat=False)

    def forward(self, x, edge_index):
        out = F.dropout(x, p=self.dropout_rate, training=self.training)
        out = self.conv1(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads
        out = F.elu(out)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = out.unsqueeze(0)
        out = self.encoder1(out)
        out = out.squeeze(0)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.conv2(out, edge_index)
        assert out.shape[-1] == self.num_classes

        return out

In [159]:
num_runs = 10
cora_models = create_models(GAT2, num_runs, cora_data)

cora_val_stats = val_stats(cora_models, cora_data, cora_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cora_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:03<00:30, 29.46it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.7680, Best Val Loss: 0.7504

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  18%|█▊        | 183/1000 [00:05<00:26, 31.14it/s]



Training completed.
Best Validation at Epoch: 84
Best Val Acc: 0.7700, Best Val Loss: 0.7882

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 121/1000 [00:03<00:27, 31.88it/s]



Training completed.
Best Validation at Epoch: 22
Best Val Acc: 0.7940, Best Val Loss: 0.7453

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 124/1000 [00:03<00:28, 31.17it/s]



Training completed.
Best Validation at Epoch: 25
Best Val Acc: 0.7980, Best Val Loss: 0.7535

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 127/1000 [00:04<00:29, 30.08it/s]



Training completed.
Best Validation at Epoch: 28
Best Val Acc: 0.7960, Best Val Loss: 0.6676

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 122/1000 [00:03<00:27, 31.48it/s]



Training completed.
Best Validation at Epoch: 23
Best Val Acc: 0.7660, Best Val Loss: 0.8496

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 120/1000 [00:03<00:27, 31.66it/s]



Training completed.
Best Validation at Epoch: 21
Best Val Acc: 0.7720, Best Val Loss: 0.7588

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 130/1000 [00:04<00:29, 29.56it/s]



Training completed.
Best Validation at Epoch: 31
Best Val Acc: 0.8040, Best Val Loss: 0.6514

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:03<00:28, 31.22it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.8040, Best Val Loss: 0.8021

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 130/1000 [00:04<00:27, 31.70it/s]


Training completed.
Best Validation at Epoch: 31
Best Val Acc: 0.7980, Best Val Loss: 0.7121

Number of Runs: 10
Average Accuracy: 0.7862, Std of Accuracy: 0.0171, Average Loss: 0.9622, Std of Loss: 0.2227





In [160]:
num_runs = 10
cite_models = create_models(GAT2, num_runs, cite_data)

cite_val_stats = val_stats(cite_models, cite_data, cite_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cite_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 111/1000 [00:05<00:41, 21.47it/s]



Training completed.
Best Validation at Epoch: 12
Best Val Acc: 0.6940, Best Val Loss: 1.1635

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 121/1000 [00:05<00:38, 22.70it/s]



Training completed.
Best Validation at Epoch: 22
Best Val Acc: 0.6900, Best Val Loss: 0.9786

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:05<00:40, 22.01it/s]



Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.7080, Best Val Loss: 0.8769

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 114/1000 [00:05<00:39, 22.22it/s]



Training completed.
Best Validation at Epoch: 15
Best Val Acc: 0.7060, Best Val Loss: 0.9190

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:05<00:39, 22.39it/s]



Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.6860, Best Val Loss: 1.1320

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 117/1000 [00:05<00:41, 21.46it/s]



Training completed.
Best Validation at Epoch: 18
Best Val Acc: 0.6680, Best Val Loss: 1.0572

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 117/1000 [00:05<00:39, 22.28it/s]



Training completed.
Best Validation at Epoch: 18
Best Val Acc: 0.6780, Best Val Loss: 1.0836

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 111/1000 [00:05<00:40, 22.18it/s]



Training completed.
Best Validation at Epoch: 12
Best Val Acc: 0.6780, Best Val Loss: 1.0518

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 114/1000 [00:05<00:40, 21.82it/s]



Training completed.
Best Validation at Epoch: 15
Best Val Acc: 0.6980, Best Val Loss: 0.9485

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 118/1000 [00:05<00:39, 22.52it/s]


Training completed.
Best Validation at Epoch: 19
Best Val Acc: 0.6740, Best Val Loss: 1.0578

Number of Runs: 10
Average Accuracy: 0.6876, Std of Accuracy: 0.0130, Average Loss: 1.1344, Std of Loss: 0.1639





In [161]:
summarize(cora_models[0])
summarize(cite_models[0])

Model Summary: GAT2


Total number of params: 373525
Model Summary: GAT2


Total number of params: 518738


In [50]:
print(num_heads)
print(emb_dim1)

8
8


In [153]:
from torch.nn import Linear

class GAT3(torch.nn.Module):
    # hidden channels will be the embedding dimension for each attention head
    # after applying the first GAT layer.
    def __init__(self, in_channels, hidden_channels,
                 num_heads, dropout_rate, num_classes):
        super().__init__()

        self.dropout_rate = dropout_rate

        self.hidden_channels = hidden_channels
        self.num_heads = num_heads
        self.num_classes = num_classes

        self.conv1 = GATConv(in_channels, hidden_channels, heads=num_heads,
                                dropout=dropout_rate)

        self.conv2 = GATConv(hidden_channels*num_heads, hidden_channels,
                             heads = num_heads,
                             dropout=dropout_rate)

        self.encoder1 = TransformerEncoderLayer(
            d_model=hidden_channels*num_heads, nhead=num_heads,
            dropout=0,
            batch_first=True)

        self.linear = Linear(hidden_channels*num_heads, num_classes)

    def forward(self, x, edge_index):
        out = F.dropout(x, p=self.dropout_rate, training=self.training)
        out = self.conv1(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads
        out = F.elu(out)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.conv2(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads
        out = F.elu(out)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = out.unsqueeze(0)
        out = self.encoder1(out)
        out = out.squeeze(0)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.linear(out)
        assert out.shape[-1] == self.num_classes

        return out

In [157]:
num_runs = 10
cora_models = create_models(GAT3, num_runs, cora_data)

cora_val_stats = val_stats(cora_models, cora_data, cora_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cora_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:03<00:27, 32.78it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.7780, Best Val Loss: 0.7881

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  16%|█▌        | 159/1000 [00:04<00:26, 32.16it/s]



Training completed.
Best Validation at Epoch: 60
Best Val Acc: 0.7780, Best Val Loss: 0.7661

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:03<00:26, 33.58it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.7900, Best Val Loss: 0.6894

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 129/1000 [00:03<00:26, 33.27it/s]



Training completed.
Best Validation at Epoch: 30
Best Val Acc: 0.7900, Best Val Loss: 0.7591

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 117/1000 [00:03<00:27, 32.31it/s]



Training completed.
Best Validation at Epoch: 18
Best Val Acc: 0.7920, Best Val Loss: 0.7674

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:03<00:27, 32.69it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.8020, Best Val Loss: 0.6430

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 131/1000 [00:03<00:25, 34.16it/s]



Training completed.
Best Validation at Epoch: 32
Best Val Acc: 0.7980, Best Val Loss: 0.6805

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  15%|█▍        | 146/1000 [00:04<00:24, 34.23it/s]



Training completed.
Best Validation at Epoch: 47
Best Val Acc: 0.7920, Best Val Loss: 0.7750

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:03<00:27, 31.95it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.7780, Best Val Loss: 0.7249

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 114/1000 [00:03<00:26, 33.42it/s]


Training completed.
Best Validation at Epoch: 15
Best Val Acc: 0.7840, Best Val Loss: 0.7508

Number of Runs: 10
Average Accuracy: 0.7878, Std of Accuracy: 0.0088, Average Loss: 0.9079, Std of Loss: 0.1948





In [155]:
num_runs = 10
cite_models = create_models(GAT3, num_runs, cite_data)

cite_val_stats = val_stats(cite_models, cite_data, cite_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cite_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  20%|█▉        | 195/1000 [00:08<00:33, 24.34it/s]



Training completed.
Best Validation at Epoch: 96
Best Val Acc: 0.6760, Best Val Loss: 1.1152

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:04<00:36, 24.34it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.7280, Best Val Loss: 0.8998

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:04<00:36, 24.74it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7000, Best Val Loss: 0.9714

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  21%|██▏       | 214/1000 [00:08<00:32, 24.36it/s]



Training completed.
Best Validation at Epoch: 115
Best Val Acc: 0.6820, Best Val Loss: 1.1127

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:04<00:35, 24.78it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.6680, Best Val Loss: 1.0450

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:04<00:36, 24.62it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.6840, Best Val Loss: 1.0414

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:04<00:37, 23.89it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.7140, Best Val Loss: 0.9713

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:04<00:35, 24.85it/s]



Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.6820, Best Val Loss: 1.0086

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 109/1000 [00:04<00:36, 24.50it/s]



Training completed.
Best Validation at Epoch: 10
Best Val Acc: 0.7200, Best Val Loss: 0.9439

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:04<00:37, 23.64it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.6680, Best Val Loss: 1.0113

Number of Runs: 10
Average Accuracy: 0.6918, Std of Accuracy: 0.0224, Average Loss: 1.2020, Std of Loss: 0.3678


In [156]:
summarize(cora_models[0])
print()
summarize(cite_models[0])

Model Summary: GAT3


Total number of params: 377799

Model Summary: GAT3


Total number of params: 523014


In [143]:
from torch_geometric.nn import TransformerConv

class GAT4(torch.nn.Module):
    # hidden channels will be the embedding dimension for each attention head
    # after applying the first GAT layer.
    def __init__(self, in_channels, hidden_channels,
                 num_heads, dropout_rate, num_classes):
        super().__init__()

        self.dropout_rate = dropout_rate

        self.hidden_channels = hidden_channels
        self.num_heads = num_heads
        self.num_classes = num_classes

        self.encoder1 = TransformerConv(in_channels, hidden_channels,
                                        heads=num_heads, dropout=dropout_rate)

        self.encoder2 = TransformerConv(hidden_channels*num_heads,
                                        num_classes, heads = num_heads,
                                        concat=False, dropout=dropout_rate)

    def forward(self, x, edge_index):
        out = F.dropout(x, p=self.dropout_rate, training=self.training)
        out = self.encoder1(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads
        out = F.elu(out)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.encoder2(out, edge_index)
        assert out.shape[-1] == self.num_classes

        return out

In [144]:
num_runs = 10
cora_models = create_models(GAT4, num_runs, cora_data)

cora_val_stats = val_stats(cora_models, cora_data, cora_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cora_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 119/1000 [00:01<00:08, 98.74it/s] 



Training completed.
Best Validation at Epoch: 20
Best Val Acc: 0.7780, Best Val Loss: 0.7264

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  20%|██        | 205/1000 [00:02<00:07, 99.61it/s] 



Training completed.
Best Validation at Epoch: 106
Best Val Acc: 0.7860, Best Val Loss: 0.7046

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▍        | 141/1000 [00:01<00:08, 98.42it/s] 



Training completed.
Best Validation at Epoch: 42
Best Val Acc: 0.7760, Best Val Loss: 0.7238

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 132/1000 [00:01<00:08, 97.54it/s] 



Training completed.
Best Validation at Epoch: 33
Best Val Acc: 0.7720, Best Val Loss: 0.7090

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 120/1000 [00:01<00:11, 75.04it/s]



Training completed.
Best Validation at Epoch: 21
Best Val Acc: 0.7760, Best Val Loss: 0.6769

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 122/1000 [00:01<00:11, 75.35it/s]



Training completed.
Best Validation at Epoch: 23
Best Val Acc: 0.7740, Best Val Loss: 0.6962

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  30%|██▉       | 295/1000 [00:03<00:07, 94.19it/s] 



Training completed.
Best Validation at Epoch: 196
Best Val Acc: 0.7740, Best Val Loss: 0.7402

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▎        | 125/1000 [00:01<00:08, 98.64it/s] 



Training completed.
Best Validation at Epoch: 26
Best Val Acc: 0.7760, Best Val Loss: 0.7104

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  19%|█▊        | 187/1000 [00:01<00:08, 97.29it/s] 



Training completed.
Best Validation at Epoch: 88
Best Val Acc: 0.7700, Best Val Loss: 0.7201

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  26%|██▌       | 262/1000 [00:02<00:07, 99.45it/s] 


Training completed.
Best Validation at Epoch: 163
Best Val Acc: 0.7720, Best Val Loss: 0.7193

Number of Runs: 10
Average Accuracy: 0.7706, Std of Accuracy: 0.0054, Average Loss: 0.7231, Std of Loss: 0.0247





In [146]:
num_runs = 10
cite_models = create_models(GAT4, num_runs, cite_data)

cite_val_stats = val_stats(cite_models, cite_data, cite_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cite_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  45%|████▍     | 449/1000 [00:06<00:08, 66.39it/s]



Training completed.
Best Validation at Epoch: 350
Best Val Acc: 0.7060, Best Val Loss: 0.9626

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  38%|███▊      | 376/1000 [00:04<00:08, 76.84it/s]



Training completed.
Best Validation at Epoch: 277
Best Val Acc: 0.7000, Best Val Loss: 0.9575

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  16%|█▋        | 165/1000 [00:02<00:10, 80.08it/s]



Training completed.
Best Validation at Epoch: 66
Best Val Acc: 0.7040, Best Val Loss: 0.9225

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  24%|██▍       | 238/1000 [00:03<00:11, 66.81it/s]



Training completed.
Best Validation at Epoch: 139
Best Val Acc: 0.7020, Best Val Loss: 0.9487

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  24%|██▍       | 240/1000 [00:03<00:09, 76.33it/s]



Training completed.
Best Validation at Epoch: 141
Best Val Acc: 0.6960, Best Val Loss: 0.9275

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  30%|██▉       | 296/1000 [00:03<00:08, 81.15it/s]



Training completed.
Best Validation at Epoch: 197
Best Val Acc: 0.7000, Best Val Loss: 0.9540

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  34%|███▍      | 345/1000 [00:04<00:08, 79.81it/s]



Training completed.
Best Validation at Epoch: 246
Best Val Acc: 0.6980, Best Val Loss: 0.9520

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  32%|███▏      | 321/1000 [00:04<00:10, 64.79it/s]



Training completed.
Best Validation at Epoch: 222
Best Val Acc: 0.6920, Best Val Loss: 0.9706

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  28%|██▊       | 285/1000 [00:03<00:08, 83.75it/s]



Training completed.
Best Validation at Epoch: 186
Best Val Acc: 0.7000, Best Val Loss: 0.9562

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  33%|███▎      | 331/1000 [00:04<00:08, 79.38it/s]



Training completed.
Best Validation at Epoch: 232
Best Val Acc: 0.7100, Best Val Loss: 0.9510

Number of Runs: 10
Average Accuracy: 0.6960, Std of Accuracy: 0.0081, Average Loss: 0.9549, Std of Loss: 0.0173


In [91]:
summarize(cora_models[0])
print()
summarize(cite_models[0])

Model Summary: GAT4

encoder1.lin_key.weight torch.Size([64, 1433])
encoder1.lin_key.bias torch.Size([64])
encoder1.lin_query.weight torch.Size([64, 1433])
encoder1.lin_query.bias torch.Size([64])
encoder1.lin_value.weight torch.Size([64, 1433])
encoder1.lin_value.bias torch.Size([64])
encoder1.lin_skip.weight torch.Size([64, 1433])
encoder1.lin_skip.bias torch.Size([64])
encoder2.lin_key.weight torch.Size([56, 64])
encoder2.lin_key.bias torch.Size([56])
encoder2.lin_query.weight torch.Size([56, 64])
encoder2.lin_query.bias torch.Size([56])
encoder2.lin_value.weight torch.Size([56, 64])
encoder2.lin_value.bias torch.Size([56])
encoder2.lin_skip.weight torch.Size([7, 64])
encoder2.lin_skip.bias torch.Size([7])

Total number of params: 378479

Model Summary: GAT3

conv1.att_src torch.Size([1, 8, 8])
conv1.att_dst torch.Size([1, 8, 8])
conv1.bias torch.Size([64])
conv1.lin.weight torch.Size([64, 3703])
conv2.att_src torch.Size([1, 8, 8])
conv2.att_dst torch.Size([1, 8, 8])
conv2.bias torc

In [141]:
class GAT5(torch.nn.Module):
    # hidden channels will be the embedding dimension for each attention head
    # after applying the first GAT layer.
    def __init__(self, in_channels, hidden_channels,
                 num_heads, dropout_rate, num_classes):
        super().__init__()

        self.dropout_rate = dropout_rate

        self.hidden_channels = hidden_channels
        self.num_heads = num_heads
        self.num_classes = num_classes

        self.conv1 = GATConv(in_channels, hidden_channels, heads=num_heads,
                             dropout=dropout_rate)

        self.encoder1 = TransformerConv(hidden_channels*num_heads,
                                        hidden_channels,
                                        heads=num_heads, dropout=dropout_rate)

        self.conv2 = GATConv(hidden_channels*num_heads, num_classes,
                             dropout=dropout_rate, concat=False)

    def forward(self, x, edge_index):
        out = F.dropout(x, p=self.dropout_rate, training=self.training)
        out = self.conv1(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads
        out = F.elu(out)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.encoder1(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads
        out = F.elu(out)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.conv2(out, edge_index)
        assert out.shape[-1] == self.num_classes

        return out

In [142]:
num_runs = 10
cora_models = create_models(GAT5, num_runs, cora_data)

cora_val_stats = val_stats(cora_models, cora_data, cora_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cora_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:12, 73.56it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.7940, Best Val Loss: 0.6809

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:12, 73.62it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.7940, Best Val Loss: 0.6284

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:11, 74.23it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.7940, Best Val Loss: 0.7030

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▍        | 143/1000 [00:01<00:11, 74.45it/s]



Training completed.
Best Validation at Epoch: 44
Best Val Acc: 0.7860, Best Val Loss: 0.7874

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:15, 57.96it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.7820, Best Val Loss: 0.6910

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▍        | 138/1000 [00:02<00:15, 56.15it/s]



Training completed.
Best Validation at Epoch: 39
Best Val Acc: 0.8040, Best Val Loss: 0.6869

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 126/1000 [00:01<00:11, 73.55it/s]



Training completed.
Best Validation at Epoch: 27
Best Val Acc: 0.8280, Best Val Loss: 0.6253

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  15%|█▌        | 151/1000 [00:02<00:11, 75.15it/s]



Training completed.
Best Validation at Epoch: 52
Best Val Acc: 0.7820, Best Val Loss: 0.8265

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 114/1000 [00:01<00:11, 74.67it/s]



Training completed.
Best Validation at Epoch: 15
Best Val Acc: 0.7860, Best Val Loss: 0.6955

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 134/1000 [00:01<00:11, 75.29it/s]



Training completed.
Best Validation at Epoch: 35
Best Val Acc: 0.8060, Best Val Loss: 0.6955

Number of Runs: 10
Average Accuracy: 0.7926, Std of Accuracy: 0.0165, Average Loss: 0.8174, Std of Loss: 0.1897


In [136]:
num_runs = 10
cite_models = create_models(GAT5, num_runs, cite_data)

cite_val_stats = val_stats(cite_models, cite_data, cite_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cite_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:13, 67.03it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7100, Best Val Loss: 0.9417

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:01<00:13, 68.63it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.7020, Best Val Loss: 0.9878

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:13, 63.97it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.6960, Best Val Loss: 0.9850

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:02<00:17, 51.00it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6880, Best Val Loss: 0.9817

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:01<00:16, 55.52it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.6900, Best Val Loss: 0.9925

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▎        | 137/1000 [00:02<00:12, 68.28it/s]



Training completed.
Best Validation at Epoch: 38
Best Val Acc: 0.6900, Best Val Loss: 0.9817

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:12, 69.57it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7120, Best Val Loss: 0.9524

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:13, 66.87it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6940, Best Val Loss: 1.0222

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:13, 68.29it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7020, Best Val Loss: 0.9753

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 109/1000 [00:01<00:13, 68.14it/s]



Training completed.
Best Validation at Epoch: 10
Best Val Acc: 0.7100, Best Val Loss: 0.9561

Number of Runs: 10
Average Accuracy: 0.6860, Std of Accuracy: 0.0110, Average Loss: 1.0275, Std of Loss: 0.1400


In [138]:
summarize(cora_models[0])
print()
summarize(cite_models[0])

Model Summary: GAT5


Total number of params: 109013

Model Summary: GAT5


Total number of params: 254226


In [131]:
class GAT6(torch.nn.Module):
    # hidden channels will be the embedding dimension for each attention head
    # after applying the first GAT layer.
    def __init__(self, in_channels, hidden_channels,
                 num_heads, dropout_rate, num_classes):
        super().__init__()

        self.dropout_rate = dropout_rate

        self.hidden_channels = hidden_channels
        self.num_heads = num_heads
        self.num_classes = num_classes

        self.conv1 = GATConv(in_channels, hidden_channels, heads=num_heads,
                             dropout=dropout_rate)

        self.conv2 = GATConv(hidden_channels*num_heads, hidden_channels,
                             heads=num_heads, dropout=dropout_rate)

        self.encoder1 = TransformerConv(hidden_channels*num_heads,
                                        num_classes, heads=num_heads,
                                        concat=False, dropout=dropout_rate)

    def forward(self, x, edge_index):
        out = F.dropout(x, p=self.dropout_rate, training=self.training)
        out = self.conv1(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads
        out = F.elu(out)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.conv2(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads
        out = F.elu(out)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.encoder1(out, edge_index)
        assert out.shape[-1] == self.num_classes

        return out

In [132]:
num_runs = 10
cora_models = create_models(GAT6, num_runs, cora_data)

cora_val_stats = val_stats(cora_models, cora_data, cora_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cora_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  16%|█▌        | 157/1000 [00:02<00:13, 61.15it/s]



Training completed.
Best Validation at Epoch: 58
Best Val Acc: 0.7980, Best Val Loss: 0.6598

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:01<00:13, 67.64it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.7960, Best Val Loss: 0.6488

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 118/1000 [00:01<00:11, 78.38it/s]



Training completed.
Best Validation at Epoch: 19
Best Val Acc: 0.7960, Best Val Loss: 0.6840

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:01<00:11, 79.36it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.8040, Best Val Loss: 0.6506

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 118/1000 [00:01<00:11, 78.76it/s]



Training completed.
Best Validation at Epoch: 19
Best Val Acc: 0.7960, Best Val Loss: 0.6985

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  23%|██▎       | 230/1000 [00:02<00:09, 80.81it/s]



Training completed.
Best Validation at Epoch: 131
Best Val Acc: 0.7900, Best Val Loss: 0.7048

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:01<00:11, 78.85it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.8080, Best Val Loss: 0.6727

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 115/1000 [00:01<00:13, 64.42it/s]



Training completed.
Best Validation at Epoch: 16
Best Val Acc: 0.8040, Best Val Loss: 0.6972

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:01<00:14, 60.45it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.8120, Best Val Loss: 0.6415

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 117/1000 [00:01<00:12, 73.52it/s]


Training completed.
Best Validation at Epoch: 18
Best Val Acc: 0.7940, Best Val Loss: 0.6796

Number of Runs: 10
Average Accuracy: 0.7960, Std of Accuracy: 0.0059, Average Loss: 0.7374, Std of Loss: 0.1339





In [133]:
num_runs = 10
cite_models = create_models(GAT6, num_runs, cite_data)

cite_val_stats = val_stats(cite_models, cite_data, cite_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cite_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:15, 56.10it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.6900, Best Val Loss: 0.9654

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 111/1000 [00:01<00:15, 56.34it/s]



Training completed.
Best Validation at Epoch: 12
Best Val Acc: 0.6920, Best Val Loss: 0.9524

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:12, 72.44it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.6880, Best Val Loss: 0.9693

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:12, 73.29it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.7060, Best Val Loss: 0.9371

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 111/1000 [00:01<00:12, 72.70it/s]



Training completed.
Best Validation at Epoch: 12
Best Val Acc: 0.6860, Best Val Loss: 0.9735

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 111/1000 [00:01<00:12, 71.90it/s]



Training completed.
Best Validation at Epoch: 12
Best Val Acc: 0.6860, Best Val Loss: 0.9583

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 111/1000 [00:01<00:12, 72.44it/s]



Training completed.
Best Validation at Epoch: 12
Best Val Acc: 0.7000, Best Val Loss: 0.9820

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:12, 72.49it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.6840, Best Val Loss: 0.9897

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:14, 62.07it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.6900, Best Val Loss: 0.9647

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:15, 56.58it/s]


Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.7080, Best Val Loss: 0.9190

Number of Runs: 10
Average Accuracy: 0.6850, Std of Accuracy: 0.0089, Average Loss: 0.9612, Std of Loss: 0.0209





In [107]:
summarize(cora_models[0])
print()
summarize(cite_models[0])

Model Summary: GAT6


Total number of params: 107567

Model Summary: GAT6


Total number of params: 251222


In [122]:
from torch_geometric.nn import TransformerConv

class GAT7(torch.nn.Module):
    # hidden channels will be the embedding dimension for each attention head
    # after applying the first GAT layer.
    def __init__(self, in_channels, hidden_channels,
                 num_heads, dropout_rate, num_classes):
        super().__init__()

        self.dropout_rate = dropout_rate

        self.hidden_channels = hidden_channels
        self.num_heads = num_heads
        self.num_classes = num_classes

        self.encoder1 = TransformerConv(in_channels, hidden_channels,
                                        heads=num_heads, dropout=dropout_rate)

        self.encoder2 = TransformerConv(hidden_channels*num_heads,
                                        hidden_channels, heads = num_heads,
                                        dropout=dropout_rate)

        self.encoder3 = TransformerConv(hidden_channels*num_heads,
                                        num_classes, heads = num_heads,
                                        concat=False, dropout=dropout_rate)

    def forward(self, x, edge_index):
        out = F.dropout(x, p=self.dropout_rate, training=self.training)
        out = self.encoder1(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads
        out = F.elu(out)

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.encoder2(out, edge_index)
        assert out.shape[-1] == self.hidden_channels * self.num_heads

        out = F.dropout(out, p=self.dropout_rate, training=self.training)
        out = self.encoder3(out, edge_index)
        assert out.shape[-1] == self.num_classes

        return out

In [123]:
num_runs = 10
cora_models = create_models(GAT7, num_runs, cora_data)

cora_val_stats = val_stats(cora_models, cora_data, cora_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cora_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  19%|█▉        | 194/1000 [00:03<00:14, 56.30it/s]



Training completed.
Best Validation at Epoch: 95
Best Val Acc: 0.7900, Best Val Loss: 0.7111

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  19%|█▉        | 193/1000 [00:02<00:11, 72.51it/s]



Training completed.
Best Validation at Epoch: 94
Best Val Acc: 0.7920, Best Val Loss: 0.6865

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:12, 73.78it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.7960, Best Val Loss: 0.6279

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 114/1000 [00:01<00:11, 75.02it/s]



Training completed.
Best Validation at Epoch: 15
Best Val Acc: 0.7900, Best Val Loss: 0.6557

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:12, 72.99it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.7840, Best Val Loss: 0.7129

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 129/1000 [00:01<00:11, 74.74it/s]



Training completed.
Best Validation at Epoch: 30
Best Val Acc: 0.8000, Best Val Loss: 0.6270

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:12, 72.29it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.7880, Best Val Loss: 0.6587

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  26%|██▌       | 256/1000 [00:04<00:12, 59.19it/s]



Training completed.
Best Validation at Epoch: 157
Best Val Acc: 0.7920, Best Val Loss: 0.7406

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 111/1000 [00:01<00:11, 74.19it/s]



Training completed.
Best Validation at Epoch: 12
Best Val Acc: 0.7920, Best Val Loss: 0.6682

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:11, 74.87it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.7880, Best Val Loss: 0.7207

Number of Runs: 10
Average Accuracy: 0.7900, Std of Accuracy: 0.0065, Average Loss: 0.7636, Std of Loss: 0.1257


In [124]:
num_runs = 10
cite_models = create_models(GAT7, num_runs, cite_data)

cite_val_stats = val_stats(cite_models, cite_data, cite_data.val_mask,
                           loss_fn)
avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = cite_val_stats
print(f"Number of Runs: {len(acc_tensor)}")
print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:14, 62.61it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6920, Best Val Loss: 0.9792

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:01<00:14, 62.72it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.6780, Best Val Loss: 0.9846

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:01<00:12, 71.50it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.6940, Best Val Loss: 1.0105

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  20%|██        | 200/1000 [00:02<00:10, 74.34it/s]



Training completed.
Best Validation at Epoch: 101
Best Val Acc: 0.7040, Best Val Loss: 0.9812

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:12, 70.38it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6980, Best Val Loss: 0.9519

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:16, 55.11it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6940, Best Val Loss: 0.9896

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:01<00:15, 56.10it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.7120, Best Val Loss: 0.9874

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:12, 70.76it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6960, Best Val Loss: 0.9407

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 109/1000 [00:01<00:12, 70.77it/s]



Training completed.
Best Validation at Epoch: 10
Best Val Acc: 0.6940, Best Val Loss: 0.9500

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:01<00:12, 72.35it/s]


Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.6780, Best Val Loss: 0.9665

Number of Runs: 10
Average Accuracy: 0.6844, Std of Accuracy: 0.0119, Average Loss: 0.9856, Std of Loss: 0.0393





In [125]:
summarize(cora_models[0])
print()
summarize(cite_models[0])

Model Summary: GAT7


Total number of params: 395119

Model Summary: GAT7


Total number of params: 974614


In [208]:
def test_model(Model_Class, data_name, mask, num_runs=10):
    global cora_models
    global cite_models
    if data_name == "cora":
        cora_models = create_models(Model_Class, num_runs, cora_data)
        data_val_stats = val_stats(cora_models, cora_data, mask, loss_fn)

    elif data_name == "cite":
        cite_models = create_models(Model_Class, num_runs, cite_data)
        data_val_stats = val_stats(cite_models, cite_data, mask, loss_fn)
    else:
        print("Wrong data set name")
        return

    avg_acc, std_acc, avg_loss, std_loss, acc_tensor, loss_tensor = data_val_stats
    print(f"Number of Runs: {len(acc_tensor)}")
    print(f"Average Accuracy: {avg_acc:.4f}, Std of Accuracy: {std_acc:.4f}, Average Loss: {avg_loss:.4f}, Std of Loss: {std_loss:.4f}")

In [198]:
test_model(GAT, "cora", cora_data.val_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:09, 90.13it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7900, Best Val Loss: 1.1219

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▍        | 143/1000 [00:01<00:09, 86.15it/s]



Training completed.
Best Validation at Epoch: 44
Best Val Acc: 0.8020, Best Val Loss: 0.6557

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▎        | 137/1000 [00:01<00:09, 92.14it/s] 



Training completed.
Best Validation at Epoch: 38
Best Val Acc: 0.8080, Best Val Loss: 0.6470

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  21%|██        | 212/1000 [00:01<00:06, 112.80it/s]



Training completed.
Best Validation at Epoch: 113
Best Val Acc: 0.7940, Best Val Loss: 0.6792

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:00<00:07, 112.52it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.7960, Best Val Loss: 1.3974

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 118/1000 [00:01<00:07, 111.69it/s]



Training completed.
Best Validation at Epoch: 19
Best Val Acc: 0.8080, Best Val Loss: 0.7273

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 118/1000 [00:01<00:08, 110.04it/s]



Training completed.
Best Validation at Epoch: 19
Best Val Acc: 0.7980, Best Val Loss: 0.7394

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  20%|██        | 202/1000 [00:01<00:07, 113.26it/s]



Training completed.
Best Validation at Epoch: 103
Best Val Acc: 0.7880, Best Val Loss: 0.7538

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 111/1000 [00:01<00:08, 110.12it/s]



Training completed.
Best Validation at Epoch: 12
Best Val Acc: 0.7900, Best Val Loss: 0.9625

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:01<00:08, 110.18it/s]



Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.7860, Best Val Loss: 0.8830

Number of Runs: 10
Average Accuracy: 0.7960, Std of Accuracy: 0.0079, Average Loss: 0.8573, Std of Loss: 0.2426


In [199]:
test_model(GAT, "cite", cite_data.val_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 105/1000 [00:01<00:09, 98.21it/s] 



Training completed.
Best Validation at Epoch: 6
Best Val Acc: 0.6920, Best Val Loss: 1.1589

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:08, 99.81it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.7080, Best Val Loss: 1.2249

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 103/1000 [00:01<00:08, 100.94it/s]



Training completed.
Best Validation at Epoch: 4
Best Val Acc: 0.6880, Best Val Loss: 1.3282

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 103/1000 [00:01<00:10, 85.22it/s]



Training completed.
Best Validation at Epoch: 4
Best Val Acc: 0.7000, Best Val Loss: 1.3091

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▍        | 145/1000 [00:01<00:10, 79.79it/s]



Training completed.
Best Validation at Epoch: 46
Best Val Acc: 0.6920, Best Val Loss: 1.0362

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:10, 81.84it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.7160, Best Val Loss: 1.2424

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:08, 100.13it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.7000, Best Val Loss: 1.2483

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:08, 100.54it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.7000, Best Val Loss: 1.2413

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  31%|███▏      | 314/1000 [00:03<00:06, 101.62it/s]



Training completed.
Best Validation at Epoch: 215
Best Val Acc: 0.7020, Best Val Loss: 1.0528

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 105/1000 [00:01<00:08, 102.76it/s]



Training completed.
Best Validation at Epoch: 6
Best Val Acc: 0.7000, Best Val Loss: 1.1886

Number of Runs: 10
Average Accuracy: 0.6998, Std of Accuracy: 0.0081, Average Loss: 1.2140, Std of Loss: 0.0836


In [200]:
summarize(cora_models[0])
summarize(cite_models[0])

Model Summary: GAT


Total number of params: 92373
Model Summary: GAT


Total number of params: 237586


In [226]:
test_model(GAT, "cora", cora_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 130/1000 [00:01<00:07, 108.99it/s]



Training completed.
Best Validation at Epoch: 31
Best Val Acc: 0.8000, Best Val Loss: 0.6646

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 117/1000 [00:01<00:07, 111.19it/s]



Training completed.
Best Validation at Epoch: 18
Best Val Acc: 0.7940, Best Val Loss: 0.7755

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 119/1000 [00:01<00:08, 109.43it/s]



Training completed.
Best Validation at Epoch: 20
Best Val Acc: 0.7940, Best Val Loss: 0.7848

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  17%|█▋        | 168/1000 [00:01<00:07, 112.36it/s]



Training completed.
Best Validation at Epoch: 69
Best Val Acc: 0.7860, Best Val Loss: 0.7277

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:01<00:07, 112.96it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.8140, Best Val Loss: 0.7684

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:01<00:10, 86.41it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.8020, Best Val Loss: 0.8139

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:01<00:10, 83.08it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.7900, Best Val Loss: 1.1864

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:01<00:11, 78.02it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.7900, Best Val Loss: 0.8267

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:09, 91.47it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.7900, Best Val Loss: 1.0608

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  18%|█▊        | 181/1000 [00:01<00:08, 98.21it/s] 


Training completed.
Best Validation at Epoch: 82
Best Val Acc: 0.7920, Best Val Loss: 0.7232

Number of Runs: 10
Average Accuracy: 0.7934, Std of Accuracy: 0.0113, Average Loss: 0.7761, Std of Loss: 0.1773





In [227]:
test_model(GAT, "cite", cite_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 105/1000 [00:01<00:09, 97.32it/s] 



Training completed.
Best Validation at Epoch: 6
Best Val Acc: 0.6960, Best Val Loss: 1.1721

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  25%|██▍       | 248/1000 [00:02<00:07, 100.34it/s]



Training completed.
Best Validation at Epoch: 149
Best Val Acc: 0.6960, Best Val Loss: 1.0649

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  18%|█▊        | 177/1000 [00:01<00:08, 97.33it/s]



Training completed.
Best Validation at Epoch: 78
Best Val Acc: 0.7000, Best Val Loss: 1.0516

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  34%|███▎      | 336/1000 [00:04<00:07, 83.63it/s]



Training completed.
Best Validation at Epoch: 237
Best Val Acc: 0.7000, Best Val Loss: 1.0297

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 126/1000 [00:01<00:08, 98.97it/s] 



Training completed.
Best Validation at Epoch: 27
Best Val Acc: 0.7020, Best Val Loss: 0.9423

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  17%|█▋        | 168/1000 [00:01<00:08, 102.43it/s]



Training completed.
Best Validation at Epoch: 69
Best Val Acc: 0.6980, Best Val Loss: 1.0355

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:01<00:08, 99.63it/s] 



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.7080, Best Val Loss: 1.0710

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  16%|█▋        | 165/1000 [00:01<00:08, 99.62it/s] 



Training completed.
Best Validation at Epoch: 66
Best Val Acc: 0.7140, Best Val Loss: 0.9423

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:09, 97.13it/s] 



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.6920, Best Val Loss: 0.9975

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 105/1000 [00:01<00:08, 101.06it/s]



Training completed.
Best Validation at Epoch: 6
Best Val Acc: 0.6940, Best Val Loss: 1.1756

Number of Runs: 10
Average Accuracy: 0.6828, Std of Accuracy: 0.0056, Average Loss: 1.0773, Std of Loss: 0.0785


In [228]:
summarize(cora_models[0])
summarize(cite_models[0])

Model Summary: GAT


Total number of params: 92373
Model Summary: GAT


Total number of params: 237586


In [229]:
test_model(GAT1, "cora", cora_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 111/1000 [00:01<00:14, 63.42it/s]



Training completed.
Best Validation at Epoch: 12
Best Val Acc: 0.7780, Best Val Loss: 0.8309

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 109/1000 [00:01<00:13, 66.89it/s]



Training completed.
Best Validation at Epoch: 10
Best Val Acc: 0.8040, Best Val Loss: 0.8858

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:10, 83.53it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7980, Best Val Loss: 0.9372

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 114/1000 [00:01<00:10, 82.48it/s]



Training completed.
Best Validation at Epoch: 15
Best Val Acc: 0.8180, Best Val Loss: 0.6719

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 118/1000 [00:01<00:10, 81.93it/s]



Training completed.
Best Validation at Epoch: 19
Best Val Acc: 0.8060, Best Val Loss: 0.6819

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:01<00:10, 82.85it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.8100, Best Val Loss: 0.6465

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:10, 84.07it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.8140, Best Val Loss: 1.3548

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  16%|█▌        | 162/1000 [00:01<00:10, 82.27it/s]



Training completed.
Best Validation at Epoch: 63
Best Val Acc: 0.8080, Best Val Loss: 0.6537

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:12, 71.21it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.8100, Best Val Loss: 0.7044

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  20%|█▉        | 197/1000 [00:03<00:12, 64.73it/s]


Training completed.
Best Validation at Epoch: 98
Best Val Acc: 0.7940, Best Val Loss: 0.9236

Number of Runs: 10
Average Accuracy: 0.7957, Std of Accuracy: 0.0097, Average Loss: 0.7957, Std of Loss: 0.2163





In [230]:
test_model(GAT1, "cite", cite_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:11, 75.49it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.7080, Best Val Loss: 1.2120

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:11, 76.83it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.6960, Best Val Loss: 1.2292

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 109/1000 [00:01<00:13, 63.81it/s]



Training completed.
Best Validation at Epoch: 10
Best Val Acc: 0.6880, Best Val Loss: 0.9428

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:15, 59.32it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.6860, Best Val Loss: 1.1899

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:13, 67.86it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.7020, Best Val Loss: 1.1934

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:01<00:11, 75.94it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.6760, Best Val Loss: 1.0158

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 105/1000 [00:01<00:11, 75.86it/s]



Training completed.
Best Validation at Epoch: 6
Best Val Acc: 0.6860, Best Val Loss: 1.1152

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:11, 76.11it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.7100, Best Val Loss: 1.1949

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 106/1000 [00:01<00:11, 76.74it/s]



Training completed.
Best Validation at Epoch: 7
Best Val Acc: 0.6860, Best Val Loss: 1.0498

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:01<00:11, 76.03it/s]


Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.6880, Best Val Loss: 1.0013

Number of Runs: 10
Average Accuracy: 0.6799, Std of Accuracy: 0.0086, Average Loss: 1.1229, Std of Loss: 0.0988





In [231]:
summarize(cora_models[0])
summarize(cite_models[0])

Model Summary: GAT1


Total number of params: 96661
Model Summary: GAT1


Total number of params: 241874


In [232]:
test_model(GAT2, "cora", cora_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  25%|██▍       | 248/1000 [00:08<00:24, 30.65it/s]



Training completed.
Best Validation at Epoch: 149
Best Val Acc: 0.7620, Best Val Loss: 0.9221

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:03<00:28, 31.73it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7880, Best Val Loss: 0.7028

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  15%|█▌        | 154/1000 [00:05<00:28, 29.94it/s]



Training completed.
Best Validation at Epoch: 55
Best Val Acc: 0.7760, Best Val Loss: 0.8518

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:03<00:28, 31.35it/s]



Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.7960, Best Val Loss: 0.6928

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  15%|█▌        | 150/1000 [00:04<00:26, 31.83it/s]



Training completed.
Best Validation at Epoch: 51
Best Val Acc: 0.7760, Best Val Loss: 0.8251

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▎        | 136/1000 [00:04<00:28, 30.47it/s]



Training completed.
Best Validation at Epoch: 37
Best Val Acc: 0.8020, Best Val Loss: 0.7381

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 120/1000 [00:03<00:28, 30.92it/s]



Training completed.
Best Validation at Epoch: 21
Best Val Acc: 0.7740, Best Val Loss: 0.8171

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  16%|█▌        | 156/1000 [00:04<00:26, 31.95it/s]



Training completed.
Best Validation at Epoch: 57
Best Val Acc: 0.8220, Best Val Loss: 0.6685

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▎        | 125/1000 [00:04<00:28, 31.21it/s]



Training completed.
Best Validation at Epoch: 26
Best Val Acc: 0.7960, Best Val Loss: 0.7377

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 115/1000 [00:03<00:28, 30.70it/s]


Training completed.
Best Validation at Epoch: 16
Best Val Acc: 0.7920, Best Val Loss: 0.6677

Number of Runs: 10
Average Accuracy: 0.7923, Std of Accuracy: 0.0144, Average Loss: 0.9048, Std of Loss: 0.2676





In [233]:
test_model(GAT2, "cite", cite_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:05<00:39, 22.24it/s]



Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.7140, Best Val Loss: 0.8628

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 118/1000 [00:05<00:38, 22.66it/s]



Training completed.
Best Validation at Epoch: 19
Best Val Acc: 0.7140, Best Val Loss: 1.0246

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 119/1000 [00:05<00:40, 22.01it/s]



Training completed.
Best Validation at Epoch: 20
Best Val Acc: 0.6680, Best Val Loss: 1.1773

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 131/1000 [00:05<00:38, 22.56it/s]



Training completed.
Best Validation at Epoch: 32
Best Val Acc: 0.6720, Best Val Loss: 1.1294

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 131/1000 [00:05<00:39, 21.84it/s]



Training completed.
Best Validation at Epoch: 32
Best Val Acc: 0.6680, Best Val Loss: 1.0619

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:04<00:39, 22.37it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.7040, Best Val Loss: 1.0132

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 123/1000 [00:05<00:39, 22.02it/s]



Training completed.
Best Validation at Epoch: 24
Best Val Acc: 0.6600, Best Val Loss: 1.0555

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 115/1000 [00:05<00:41, 21.52it/s]



Training completed.
Best Validation at Epoch: 16
Best Val Acc: 0.6560, Best Val Loss: 1.0558

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  20%|██        | 202/1000 [00:09<00:35, 22.27it/s]



Training completed.
Best Validation at Epoch: 103
Best Val Acc: 0.6820, Best Val Loss: 1.1574

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:05<00:40, 21.94it/s]



Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.6880, Best Val Loss: 0.9874

Number of Runs: 10
Average Accuracy: 0.6774, Std of Accuracy: 0.0129, Average Loss: 1.3398, Std of Loss: 0.3925


In [234]:
test_model(GAT3, "cora", cora_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  15%|█▍        | 146/1000 [00:04<00:25, 34.14it/s]



Training completed.
Best Validation at Epoch: 47
Best Val Acc: 0.8100, Best Val Loss: 0.7617

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 114/1000 [00:03<00:25, 34.78it/s]



Training completed.
Best Validation at Epoch: 15
Best Val Acc: 0.8080, Best Val Loss: 0.7144

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 115/1000 [00:03<00:27, 32.26it/s]



Training completed.
Best Validation at Epoch: 16
Best Val Acc: 0.7980, Best Val Loss: 0.7439

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  19%|█▊        | 187/1000 [00:05<00:23, 34.64it/s]



Training completed.
Best Validation at Epoch: 88
Best Val Acc: 0.7940, Best Val Loss: 0.7181

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 130/1000 [00:03<00:25, 34.38it/s]



Training completed.
Best Validation at Epoch: 31
Best Val Acc: 0.7760, Best Val Loss: 0.8024

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:03<00:27, 32.47it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7980, Best Val Loss: 0.7070

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 121/1000 [00:03<00:26, 33.26it/s]



Training completed.
Best Validation at Epoch: 22
Best Val Acc: 0.7840, Best Val Loss: 0.7546

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 132/1000 [00:03<00:25, 34.23it/s]



Training completed.
Best Validation at Epoch: 33
Best Val Acc: 0.7840, Best Val Loss: 0.6495

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:03<00:26, 34.12it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.7900, Best Val Loss: 0.7214

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 111/1000 [00:03<00:27, 32.25it/s]



Training completed.
Best Validation at Epoch: 12
Best Val Acc: 0.7860, Best Val Loss: 0.7661

Number of Runs: 10
Average Accuracy: 0.7969, Std of Accuracy: 0.0097, Average Loss: 0.8504, Std of Loss: 0.1993


In [235]:
test_model(GAT3, "cite", cite_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 109/1000 [00:04<00:36, 24.33it/s]



Training completed.
Best Validation at Epoch: 10
Best Val Acc: 0.6920, Best Val Loss: 1.0395

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:04<00:36, 24.69it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.6860, Best Val Loss: 0.9989

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▍        | 139/1000 [00:05<00:35, 24.16it/s]



Training completed.
Best Validation at Epoch: 40
Best Val Acc: 0.6700, Best Val Loss: 1.1025

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:04<00:36, 24.69it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6820, Best Val Loss: 1.0174

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  20%|█▉        | 195/1000 [00:08<00:33, 24.11it/s]



Training completed.
Best Validation at Epoch: 96
Best Val Acc: 0.6740, Best Val Loss: 1.2117

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 126/1000 [00:05<00:35, 24.49it/s]



Training completed.
Best Validation at Epoch: 27
Best Val Acc: 0.6860, Best Val Loss: 1.0443

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:04<00:36, 24.19it/s]



Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.6880, Best Val Loss: 1.0524

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:04<00:37, 23.82it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.6980, Best Val Loss: 1.0415

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  16%|█▋        | 164/1000 [00:06<00:33, 24.76it/s]



Training completed.
Best Validation at Epoch: 65
Best Val Acc: 0.6840, Best Val Loss: 1.0475

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 115/1000 [00:04<00:37, 23.87it/s]


Training completed.
Best Validation at Epoch: 16
Best Val Acc: 0.6740, Best Val Loss: 1.0932

Number of Runs: 10
Average Accuracy: 0.6740, Std of Accuracy: 0.0151, Average Loss: 1.3815, Std of Loss: 0.4053





In [236]:
test_model(GAT4, "cora", cora_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▎        | 125/1000 [00:01<00:09, 95.61it/s]



Training completed.
Best Validation at Epoch: 26
Best Val Acc: 0.7780, Best Val Loss: 0.7529

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 127/1000 [00:01<00:11, 78.35it/s]



Training completed.
Best Validation at Epoch: 28
Best Val Acc: 0.7740, Best Val Loss: 0.7412

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▍        | 141/1000 [00:01<00:11, 77.75it/s]



Training completed.
Best Validation at Epoch: 42
Best Val Acc: 0.7740, Best Val Loss: 0.7210

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  14%|█▍        | 142/1000 [00:01<00:08, 98.86it/s] 



Training completed.
Best Validation at Epoch: 43
Best Val Acc: 0.7780, Best Val Loss: 0.7142

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  27%|██▋       | 267/1000 [00:02<00:07, 104.50it/s]



Training completed.
Best Validation at Epoch: 168
Best Val Acc: 0.7700, Best Val Loss: 0.7481

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  15%|█▌        | 154/1000 [00:01<00:08, 101.68it/s]



Training completed.
Best Validation at Epoch: 55
Best Val Acc: 0.7700, Best Val Loss: 0.7405

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:01<00:08, 98.94it/s] 



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.7660, Best Val Loss: 0.7479

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 118/1000 [00:01<00:08, 105.47it/s]



Training completed.
Best Validation at Epoch: 19
Best Val Acc: 0.7760, Best Val Loss: 0.6965

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 109/1000 [00:01<00:08, 102.15it/s]



Training completed.
Best Validation at Epoch: 10
Best Val Acc: 0.7720, Best Val Loss: 0.9949

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 119/1000 [00:01<00:08, 101.83it/s]


Training completed.
Best Validation at Epoch: 20
Best Val Acc: 0.7800, Best Val Loss: 0.6935

Number of Runs: 10
Average Accuracy: 0.7899, Std of Accuracy: 0.0047, Average Loss: 0.6821, Std of Loss: 0.1000





In [237]:
test_model(GAT4, "cite", cite_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  16%|█▋        | 163/1000 [00:02<00:10, 80.98it/s]



Training completed.
Best Validation at Epoch: 64
Best Val Acc: 0.6940, Best Val Loss: 0.9740

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:10, 86.08it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.6900, Best Val Loss: 1.3102

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  23%|██▎       | 226/1000 [00:02<00:09, 77.74it/s]



Training completed.
Best Validation at Epoch: 127
Best Val Acc: 0.6960, Best Val Loss: 0.9594

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  17%|█▋        | 171/1000 [00:02<00:11, 72.35it/s]



Training completed.
Best Validation at Epoch: 72
Best Val Acc: 0.6940, Best Val Loss: 0.9694

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  20%|██        | 201/1000 [00:02<00:09, 84.51it/s]



Training completed.
Best Validation at Epoch: 102
Best Val Acc: 0.7000, Best Val Loss: 0.9660

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  29%|██▉       | 289/1000 [00:03<00:08, 86.07it/s]



Training completed.
Best Validation at Epoch: 190
Best Val Acc: 0.6980, Best Val Loss: 0.9650

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 106/1000 [00:01<00:10, 84.48it/s]



Training completed.
Best Validation at Epoch: 7
Best Val Acc: 0.7160, Best Val Loss: 1.1409

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  23%|██▎       | 229/1000 [00:02<00:09, 84.84it/s]



Training completed.
Best Validation at Epoch: 130
Best Val Acc: 0.7000, Best Val Loss: 0.9709

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 105/1000 [00:01<00:12, 73.64it/s]



Training completed.
Best Validation at Epoch: 6
Best Val Acc: 0.6960, Best Val Loss: 1.2258

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:12, 72.43it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.6960, Best Val Loss: 1.0149

Number of Runs: 10
Average Accuracy: 0.6835, Std of Accuracy: 0.0082, Average Loss: 1.0380, Std of Loss: 0.1396


In [238]:
test_model(GAT5, "cora", cora_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 117/1000 [00:01<00:12, 72.82it/s]



Training completed.
Best Validation at Epoch: 18
Best Val Acc: 0.7960, Best Val Loss: 0.6336

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 106/1000 [00:01<00:12, 73.76it/s]



Training completed.
Best Validation at Epoch: 7
Best Val Acc: 0.7860, Best Val Loss: 1.0722

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:12, 72.15it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.8000, Best Val Loss: 0.6631

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:12, 74.25it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7700, Best Val Loss: 0.8681

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:01<00:12, 72.26it/s]



Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.7940, Best Val Loss: 0.6563

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:14, 62.41it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.7920, Best Val Loss: 0.6827

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:01<00:15, 55.81it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.7620, Best Val Loss: 1.0123

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  17%|█▋        | 168/1000 [00:02<00:12, 65.62it/s]



Training completed.
Best Validation at Epoch: 69
Best Val Acc: 0.7940, Best Val Loss: 0.6561

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:12, 71.97it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.8080, Best Val Loss: 0.7437

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  13%|█▎        | 131/1000 [00:01<00:11, 73.41it/s]



Training completed.
Best Validation at Epoch: 32
Best Val Acc: 0.7760, Best Val Loss: 0.9737

Number of Runs: 10
Average Accuracy: 0.7984, Std of Accuracy: 0.0132, Average Loss: 0.7533, Std of Loss: 0.1671


In [239]:
test_model(GAT5, "cite", cite_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 106/1000 [00:01<00:14, 63.68it/s]



Training completed.
Best Validation at Epoch: 7
Best Val Acc: 0.7160, Best Val Loss: 0.9849

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:16, 52.88it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.6940, Best Val Loss: 1.1836

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:02<00:17, 51.37it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.6780, Best Val Loss: 1.0134

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 103/1000 [00:01<00:13, 66.90it/s]



Training completed.
Best Validation at Epoch: 4
Best Val Acc: 0.7060, Best Val Loss: 1.3012

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:13, 67.39it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.6760, Best Val Loss: 1.2051

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 106/1000 [00:01<00:13, 68.50it/s]



Training completed.
Best Validation at Epoch: 7
Best Val Acc: 0.6900, Best Val Loss: 1.0367

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 105/1000 [00:01<00:13, 67.97it/s]



Training completed.
Best Validation at Epoch: 6
Best Val Acc: 0.6760, Best Val Loss: 1.2042

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 105/1000 [00:01<00:12, 69.20it/s]



Training completed.
Best Validation at Epoch: 6
Best Val Acc: 0.7000, Best Val Loss: 1.0767

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 106/1000 [00:01<00:13, 68.35it/s]



Training completed.
Best Validation at Epoch: 7
Best Val Acc: 0.6980, Best Val Loss: 1.0129

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:16, 55.88it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.6880, Best Val Loss: 1.1843

Number of Runs: 10
Average Accuracy: 0.6788, Std of Accuracy: 0.0141, Average Loss: 1.1255, Std of Loss: 0.1076


In [240]:
summarize(cora_models[0])
summarize(cite_models[0])

Model Summary: GAT5


Total number of params: 109013
Model Summary: GAT5


Total number of params: 254226


In [241]:
test_model(GAT6, "cora", cora_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 115/1000 [00:01<00:11, 77.90it/s]



Training completed.
Best Validation at Epoch: 16
Best Val Acc: 0.8100, Best Val Loss: 0.6318

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 113/1000 [00:01<00:11, 78.44it/s]



Training completed.
Best Validation at Epoch: 14
Best Val Acc: 0.8020, Best Val Loss: 0.6845

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:11, 80.12it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.8100, Best Val Loss: 0.7370

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 120/1000 [00:01<00:14, 61.96it/s]



Training completed.
Best Validation at Epoch: 21
Best Val Acc: 0.8000, Best Val Loss: 0.7014

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 118/1000 [00:01<00:14, 59.62it/s]



Training completed.
Best Validation at Epoch: 19
Best Val Acc: 0.8060, Best Val Loss: 0.6468

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 116/1000 [00:01<00:11, 77.17it/s]



Training completed.
Best Validation at Epoch: 17
Best Val Acc: 0.7940, Best Val Loss: 0.6862

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:11, 78.09it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.8080, Best Val Loss: 0.7439

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 112/1000 [00:01<00:11, 78.50it/s]



Training completed.
Best Validation at Epoch: 13
Best Val Acc: 0.8080, Best Val Loss: 0.7444

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 117/1000 [00:01<00:11, 79.37it/s]



Training completed.
Best Validation at Epoch: 18
Best Val Acc: 0.8000, Best Val Loss: 0.6308

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█▏        | 114/1000 [00:01<00:11, 79.82it/s]


Training completed.
Best Validation at Epoch: 15
Best Val Acc: 0.7880, Best Val Loss: 0.6952

Number of Runs: 10
Average Accuracy: 0.8091, Std of Accuracy: 0.0096, Average Loss: 0.6252, Std of Loss: 0.0598





In [242]:
test_model(GAT6, "cite", cite_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:12, 73.44it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.6960, Best Val Loss: 0.9252

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:12, 70.90it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6880, Best Val Loss: 1.0749

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:14, 60.12it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.6980, Best Val Loss: 1.0393

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 106/1000 [00:01<00:15, 57.11it/s]



Training completed.
Best Validation at Epoch: 7
Best Val Acc: 0.7160, Best Val Loss: 1.2456

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 106/1000 [00:01<00:13, 66.88it/s]



Training completed.
Best Validation at Epoch: 7
Best Val Acc: 0.7040, Best Val Loss: 1.2063

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:14, 62.31it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7160, Best Val Loss: 1.0215

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 106/1000 [00:02<00:22, 39.50it/s]



Training completed.
Best Validation at Epoch: 7
Best Val Acc: 0.7060, Best Val Loss: 1.1807

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 107/1000 [00:02<00:21, 40.71it/s]



Training completed.
Best Validation at Epoch: 8
Best Val Acc: 0.7100, Best Val Loss: 1.1069

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 105/1000 [00:01<00:12, 70.64it/s]



Training completed.
Best Validation at Epoch: 6
Best Val Acc: 0.7140, Best Val Loss: 1.2615

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 109/1000 [00:01<00:15, 59.34it/s]



Training completed.
Best Validation at Epoch: 10
Best Val Acc: 0.7100, Best Val Loss: 0.9591

Number of Runs: 10
Average Accuracy: 0.6871, Std of Accuracy: 0.0107, Average Loss: 1.1138, Std of Loss: 0.1150


In [243]:
test_model(GAT7, "cora", cora_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  22%|██▏       | 217/1000 [00:02<00:10, 75.05it/s]



Training completed.
Best Validation at Epoch: 118
Best Val Acc: 0.7920, Best Val Loss: 0.7282

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  19%|█▉        | 188/1000 [00:03<00:13, 59.96it/s]



Training completed.
Best Validation at Epoch: 89
Best Val Acc: 0.7820, Best Val Loss: 0.8156

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▎        | 125/1000 [00:01<00:13, 63.14it/s]



Training completed.
Best Validation at Epoch: 26
Best Val Acc: 0.7720, Best Val Loss: 0.8262

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 106/1000 [00:01<00:11, 75.10it/s]



Training completed.
Best Validation at Epoch: 7
Best Val Acc: 0.7880, Best Val Loss: 1.0738

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 110/1000 [00:01<00:11, 75.56it/s]



Training completed.
Best Validation at Epoch: 11
Best Val Acc: 0.7860, Best Val Loss: 0.6931

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  30%|██▉       | 299/1000 [00:03<00:09, 76.83it/s]



Training completed.
Best Validation at Epoch: 200
Best Val Acc: 0.8080, Best Val Loss: 0.7263

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 108/1000 [00:01<00:12, 73.98it/s]



Training completed.
Best Validation at Epoch: 9
Best Val Acc: 0.7820, Best Val Loss: 0.8131

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  24%|██▍       | 244/1000 [00:04<00:13, 57.31it/s]



Training completed.
Best Validation at Epoch: 145
Best Val Acc: 0.8000, Best Val Loss: 0.6701

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  12%|█▏        | 121/1000 [00:02<00:15, 57.66it/s]



Training completed.
Best Validation at Epoch: 22
Best Val Acc: 0.7840, Best Val Loss: 1.0049

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  21%|██        | 206/1000 [00:02<00:10, 75.96it/s]



Training completed.
Best Validation at Epoch: 107
Best Val Acc: 0.7960, Best Val Loss: 0.8180

Number of Runs: 10
Average Accuracy: 0.8001, Std of Accuracy: 0.0133, Average Loss: 0.7661, Std of Loss: 0.1573


In [244]:
test_model(GAT7, "cite", cite_data.test_mask, num_runs=10)

Run: 1
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  16%|█▋        | 164/1000 [00:02<00:13, 62.26it/s]



Training completed.
Best Validation at Epoch: 65
Best Val Acc: 0.7040, Best Val Loss: 0.9592

Run: 2
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  22%|██▎       | 225/1000 [00:03<00:12, 59.93it/s]



Training completed.
Best Validation at Epoch: 126
Best Val Acc: 0.6880, Best Val Loss: 0.9748

Run: 3
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:12, 73.28it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.7020, Best Val Loss: 1.2107

Run: 4
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 105/1000 [00:01<00:12, 72.66it/s]



Training completed.
Best Validation at Epoch: 6
Best Val Acc: 0.7000, Best Val Loss: 1.0658

Run: 5
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  19%|█▉        | 193/1000 [00:02<00:11, 73.11it/s]



Training completed.
Best Validation at Epoch: 94
Best Val Acc: 0.7000, Best Val Loss: 1.0357

Run: 6
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  23%|██▎       | 233/1000 [00:03<00:10, 73.60it/s]



Training completed.
Best Validation at Epoch: 134
Best Val Acc: 0.7060, Best Val Loss: 0.9905

Run: 7
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  15%|█▌        | 151/1000 [00:02<00:15, 55.51it/s]



Training completed.
Best Validation at Epoch: 52
Best Val Acc: 0.6900, Best Val Loss: 1.1452

Run: 8
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  16%|█▌        | 161/1000 [00:02<00:13, 63.31it/s]



Training completed.
Best Validation at Epoch: 62
Best Val Acc: 0.6960, Best Val Loss: 1.0807

Run: 9
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  10%|█         | 104/1000 [00:01<00:12, 73.54it/s]



Training completed.
Best Validation at Epoch: 5
Best Val Acc: 0.7160, Best Val Loss: 1.2474

Run: 10
Using cuda, model: cuda:0, data: cuda:0


Training Epochs:  11%|█         | 106/1000 [00:01<00:12, 71.38it/s]


Training completed.
Best Validation at Epoch: 7
Best Val Acc: 0.7120, Best Val Loss: 1.0326

Number of Runs: 10
Average Accuracy: 0.6875, Std of Accuracy: 0.0079, Average Loss: 1.1818, Std of Loss: 0.1483





In [245]:
summarize(cora_models[0])
summarize(cite_models[0])

Model Summary: GAT7


Total number of params: 395119
Model Summary: GAT7


Total number of params: 974614
