In [1]:
import pandas as pd
import numpy as np

import torch 
import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.data import Data

from gcn_net import GraphConvNeuralNet
from gat import GraphAttentionLayer
from gatv1_net import GatNetv1
from gatv2_net import GatNetv2

from utilities import fit, predict, validate, tsne_representation, line_chart, bar_chart, accuracy, ttest


In [2]:
dataset = Planetoid(root = ".", name = "Cora", transform = NormalizeFeatures())
data = dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [3]:
print("\nDataset details")
print("---")
print("Number of graphs: ", len(data))
print("Number of features: ", data.num_features)
print("Number of classes: ", len(data.y.unique()))
print("Number of nodes: ", data.num_nodes)
print("Number of edges: ", data.num_edges)


print("\nGraph details")
print("---")
print("Training data nodes:", sum(data.train_mask).item())
print("Validation data nodes: ", sum(data.val_mask).item())
print("Test data nodes: ", sum(data.test_mask).item())
print("Is directed: ", data.is_directed())
print("Has isolated nodes: ", data.has_isolated_nodes())
print("Has self loops: ", data.has_self_loops())



Dataset details
---
Number of graphs:  6
Number of features:  1433
Number of classes:  7
Number of nodes:  2708
Number of edges:  10556

Graph details
---
Training data nodes: 140
Validation data nodes:  500
Test data nodes:  1000
Is directed:  False
Has isolated nodes:  False
Has self loops:  False


In [4]:
tsne_representation(x = data.x, y = data.y, title = "TSNE Graph Representation")

In [5]:
parameter = {
    "in_dim":data.x.size()[1],
    "out_dim": len(torch.unique(data.y)),
    "epochs": 100,
    "n_hidden_channels": [64],
    "n_attention_heads": [8, 16],
    "learning_rate": [0.01, 0.033],
    "weight_decay": [5e-4]
}


In [6]:
n_iter = 0
size =  len(parameter["learning_rate"]) * len(parameter["weight_decay"]) 
gcn_models = np.zeros([size, 8])
for _, wd in enumerate(parameter["weight_decay"]):
    for _, lr in enumerate(parameter["learning_rate"]):
        GCN = GraphConvNeuralNet(data, 64)
        gcn_model_loss, gcn_train_accuracy, gcn_val_accuracy, gcn_test_accuracy = fit(
            model = GCN, 
            data = data, 
            n_epochs = parameter["epochs"],
            lr = lr, 
            weight_decay = wd,
            pytorch_geometric_implementation = True
        ) 

        gcn_models[n_iter] = [n_iter, parameter["n_hidden_channels"][0], lr, wd, np.round(gcn_train_accuracy[-1],3), np.round(gcn_val_accuracy[-1],3), np.round(gcn_test_accuracy[-1], 3), np.round(gcn_model_loss[-1],3)]
        n_iter += 1
            
        print(f"Model training is completed.\n")


100%|██████████| 100/100 [00:02<00:00, 34.02it/s]


Model training is completed.



100%|██████████| 100/100 [00:02<00:00, 37.06it/s]

Model training is completed.






In [7]:
print(f"Highest validation accuracy is reached by model number {np.argmax(gcn_models[:,4])}")
print(f"Highest test accuracy is reached by model number {np.argmax(gcn_models[:,5])}")
print(f"Lowest cross entropy loss is reached by model number {np.argmin(gcn_models[:,6])}")

Highest validation accuracy is reached by model number 0
Highest test accuracy is reached by model number 0
Lowest cross entropy loss is reached by model number 1


In [11]:

pd.DataFrame(gcn_models)

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.0,64.0,0.01,0.0005,1.0,0.8,0.806,0.218
1,1.0,64.0,0.033,0.0005,1.0,0.798,0.801,0.121


In [8]:
GCN = GraphConvNeuralNet(data, 64)
gcn_model_loss, gcn_train_accuracy, gcn_val_accuracy, gcn_test_accuracy = fit(
    model = GCN, 
    data = data, 
    n_epochs = parameter["epochs"],
    lr = parameter["learning_rate"][0], 
    weight_decay = parameter["weight_decay"][0],
    pytorch_geometric_implementation = True
    ) 

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:02<00:00, 45.10it/s]


In [9]:
plt_gcn_accuracy = line_chart(x = np.arange(0, len(gcn_train_accuracy)), y = gcn_val_accuracy, y_label = "Accuracy")
#plt_gcn_accuracy.add_scatter(x = np.arange(0, len(gcn_train_accuracy)), y = gcn_train_accuracy, name="Training accuracy")
plt_gcn_accuracy.add_scatter(x = np.arange(0, len(gcn_val_accuracy)), y = gcn_val_accuracy, name="Validation accuracy")
plt_gcn_accuracy.add_scatter(x = np.arange(0, len(gcn_test_accuracy)), y = gcn_test_accuracy, name="Test accuracy")

In [10]:
gcn_output = GCN(data.x, data.edge_index)

tsne_representation(x = gcn_output, y = data.y, title = "Graph Convolutional Network")

In [11]:
model = GatNetv1(parameter["in_dim"], parameter["out_dim"]) 

n_iter = 0
size =  len(parameter["learning_rate"]) * len(parameter["n_attention_heads"]) * len(parameter["weight_decay"])
att_v1_models = np.zeros([size, 9])
for _, att_heads in enumerate(parameter["n_attention_heads"]):
    for _, lr in enumerate(parameter["learning_rate"]):
        for _, wd in enumerate(parameter["weight_decay"]):
            gatv1_model_loss, gatv1_train_accuracy, gatv1_val_accuracy, gatv1_test_accuracy = fit(
                model = model, 
                data = data, 
                n_epochs = parameter["epochs"], 
                lr = lr, 
                weight_decay = wd,
                pytorch_geometric_implementation = False
    ) 
    
            att_v1_models[n_iter] = [n_iter, parameter["n_hidden_channels"][0], att_heads, lr, wd, np.round(gatv1_train_accuracy[-1],3), np.round(gatv1_val_accuracy[-1],3), np.round(gatv1_test_accuracy[-1], 3), np.round(gatv1_model_loss[-1],3)]
            n_iter += 1
            
            print(f"Model training is completed.\n")


100%|██████████| 100/100 [06:50<00:00,  4.10s/it]


Model training is completed.



100%|██████████| 100/100 [06:32<00:00,  3.93s/it]


Model training is completed.



100%|██████████| 100/100 [06:22<00:00,  3.82s/it]


Model training is completed.



100%|██████████| 100/100 [06:33<00:00,  3.93s/it]

Model training is completed.






In [12]:
print(f"Highest validation accuracy is reached by model number {np.argmax(att_v1_models[:,4])}")
print(f"Highest test accuracy is reached by model number {np.argmax(att_v1_models[:,5])}")
print(f"Lowest cross entropy loss is reached by model number {np.argmin(att_v1_models[:,6])}")

Highest validation accuracy is reached by model number 0
Highest test accuracy is reached by model number 0
Lowest cross entropy loss is reached by model number 3


In [11]:
model = GatNetv1(parameter["in_dim"], parameter["out_dim"]) 
gatv1_model_loss, gatv1_train_accuracy, gatv1_val_accuracy, gatv1_test_accuracy = fit(
    model = model, 
    data = data, 
    n_epochs = parameter["epochs"], 
    lr = parameter["learning_rate"][0], 
    weight_decay = parameter["weight_decay"][0],
    pytorch_geometric_implementation = False
) 

100%|██████████| 100/100 [06:47<00:00,  4.08s/it]


In [12]:
plt_gatv1_accuracy = line_chart(x = np.arange(0, len(gatv1_train_accuracy)), y = gatv1_val_accuracy, y_label = "Accuracy")
#plt_gatv1_accuracy.add_scatter(x = np.arange(0, len(gatv1_train_accuracy)), y = gatv1_train_accuracy, name = "Training accuracy")
plt_gatv1_accuracy.add_scatter(x = np.arange(0, len(gatv1_val_accuracy)), y = gatv1_val_accuracy, name = "Validation accuracy")
plt_gatv1_accuracy.add_scatter(x = np.arange(0, len(gatv1_test_accuracy)), y = gatv1_test_accuracy, name = "Test accuracy")

In [13]:
gat_output = model(data)

tsne_representation(x = gat_output, y = data.y, title = "Graph Attention Network v1")

In [16]:
model = GatNetv2(parameter["in_dim"], parameter["out_dim"]) 

n_iter = 0
size =  len(parameter["learning_rate"]) * len(parameter["n_attention_heads"]) * len(parameter["weight_decay"])
att_v2_models = np.zeros([size, 9])
for _, att_heads in enumerate(parameter["n_attention_heads"]):
    for _, lr in enumerate(parameter["learning_rate"]):
        for _, wd in enumerate(parameter["weight_decay"]):
            gatv2_model_loss, gatv2_train_accuracy, gatv2_val_accuracy, gatv2_test_accuracy = fit(
                model = model, 
                data = data, 
                n_epochs = parameter["epochs"], 
                lr = lr, 
                weight_decay = wd,
                pytorch_geometric_implementation = False
    ) 
    
            att_v2_models[n_iter] = [n_iter, parameter["n_hidden_channels"][0], att_heads, lr, wd, np.round(gatv2_train_accuracy[-1],3), np.round(gatv2_val_accuracy[-1],3), np.round(gatv2_test_accuracy[-1], 3), np.round(gatv2_model_loss[-1],3)]
            n_iter += 1
            
            print(f"Model training is completed.\n")


Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.

100%|██████████| 100/100 [07:45<00:00,  4.66s/it]


Model training is completed.



 62%|██████▏   | 62/100 [04:57<03:02,  4.79s/it]


KeyboardInterrupt: 

In [None]:
print(f"Highest validation accuracy is reached by model number {np.argmax(att_v2_models[:,4])}")
print(f"Highest test accuracy is reached by model number {np.argmax(att_v2_models[:,5])}")
print(f"Lowest cross entropy loss is reached by model number {np.argmin(att_v2_models[:,6])}")

In [14]:
model = GatNetv2(parameter["in_dim"], parameter["out_dim"]) 

gatv2_model_loss, gatv2_train_accuracy, gatv2_val_accuracy, gatv2_test_accuracy = fit(
    model = model, 
    data = data, 
    n_epochs = parameter["epochs"], 
    lr = parameter["learning_rate"][0], 
    weight_decay = parameter["weight_decay"][0],
    pytorch_geometric_implementation = False
) 
    
print(f"Model training is completed.\n")

plt_gatv2_accuracy = line_chart(x = np.arange(0, len(gatv2_train_accuracy)), y = gatv2_val_accuracy, y_label = "Accuracy")
#plt_gatv2_accuracy.add_scatter(x = np.arange(0, len(gatv2_train_accuracy)), y = gatv2_train_accuracy, name = "Training accuracy")
plt_gatv2_accuracy.add_scatter(x = np.arange(0, len(gatv2_val_accuracy)), y = gatv2_val_accuracy, name = "Validation accuracy")
plt_gatv2_accuracy.add_scatter(x = np.arange(0, len(gatv2_test_accuracy)), y = gatv2_test_accuracy, name = "Test accuracy")

100%|██████████| 100/100 [09:21<00:00,  5.61s/it]

Model training is completed.






In [15]:
gat_output = model(data)

tsne_representation(x = gat_output, y = data.y, title = "Graph Attention Network v2")


In [16]:
plt_models_loss = line_chart(x = np.arange(10,110,10), y = gcn_model_loss, y_label = "Cross Entropy Loss")
plt_models_loss.add_scatter(x = np.arange(10,110,10), y = gcn_model_loss, name = "GCN")
plt_models_loss.add_scatter(x = np.arange(10,110,10), y = gatv1_model_loss, name = "GATv1")
plt_models_loss.add_scatter(x = np.arange(10,110,10), y = gatv2_model_loss, name = "GATv2")