In [1]:
import torch
import random
import matplotlib.pyplot as plt



In [2]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

citeseer_dataset = Planetoid(root = "Citeseer_dataset", name = "Citeseer", transform = NormalizeFeatures())

In [3]:
print(len(citeseer_dataset))
print(citeseer_dataset.num_classes)
print(citeseer_dataset.num_features)
citeseer_graph = citeseer_dataset[0]
citeseer_graph

1
6
3703


Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])

In [4]:
citeseer_graph.x

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [5]:
print("Training samples: ", citeseer_graph.train_mask.sum().item())
print("Validation samples: ", citeseer_graph.val_mask.sum().item())
print("Test samples: ", citeseer_graph.test_mask.sum().item())

Training samples:  120
Validation samples:  500
Test samples:  1000


In [6]:
citeseer_graph.y

tensor([3, 1, 5,  ..., 3, 1, 5])

In [7]:
print(f'Number of nodes: {citeseer_graph.num_nodes}')
print(f'Number of edges: {citeseer_graph.num_edges}')
print(f'Average node degree: {citeseer_graph.num_edges / citeseer_graph.num_nodes:.2f}')
print(f'Has isolated nodes: {citeseer_graph.has_isolated_nodes()}')
print(f'Has self-loops: {citeseer_graph.has_self_loops()}')
print(f'Is undirected: {citeseer_graph.is_undirected()}')

Number of nodes: 3327
Number of edges: 9104
Average node degree: 2.74
Has isolated nodes: True
Has self-loops: False
Is undirected: True


What we talked about so far is the GatConv layer, but in 2021 Brody et al. introduced an improvement by modifying the order of operations. The weight matrix 𝐖 is applied after the concatenation, and the attention weight matrix 𝐖ₐₜₜ is used after the LeakyReLU function.
Which one should you use? According to Brody et al., Gatv2Conv consistently outperforms GatConv and thus should be preferred.

In [16]:
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GATv2Conv

class GAT2(torch.nn.Module):
    def __init__(self, input_channels, output_channels, hidden_channels = 8, num_heads = 8):
        super().__init__()
        torch.manual_seed(123456)
        
        self.gatconv1 = GATv2Conv(
            in_channels = input_channels, 
            out_channels = hidden_channels, 
            heads = num_heads
        )
        
        self.gatconv2 = GATv2Conv(
            in_channels = hidden_channels * num_heads, 
            out_channels = output_channels, 
            heads = 1
        )
    
    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        
        x = F.dropout(x, p = 0.6, training = self.training)
        
        x = self.gatconv1(x, edge_index)
        x = F.elu(x)
        
        x = F.dropout(x, p = 0.6, training = self.training)
        x = self.gatconv2(x, edge_index)
        
        return x
    
    def fit(self, X, y):
        self.gat_model.to(self.device)
        self.gat_model.train()
        optimizer = optim.Adam(self.gat_model.parameters(), lr = 0.005, weight_decay = 5e-4)
        criterion = nn.CrossEntropyLoss()

        # Convert input data to torch tensors
        X = torch.tensor(X, dtype=torch.float32).to(self.device)
        y = torch.tensor(y, dtype=torch.long).to(self.device)

        for epoch in range(self.epochs):
            optimizer.zero_grad()
            output = self.gat_model(X)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()
        return self

    def predict(self, X):
        self.gat_model.eval()
        X = torch.tensor(X, dtype=torch.float32).to(self.device)
        with torch.no_grad():
            output = self.gat_model(X)
        _, predicted = torch.max(output, 1)
        return predicted.cpu().numpy()

In [17]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

citeseer_graph = citeseer_dataset[0].to(device)

In [18]:
models = []

models.append(('GAT2',GAT2(input_channels = citeseer_dataset.num_features, output_channels =  citeseer_dataset.num_classes).to(device)))

In [19]:
from sklearn import model_selection
# select best models using cross validation

X = citeseer_graph.x.cpu().numpy()
y = citeseer_graph.y.cpu().numpy() 

results = []
names = []

for name, model in models:
    kfold = model_selection.KFold(n_splits=10)
    cv_results = model_selection.cross_val_score(model, X, y, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    msg = "{}: {} ({})".format(name, cv_results.mean(), cv_results.std())
    print(msg)

TypeError: Cannot clone object 'GAT2(
  (gatconv1): GATv2Conv(3703, 8, heads=8)
  (gatconv2): GATv2Conv(64, 6, heads=1)
)' (type <class '__main__.GAT2'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.