In [20]:
!pip install torch_geometric



In [21]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv, GINConv, GATConv, SAGEConv, TransformerConv
import torch_geometric.transforms as T
from torch.nn import Linear, Sequential, ReLU, BatchNorm1d
import torch_geometric.transforms as T

# Load the CiteSeer Planetoid dataset (rooted at data/Planetoid) with the
# NormalizeFeatures transform attached, and keep its first element as `data`.
dataset = Planetoid(
    root='data/Planetoid',
    name='CiteSeer',
    transform=T.NormalizeFeatures(),
)
data = dataset[0]



In [22]:
# Graph Isomorphism Network (GIN)
class GIN(torch.nn.Module):
    """Two-layer GIN node classifier.

    Each ``GINConv`` wraps a small MLP (Linear -> ReLU -> Linear). The first
    convolution is followed by batch normalisation, ReLU and dropout; the
    output of the second convolution is returned unchanged.

    Args:
        in_channels: Input size of the first MLP.
        hidden_channels: Width used inside both MLPs and by the batch norm.
        out_channels: Output size of the second MLP.
        dropout: Dropout probability applied between the two convolutions.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, dropout=0.5):
        super().__init__()
        self.dropout = dropout

        # Build both MLPs first, then wrap them in their convolutions.
        first_mlp = Sequential(
            Linear(in_channels, hidden_channels),
            ReLU(),
            Linear(hidden_channels, hidden_channels),
        )
        second_mlp = Sequential(
            Linear(hidden_channels, hidden_channels),
            ReLU(),
            Linear(hidden_channels, out_channels),
        )

        self.conv1 = GINConv(first_mlp)
        self.bn1 = BatchNorm1d(hidden_channels)
        self.conv2 = GINConv(second_mlp)

    def forward(self, data):
        """Return the second convolution's output for ``data.x`` / ``data.edge_index``."""
        edge_index = data.edge_index
        hidden = self.bn1(self.conv1(data.x, edge_index))
        hidden = F.dropout(F.relu(hidden), p=self.dropout, training=self.training)
        return self.conv2(hidden, edge_index)

In [23]:
# Graph Attention Network (GAT)
class GAT(torch.nn.Module):
    """Two-layer GAT node classifier.

    Dropout is applied to the input features and again to the ELU-activated
    output of the first attention layer; the second layer's output is returned
    unchanged.

    Args:
        in_channels: Input feature size of the first attention layer.
        hidden_channels: Per-head output size of the first attention layer.
        out_channels: Output size of the second attention layer.
        heads: Number of attention heads in the first layer.
        dropout: Probability used both for feature dropout in ``forward`` and
            for the ``dropout`` argument of the two ``GATConv`` layers.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=8, dropout=0.6):
        super().__init__()
        self.dropout = dropout
        self.conv1 = GATConv(in_channels, hidden_channels, heads=heads, dropout=dropout)
        # The classification layer uses a single head without concatenation;
        # its input width is hidden_channels * heads.
        self.conv2 = GATConv(
            hidden_channels * heads,
            out_channels,
            heads=1,
            concat=False,
            dropout=dropout,
        )

    def _drop(self, features):
        """Apply feature dropout (active only while the module is in training mode)."""
        return F.dropout(features, p=self.dropout, training=self.training)

    def forward(self, data):
        """Return the second attention layer's output for ``data.x`` / ``data.edge_index``."""
        edge_index = data.edge_index
        hidden = self.conv1(self._drop(data.x), edge_index)
        hidden = self._drop(F.elu(hidden))
        return self.conv2(hidden, edge_index)

In [24]:
# GraphSAGE (Graph SAmple and aggreGatE)
class GraphSAGE(torch.nn.Module):
    """Two-layer GraphSAGE node classifier.

    The first ``SAGEConv`` is followed by ReLU and dropout; the output of the
    second ``SAGEConv`` is returned unchanged.

    Args:
        in_channels: Input feature size of the first layer.
        hidden_channels: Output size of the first layer / input of the second.
        out_channels: Output size of the second layer.
        dropout: Dropout probability applied between the two layers.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, dropout=0.5):
        super().__init__()
        self.dropout = dropout
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return the second layer's output for ``data.x`` / ``data.edge_index``."""
        edge_index = data.edge_index
        hidden = F.relu(self.conv1(data.x, edge_index))
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return self.conv2(hidden, edge_index)

In [25]:
# Graph Transformer
class GraphTransformer(torch.nn.Module):
    """Two-layer node classifier built from ``TransformerConv`` layers.

    Dropout is applied to the input features and again to the ELU-activated
    output of the first layer; the second layer's output is returned unchanged.

    Args:
        in_channels: Input feature size of the first layer.
        hidden_channels: Per-head output size of the first layer.
        out_channels: Output size of the second layer.
        heads: Number of attention heads in the first layer.
        dropout: Probability used both for feature dropout in ``forward`` and
            for the ``dropout`` argument of the two ``TransformerConv`` layers.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=4, dropout=0.5):
        super().__init__()
        self.dropout = dropout
        self.conv1 = TransformerConv(
            in_channels, hidden_channels, heads=heads, dropout=dropout
        )
        # Single-head, non-concatenating output layer fed with
        # hidden_channels * heads input features.
        self.conv2 = TransformerConv(
            hidden_channels * heads, out_channels, heads=1, concat=False, dropout=dropout
        )

    def forward(self, data):
        """Return the second layer's output for ``data.x`` / ``data.edge_index``."""
        edge_index = data.edge_index
        drop = {"p": self.dropout, "training": self.training}

        hidden = F.dropout(data.x, **drop)
        hidden = F.elu(self.conv1(hidden, edge_index))
        hidden = F.dropout(hidden, **drop)
        return self.conv2(hidden, edge_index)

In [26]:
def _module_default(name):
    """Return the module-level object called ``name``; raise NameError if it is missing."""
    try:
        return globals()[name]
    except KeyError:
        raise NameError(f"name '{name}' is not defined") from None


def train(model, data, optimizer=None, criterion=None):
    """Run one full-batch optimisation step on the training nodes.

    Args:
        model: Module called as ``model(data)``; its output is indexed with
            ``data.train_mask``.
        data: Object exposing ``y`` and ``train_mask`` (plus whatever ``model``
            reads from it).
        optimizer: Optimizer to zero and step. When omitted, the module-level
            name ``optimizer`` is used, which keeps plain ``train(model, data)``
            calls working.
        criterion: Loss callable invoked as ``criterion(predictions, targets)``.
            When omitted, the module-level name ``criterion`` is used.

    Returns:
        The training loss of this step as a Python float.

    Raises:
        NameError: If an argument is omitted and no module-level object of
            that name exists.
    """
    # Prefer explicitly passed objects so the function does not have to rely
    # on hidden notebook state; fall back to the globals for existing callers.
    if optimizer is None:
        optimizer = _module_default("optimizer")
    if criterion is None:
        criterion = _module_default("criterion")

    model.train()
    optimizer.zero_grad()
    out = model(data)
    # Only the nodes selected by train_mask contribute to the loss.
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def evaluate(model, data):
    """Compute accuracy on the train, validation and test node subsets.

    The model is switched to eval mode and run once on ``data``; gradients are
    disabled by the decorator.

    Args:
        model: Module called as ``model(data)``, returning one row of class
            scores per node.
        data: Object exposing ``y``, ``train_mask``, ``val_mask`` and
            ``test_mask``.

    Returns:
        ``[train_acc, val_acc, test_acc]`` as Python floats.
    """
    model.eval()
    scores = model(data)

    def _accuracy(mask):
        # Fraction of masked nodes whose arg-max class equals the label.
        hits = scores[mask].argmax(dim=1) == data.y[mask]
        return (hits.sum() / mask.sum()).item()

    return [_accuracy(m) for m in (data.train_mask, data.val_mask, data.test_mask)]

In [27]:
# --- GIN experiment: 200 full-batch epochs on the loaded graph ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)
model = GIN(
    in_channels=dataset.num_node_features,
    hidden_channels=64,
    out_channels=dataset.num_classes,
).to(device)

# train() picks these two up as module-level names, so rebind them per model.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

best_val_acc, best_test_acc = 0, 0
for epoch in range(1, 201):
    loss = train(model, data)
    train_acc, val_acc, test_acc = evaluate(model, data)

    # Model selection: remember the test accuracy seen at the best validation epoch.
    if val_acc > best_val_acc:
        best_val_acc, best_test_acc = val_acc, test_acc

    # Log the first epoch and then every 20th one.
    if epoch == 1 or epoch % 20 == 0:
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Train: {train_acc:.3f} | "
              f"Val: {val_acc:.3f} | Test: {test_acc:.3f}")

print(f"Best Validation Acc: {best_val_acc:.3f} | Test Acc: {best_test_acc:.3f}")

Epoch 001 | Loss: 1.8424 | Train: 0.200 | Val: 0.206 | Test: 0.229
Epoch 020 | Loss: 0.0208 | Train: 0.975 | Val: 0.498 | Test: 0.506
Epoch 040 | Loss: 0.0014 | Train: 1.000 | Val: 0.634 | Test: 0.621
Epoch 060 | Loss: 0.0035 | Train: 1.000 | Val: 0.640 | Test: 0.632
Epoch 080 | Loss: 0.0020 | Train: 1.000 | Val: 0.642 | Test: 0.641
Epoch 100 | Loss: 0.0030 | Train: 0.992 | Val: 0.586 | Test: 0.554
Epoch 120 | Loss: 0.0012 | Train: 1.000 | Val: 0.572 | Test: 0.580
Epoch 140 | Loss: 0.0017 | Train: 1.000 | Val: 0.522 | Test: 0.562
Epoch 160 | Loss: 0.0005 | Train: 1.000 | Val: 0.562 | Test: 0.569
Epoch 180 | Loss: 0.0013 | Train: 1.000 | Val: 0.614 | Test: 0.590
Epoch 200 | Loss: 0.0009 | Train: 1.000 | Val: 0.626 | Test: 0.618
Best Validation Acc: 0.666 | Test Acc: 0.648


In [28]:
# --- GAT experiment: 200 full-batch epochs on the loaded graph ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)
model = GAT(
    in_channels=dataset.num_node_features,
    hidden_channels=8,
    out_channels=dataset.num_classes,
).to(device)

# train() picks these two up as module-level names, so rebind them per model.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

best_val_acc, best_test_acc = 0, 0
for epoch in range(1, 201):
    loss = train(model, data)
    train_acc, val_acc, test_acc = evaluate(model, data)

    # Model selection: remember the test accuracy seen at the best validation epoch.
    if val_acc > best_val_acc:
        best_val_acc, best_test_acc = val_acc, test_acc

    # Log the first epoch and then every 20th one.
    if epoch == 1 or epoch % 20 == 0:
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Train: {train_acc:.3f} | "
              f"Val: {val_acc:.3f} | Test: {test_acc:.3f}")

print(f"Best Validation Acc: {best_val_acc:.3f} | Test Acc: {best_test_acc:.3f}")

Epoch 001 | Loss: 1.7916 | Train: 0.167 | Val: 0.232 | Test: 0.181
Epoch 020 | Loss: 1.5540 | Train: 0.908 | Val: 0.734 | Test: 0.721
Epoch 040 | Loss: 1.2004 | Train: 0.933 | Val: 0.726 | Test: 0.720
Epoch 060 | Loss: 0.9414 | Train: 0.950 | Val: 0.716 | Test: 0.699
Epoch 080 | Loss: 0.7527 | Train: 0.967 | Val: 0.692 | Test: 0.668
Epoch 100 | Loss: 0.8624 | Train: 0.975 | Val: 0.720 | Test: 0.698
Epoch 120 | Loss: 0.7109 | Train: 0.983 | Val: 0.716 | Test: 0.697
Epoch 140 | Loss: 0.6868 | Train: 0.975 | Val: 0.690 | Test: 0.669
Epoch 160 | Loss: 0.8070 | Train: 0.983 | Val: 0.714 | Test: 0.708
Epoch 180 | Loss: 0.7319 | Train: 0.967 | Val: 0.722 | Test: 0.709
Epoch 200 | Loss: 0.7089 | Train: 0.975 | Val: 0.704 | Test: 0.693
Best Validation Acc: 0.748 | Test Acc: 0.728


In [29]:
# --- GraphSAGE experiment: 200 full-batch epochs on the loaded graph ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)
model = GraphSAGE(
    in_channels=dataset.num_node_features,
    hidden_channels=64,
    out_channels=dataset.num_classes,
).to(device)

# train() picks these two up as module-level names, so rebind them per model.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

best_val_acc, best_test_acc = 0, 0
for epoch in range(1, 201):
    loss = train(model, data)
    train_acc, val_acc, test_acc = evaluate(model, data)

    # Model selection: remember the test accuracy seen at the best validation epoch.
    if val_acc > best_val_acc:
        best_val_acc, best_test_acc = val_acc, test_acc

    # Log the first epoch and then every 20th one.
    if epoch == 1 or epoch % 20 == 0:
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Train: {train_acc:.3f} | "
              f"Val: {val_acc:.3f} | Test: {test_acc:.3f}")

print(f"Best Validation Acc: {best_val_acc:.3f} | Test Acc: {best_test_acc:.3f}")

Epoch 001 | Loss: 1.7927 | Train: 0.233 | Val: 0.058 | Test: 0.077
Epoch 020 | Loss: 0.6659 | Train: 1.000 | Val: 0.692 | Test: 0.685
Epoch 040 | Loss: 0.1849 | Train: 1.000 | Val: 0.682 | Test: 0.685
Epoch 060 | Loss: 0.1383 | Train: 1.000 | Val: 0.682 | Test: 0.688
Epoch 080 | Loss: 0.1201 | Train: 1.000 | Val: 0.680 | Test: 0.695
Epoch 100 | Loss: 0.1013 | Train: 1.000 | Val: 0.702 | Test: 0.697
Epoch 120 | Loss: 0.0952 | Train: 1.000 | Val: 0.698 | Test: 0.703
Epoch 140 | Loss: 0.0893 | Train: 1.000 | Val: 0.696 | Test: 0.706
Epoch 160 | Loss: 0.0797 | Train: 1.000 | Val: 0.686 | Test: 0.700
Epoch 180 | Loss: 0.0753 | Train: 1.000 | Val: 0.696 | Test: 0.688
Epoch 200 | Loss: 0.0754 | Train: 1.000 | Val: 0.686 | Test: 0.698
Best Validation Acc: 0.710 | Test Acc: 0.707


In [30]:
# --- GraphTransformer experiment: 200 full-batch epochs on the loaded graph ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)
model = GraphTransformer(
    dataset.num_node_features,
    64,
    dataset.num_classes,
    heads=4,
    dropout=0.5,
).to(device)

# train() picks these two up as module-level names, so rebind them per model.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

best_val_acc, best_test_acc = 0, 0
for epoch in range(1, 201):
    loss = train(model, data)
    train_acc, val_acc, test_acc = evaluate(model, data)

    # Model selection: remember the test accuracy seen at the best validation epoch.
    if val_acc > best_val_acc:
        best_val_acc, best_test_acc = val_acc, test_acc

    # Log the first epoch and then every 20th one.
    if epoch == 1 or epoch % 20 == 0:
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Train: {train_acc:.3f} | "
              f"Val: {val_acc:.3f} | Test: {test_acc:.3f}")

print(f"Best Validation Acc: {best_val_acc:.3f} | Test Acc: {best_test_acc:.3f}")

Epoch 001 | Loss: 1.7906 | Train: 0.333 | Val: 0.172 | Test: 0.191
Epoch 020 | Loss: 0.3443 | Train: 1.000 | Val: 0.698 | Test: 0.696
Epoch 040 | Loss: 0.1784 | Train: 1.000 | Val: 0.682 | Test: 0.691
Epoch 060 | Loss: 0.1031 | Train: 1.000 | Val: 0.688 | Test: 0.693
Epoch 080 | Loss: 0.1158 | Train: 1.000 | Val: 0.682 | Test: 0.675
Epoch 100 | Loss: 0.0993 | Train: 1.000 | Val: 0.692 | Test: 0.689
Epoch 120 | Loss: 0.0830 | Train: 1.000 | Val: 0.636 | Test: 0.651
Epoch 140 | Loss: 0.0979 | Train: 1.000 | Val: 0.700 | Test: 0.696
Epoch 160 | Loss: 0.0764 | Train: 1.000 | Val: 0.682 | Test: 0.701
Epoch 180 | Loss: 0.0666 | Train: 1.000 | Val: 0.676 | Test: 0.701
Epoch 200 | Loss: 0.0920 | Train: 1.000 | Val: 0.666 | Test: 0.680
Best Validation Acc: 0.724 | Test Acc: 0.694


In [31]:
# Investigate the graph without edges: an MLP baseline that only sees node features
class DeepMLP(torch.nn.Module):
    """Four-layer MLP baseline that reads ``data.x`` and never touches edges.

    The three hidden layers are each followed by ReLU and dropout. The final
    layer's raw logits are returned, so the model is meant to be paired with a
    loss that expects unnormalised scores (e.g. ``torch.nn.CrossEntropyLoss``,
    which applies log-softmax itself). Arg-max predictions are the same for
    logits and log-probabilities.

    Args:
        in_feats: Input feature size.
        hidden_feats: Width of the three hidden layers.
        num_classes: Output size of the final layer.
        dropout: Dropout probability applied after each hidden layer.
    """

    def __init__(self, in_feats, hidden_feats, num_classes, dropout=0.6):
        super().__init__()
        self.fc1 = torch.nn.Linear(in_feats, hidden_feats)
        self.fc2 = torch.nn.Linear(hidden_feats, hidden_feats)
        self.fc3 = torch.nn.Linear(hidden_feats, hidden_feats)
        self.fc4 = torch.nn.Linear(hidden_feats, num_classes)
        self.dropout = dropout

    def forward(self, data):
        """Return raw class logits computed from ``data.x``."""
        x = data.x
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden_layer(x))
            x = F.dropout(x, p=self.dropout, training=self.training)
        # No log_softmax here: normalising before a logit-based loss is redundant.
        return self.fc4(x)

# --- Deep MLP baseline: 300 full-batch epochs using node features only ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DeepMLP(dataset.num_features, hidden_feats=64, num_classes=dataset.num_classes, dropout=0.6).to(device)
data = data.to(device)
# train() reads `optimizer` and `criterion` as module-level names.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

best_val_acc = 0
# Reset together with best_val_acc so this cell never reports a value left
# over from an earlier experiment and also runs on a fresh kernel.
best_test_acc = 0
for epoch in range(1, 301):
    loss = train(model, data)
    train_acc, val_acc, test_acc = evaluate(model, data)
    # Model selection: keep the test accuracy seen at the best validation epoch.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_test_acc = test_acc
    if epoch % 10 == 0:
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Val: {val_acc:.4f} | Test: {test_acc:.4f}")

# The reported number is the test accuracy at the best-validation epoch.
print(f"\nBest Test Accuracy (Deep MLP baseline): {best_test_acc:.4f}")

Epoch 010 | Loss: 1.7915 | Val: 0.1720 | Test: 0.1820
Epoch 020 | Loss: 1.7947 | Val: 0.2120 | Test: 0.2310
Epoch 030 | Loss: 1.7572 | Val: 0.4500 | Test: 0.4020
Epoch 040 | Loss: 1.5099 | Val: 0.4020 | Test: 0.3900
Epoch 050 | Loss: 1.0766 | Val: 0.3500 | Test: 0.3650
Epoch 060 | Loss: 0.8002 | Val: 0.3840 | Test: 0.4230
Epoch 070 | Loss: 0.6698 | Val: 0.4000 | Test: 0.4120
Epoch 080 | Loss: 0.4889 | Val: 0.4040 | Test: 0.4150
Epoch 090 | Loss: 0.6099 | Val: 0.3940 | Test: 0.4180
Epoch 100 | Loss: 0.5398 | Val: 0.3500 | Test: 0.4030
Epoch 110 | Loss: 0.4407 | Val: 0.3400 | Test: 0.3910
Epoch 120 | Loss: 0.3067 | Val: 0.3540 | Test: 0.4040
Epoch 130 | Loss: 0.2570 | Val: 0.3920 | Test: 0.4080
Epoch 140 | Loss: 0.2465 | Val: 0.3660 | Test: 0.4350
Epoch 150 | Loss: 0.3952 | Val: 0.3780 | Test: 0.4330
Epoch 160 | Loss: 0.2394 | Val: 0.3760 | Test: 0.4530
Epoch 170 | Loss: 0.1446 | Val: 0.4040 | Test: 0.4560
Epoch 180 | Loss: 0.3413 | Val: 0.3580 | Test: 0.4140
Epoch 190 | Loss: 0.2564 | V