In [1]:
import torch
import random
import matplotlib.pyplot as plt



In [2]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

# Build the Planetoid dataset object for Citeseer, stored under the
# "Citeseer_dataset" directory, with NormalizeFeatures as its transform.
feature_transform = NormalizeFeatures()
citeseer_dataset = Planetoid(
    root="Citeseer_dataset",
    name="Citeseer",
    transform=feature_transform,
)

In [3]:
# Map each integer class id (0-5) to its topic name.
# The position of a name in the tuple is the class id it belongs to.
label_dict = dict(enumerate((
    "Agents",
    "AI",
    "DB",
    "IR",
    "ML",
    "HCI",
)))

In [4]:
# Print the number of graphs, the number of classes and the number of
# features of the dataset, one value per line.
print(
    len(citeseer_dataset),
    citeseer_dataset.num_classes,
    citeseer_dataset.num_features,
    sep="\n",
)

# Take the graph stored at index 0 and show its summary as the cell output.
citeseer_graph = citeseer_dataset[0]
citeseer_graph

1
6
3703


Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])

In [5]:
# Display the node feature matrix (the `x` attribute of the graph).
citeseer_graph.x

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [6]:
# Count the nodes selected by each boolean split mask and report them.
split_masks = (
    ("Training samples: ", citeseer_graph.train_mask),
    ("Validation samples: ", citeseer_graph.val_mask),
    ("Test samples: ", citeseer_graph.test_mask),
)
for split_label, split_mask in split_masks:
    print(split_label, split_mask.sum().item())

Training samples:  120
Validation samples:  500
Test samples:  1000


In [7]:
# Display the per-node class labels (the `y` attribute of the graph).
citeseer_graph.y

tensor([3, 1, 5,  ..., 3, 1, 5])

In [8]:
# Gather the basic structural facts about the graph first, then report them.
node_count = citeseer_graph.num_nodes
edge_count = citeseer_graph.num_edges
mean_degree = edge_count / node_count

print(f'Number of nodes: {node_count}')
print(f'Number of edges: {edge_count}')
print(f'Average node degree: {mean_degree:.2f}')

# Structural checks provided by the graph object itself.
print(f'Has isolated nodes: {citeseer_graph.has_isolated_nodes()}')
print(f'Has self-loops: {citeseer_graph.has_self_loops()}')
print(f'Is undirected: {citeseer_graph.is_undirected()}')

Number of nodes: 3327
Number of edges: 9104
Average node degree: 2.74
Has isolated nodes: True
Has self-loops: False
Is undirected: True


In [9]:
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    """Feed-forward network applied to the node features of a graph.

    The network is a stack of ``Linear -> ReLU`` pairs, one pair per entry of
    ``hidden_channels``, followed by a final ``Linear`` layer that produces
    ``output_channels`` values per row. No activation follows the last layer.

    Args:
        input_channels: Size of each input feature vector.
        hidden_channels: Iterable with the width of every hidden layer, in
            order. An empty iterable yields a single linear layer.
        output_channels: Size of each output vector.
    """

    def __init__(self, input_channels, hidden_channels, output_channels):
        super().__init__()

        # Widths seen by the hidden stack: the input size followed by every
        # hidden width. Consecutive pairs give each hidden Linear's shape.
        widths = [input_channels, *hidden_channels]

        modules = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            modules += [nn.Linear(in_features=fan_in, out_features=fan_out), nn.ReLU()]

        # Output projection from the last width.
        modules.append(nn.Linear(in_features=widths[-1], out_features=output_channels))

        self.layers = nn.Sequential(*modules)

    def forward(self, data):
        """Run the network on ``data.x`` and return the result.

        Only the ``x`` attribute of ``data`` is read; any other attribute
        (such as edges) is ignored.
        """
        return self.layers(data.x)
    

In [10]:
# Seed the Python and PyTorch random number generators before any model is
# built, so that weight initialisation (and therefore the reported metrics)
# is repeatable when the notebook is restarted and run from the top.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the graph at index 0 of the dataset onto the chosen device.
citeseer_graph = citeseer_dataset[0].to(device)

# Network sizes: the input width is the number of features in the dataset
# and the output width is the number of classes.
input_channels = citeseer_dataset.num_features

# One entry per hidden layer; each entry is that layer's width.
hidden_channels = [16]
# Alternative: hidden_channels = [16, 32, 16] gives three hidden layers with
# 16, 32 and 16 neurons respectively.

output_channels = citeseer_dataset.num_classes

In [11]:
# Instantiate the MLP with the configured layer sizes, then move its
# parameters to the selected device.
model = MLP(input_channels, hidden_channels, output_channels)
model = model.to(device)

# Show the layer structure.
print(model)

MLP(
  (layers): Sequential(
    (0): Linear(in_features=3703, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=6, bias=True)
  )
)


In [12]:
# Total number of scalar values across all of the model's parameter tensors.
total_params = sum(param.numel() for param in model.parameters())
print("Number of parameters: ", total_params)

# Adam with L2 weight decay over all model parameters.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Cross-entropy loss, applied to the model output and the integer labels.
criterion = nn.CrossEntropyLoss()

Number of parameters:  59366


In [13]:
num_epochs = 200

for epoch in range(num_epochs):

    # ---- Optimisation step ------------------------------------------------
    # The forward pass covers every node, but the loss (and therefore the
    # gradient) only uses the nodes selected by the training mask.
    model.train()
    optimizer.zero_grad()
    out = model(citeseer_graph)

    loss = criterion(out[citeseer_graph.train_mask], citeseer_graph.y[citeseer_graph.train_mask])
    loss.backward()

    optimizer.step()

    # ---- Training accuracy ------------------------------------------------
    # Computed from `out`, i.e. from the forward pass made before this
    # epoch's parameter update.
    pred_train = out.argmax(dim = 1)

    correct_train = (
        pred_train[citeseer_graph.train_mask] == citeseer_graph.y[citeseer_graph.train_mask]
    ).sum()

    acc_train = int(correct_train) / int(citeseer_graph.train_mask.sum())

    # ---- Validation accuracy ----------------------------------------------
    # No gradients are needed for evaluation, so the forward pass runs under
    # torch.no_grad() to avoid building an autograd graph every epoch.
    model.eval()

    with torch.no_grad():
        pred_val = model(citeseer_graph).argmax(dim = 1)

    correct_val = (
        pred_val[citeseer_graph.val_mask] == citeseer_graph.y[citeseer_graph.val_mask]
    ).sum()

    acc_val = int(correct_val) / int(citeseer_graph.val_mask.sum())

    # ---- Progress log, every 10th epoch -----------------------------------
    # Adjacent f-strings are joined by the parser, so the source indentation
    # does not end up inside the printed message (a backslash continuation
    # inside the string literal would include it).
    if (epoch + 1) % 10 == 0:
        print(
            f'Epoch: {epoch + 1:03d}, '
            f'Train Loss: {loss.item():.3f}, '
            f'Train Acc: {acc_train:.3f} Val Acc: {acc_val:.3f}'
        )

Epoch: 010,                Train Loss: 1.714,                Train Acc: 0.825 Val Acc: 0.182
Epoch: 020,                Train Loss: 1.530,                Train Acc: 1.000 Val Acc: 0.380
Epoch: 030,                Train Loss: 1.265,                Train Acc: 1.000 Val Acc: 0.424
Epoch: 040,                Train Loss: 0.964,                Train Acc: 1.000 Val Acc: 0.468
Epoch: 050,                Train Loss: 0.708,                Train Acc: 1.000 Val Acc: 0.518
Epoch: 060,                Train Loss: 0.533,                Train Acc: 1.000 Val Acc: 0.526
Epoch: 070,                Train Loss: 0.424,                Train Acc: 1.000 Val Acc: 0.538
Epoch: 080,                Train Loss: 0.355,                Train Acc: 1.000 Val Acc: 0.548
Epoch: 090,                Train Loss: 0.308,                Train Acc: 1.000 Val Acc: 0.548
Epoch: 100,                Train Loss: 0.274,                Train Acc: 1.000 Val Acc: 0.560
Epoch: 110,                Train Loss: 0.249,                Train Acc

In [14]:
# Evaluate the trained model on the nodes selected by the test mask.
model.eval()

# Inference only: disable autograd so no computation graph is built.
with torch.no_grad():
    pred = model(citeseer_graph).argmax(dim = 1)

# Number of test nodes whose predicted class equals the label.
correct = (pred[citeseer_graph.test_mask] == citeseer_graph.y[citeseer_graph.test_mask]).sum()

# Fraction of test nodes classified correctly.
test_acc = int(correct) / int(citeseer_graph.test_mask.sum())

test_acc

0.595