In [None]:
"""
Here's a simple implementation of a Graph Convolutional Network (GCN) using PyTorch Geometric (PyG) for node classification.
"""

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures 

In [2]:
# 1. Download and preprocess data

# Reproducibility: seed PyTorch's random number generator before any weights
# are initialised or dropout masks are drawn, so a fresh-kernel "Run All"
# starts from the same random state every time.
SEED = 42
torch.manual_seed(SEED)

# Device configuration: use the GPU when one is available for faster training.
#   torch.cuda.is_available(): checks whether a CUDA-enabled GPU is usable
#   device: ends up as either 'cuda' (GPU) or 'cpu' (CPU)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the Cora dataset with normalized features.
#   root: directory where the dataset is stored/downloaded. It is kept
#         relative to the working directory (instead of an absolute path into
#         one user's home folder) so the notebook runs on any machine.
#   name='Cora': selects the Cora dataset (citation network)
#   transform=NormalizeFeatures(): applies feature normalization
DATA_ROOT = 'data/Planetoid'
dataset = Planetoid(root=DATA_ROOT, name='Cora', transform=NormalizeFeatures())

# dataset[0]: the first (and only) graph in the dataset
# .to(device): moves all tensors (features, edges, etc.) to the GPU/CPU
data = dataset[0].to(device)

print(f"Number of nodes: {data.num_nodes}")
print(f"Number of edges: {data.num_edges}")
print(f"Number of features: {dataset.num_features}")
print(f"Number of classes: {dataset.num_classes}")


Number of nodes: 2708
Number of edges: 10556
Number of features: 1433
Number of classes: 7


In [3]:
# 2. Define a 2-layer GCN model

# Inherits from PyTorch's Module base class (required for all neural networks in PyTorch).
class GCN(torch.nn.Module):
    """Two-layer Graph Convolutional Network for node classification.

    Data flow: GCNConv -> ReLU -> dropout -> GCNConv -> log-softmax.

    Args:
        num_features: Input feature dimension (e.g., 1433 for the Cora dataset).
        hidden_dim: Size of the hidden layer (e.g., 16).
        num_classes: Number of output classes (e.g., 7 for Cora).
        dropout: Probability of zeroing a hidden activation while training.
            Defaults to 0.5, which is the default of ``F.dropout`` and so
            matches what the model did before this parameter was exposed.

    Raises:
        ValueError: If ``dropout`` is outside the interval [0, 1].
    """

    def __init__(self, num_features, hidden_dim, num_classes, dropout=0.5):
        super().__init__()  # Initializes the parent class (torch.nn.Module).
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(f"dropout must be in [0, 1], got {dropout}")

        # Layer 1: num_features -> hidden_dim.
        self.conv1 = GCNConv(num_features, hidden_dim)
        # Layer 2: hidden_dim -> num_classes (maps hidden features to class logits).
        self.conv2 = GCNConv(hidden_dim, num_classes)
        self.dropout = dropout

    def forward(self, x, edge_index):
        """Compute per-node class log-probabilities.

        Args:
            x: Node feature matrix (shape: [num_nodes, num_features]).
            edge_index: Graph connectivity (shape: [2, num_edges]).

        Returns:
            The second layer's output passed through log-softmax along dim 1,
            i.e. one row of log-probabilities over the classes per node.
        """
        # First GCN layer: projects the node features into hidden_dim space.
        x = self.conv1(x, edge_index)

        # Nonlinearity: ReLU(x) = max(0, x).
        x = F.relu(x)

        # Regularization: activations are randomly zeroed only while
        # self.training is True; in eval mode this line is a no-op.
        x = F.dropout(x, p=self.dropout, training=self.training)

        # Second GCN layer: produces num_classes-dimensional logits.
        x = self.conv2(x, edge_index)

        # dim=1 normalizes across the classes of each node.
        return F.log_softmax(x, dim=1)

In [4]:
# 3. Initialize model
# The model is moved to the same device as `data`. Without this, the weights
# stay on the CPU while the graph tensors may be on the GPU, and the forward
# pass fails with a device-mismatch error on CUDA machines.
model = GCN(
    num_features=dataset.num_features,  # Input feature dimension
    hidden_dim=16,                      # Hidden layer dimension
    num_classes=dataset.num_classes     # Number of classes
).to(device)

In [5]:
# 4. Train the model

# optimizer: Adam with
#   - learning rate lr=0.01
#   - weight decay 5e-4 (L2 regularization to limit overfitting)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# criterion: CrossEntropyLoss, the standard loss for multi-class classification.
# NOTE: train() below does not use this object; it calls F.nll_loss directly
# because the model's forward pass already returns log-probabilities.
criterion = nn.CrossEntropyLoss()

def train():
    """Run one full-batch optimisation step and return the training loss.

    Uses the notebook-level ``model``, ``optimizer`` and ``data`` objects.

    Returns:
        The negative log-likelihood loss over the training nodes, as a
        Python float.
    """
    # Training mode enables dropout inside the model.
    model.train()

    # Discard the gradients accumulated by the previous step.
    optimizer.zero_grad()

    # Forward pass over the whole graph: node features plus connectivity in,
    # per-node class log-probabilities out.
    train_mask = data.train_mask
    log_probs = model(data.x, data.edge_index)

    # Only the nodes selected by the training mask contribute to the loss.
    step_loss = F.nll_loss(log_probs[train_mask], data.y[train_mask])

    # Backpropagate, then let the optimizer update the parameters.
    step_loss.backward()
    optimizer.step()

    return step_loss.item()

In [6]:
# 5. Test the model
def test():
    """Evaluate the model on the test split and return its accuracy.

    Uses the notebook-level ``model`` and ``data`` objects.

    Returns:
        Fraction of test-mask nodes whose predicted class equals the label,
        as a Python float.
    """
    # Evaluation mode disables dropout.
    model.eval()

    # No gradients are needed for evaluation; skipping autograd bookkeeping
    # saves memory and time without changing the predictions.
    with torch.no_grad():
        out = model(data.x, data.edge_index)

    # The most likely class per node is the argmax over the class dimension.
    pred = out.argmax(dim=1)

    # Count correct predictions among the test nodes only.
    correct = pred[data.test_mask] == data.y[data.test_mask]
    acc = int(correct.sum()) / int(data.test_mask.sum())
    return acc

In [7]:
# Training loop
# One optimisation step per epoch; test accuracy is reported only on every
# LOG_EVERY-th epoch to keep the output short.
NUM_EPOCHS = 200
LOG_EVERY = 10

for epoch in range(NUM_EPOCHS):
    loss = train()
    if epoch % LOG_EVERY != 0:
        continue
    acc = test()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Test Acc: {acc:.4f}')

Epoch: 000, Loss: 1.9461, Test Acc: 0.1660
Epoch: 010, Loss: 1.8756, Test Acc: 0.7310
Epoch: 020, Loss: 1.7443, Test Acc: 0.7360
Epoch: 030, Loss: 1.5688, Test Acc: 0.7530
Epoch: 040, Loss: 1.3929, Test Acc: 0.8010
Epoch: 050, Loss: 1.1163, Test Acc: 0.7990
Epoch: 060, Loss: 1.0112, Test Acc: 0.8070
Epoch: 070, Loss: 0.7930, Test Acc: 0.7980
Epoch: 080, Loss: 0.6705, Test Acc: 0.8110
Epoch: 090, Loss: 0.6110, Test Acc: 0.8160
Epoch: 100, Loss: 0.5652, Test Acc: 0.8190
Epoch: 110, Loss: 0.5136, Test Acc: 0.8180
Epoch: 120, Loss: 0.4931, Test Acc: 0.8070
Epoch: 130, Loss: 0.4213, Test Acc: 0.8120
Epoch: 140, Loss: 0.4247, Test Acc: 0.8090
Epoch: 150, Loss: 0.3609, Test Acc: 0.8020
Epoch: 160, Loss: 0.3217, Test Acc: 0.8160
Epoch: 170, Loss: 0.3889, Test Acc: 0.8060
Epoch: 180, Loss: 0.3230, Test Acc: 0.8090
Epoch: 190, Loss: 0.3307, Test Acc: 0.8090
