In [1]:
import torch         # Base PyTorch library for tensor operations.
import torch.nn as nn
import torch.nn.functional as F       # Contains activation functions (e.g., `elu`, `log_softmax`) and loss functions (e.g., `nll_loss`).
from torch_geometric.datasets import Planetoid           # Downloads standard citation network datasets (Cora, Citeseer, Pubmed)
from torch_geometric.transforms import NormalizeFeatures # Normalizes node features to sum to 1 (helps training)
from torch_geometric.nn import SAGEConv                  # Pre-built graphSAGE layer from PyTorch Geometric.
from torch_geometric.loader import NeighborLoader
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

In [2]:
# 1. Download and preprocess data

# Reproducibility: seed PyTorch's random number generators before the model is
# built, so weight initialisation and dropout masks repeat across
# Restart & Run All (as far as the backend's kernels are deterministic).
SEED = 42
torch.manual_seed(SEED)

# Device configuration: use the GPU if one is available for faster training.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the Cora dataset with normalized features.
# The root is relative to the notebook's working directory so the notebook runs
# on any machine; the dataset is stored under this folder.
DATA_ROOT = 'data/Planetoid'
dataset = Planetoid(root=DATA_ROOT, name='Cora', transform=NormalizeFeatures())
data = dataset[0].to(device)  # Cora holds a single graph; move it to GPU/CPU

# Print dataset info
print(f"Dataset: {dataset}")
print(f"Number of nodes: {data.num_nodes}")
print(f"Number of edges: {data.num_edges}")
print(f"Number of features: {dataset.num_features}")
print(f"Number of classes: {dataset.num_classes}")

Dataset: Cora()
Number of nodes: 2708
Number of edges: 10556
Number of features: 1433
Number of classes: 7


In [3]:
# =============================================
# 2. Define GraphSAGE Model
# =============================================

class GraphSAGE(nn.Module):
    """Two-layer GraphSAGE network for node classification.

    The network applies a SAGEConv layer, ReLU, dropout, and a second
    SAGEConv layer, then returns per-node log-probabilities (log-softmax
    over dimension 1), which is the input format expected by a negative
    log-likelihood loss.

    Args:
        in_features: Size of each input node feature vector.
        hidden_features: Output size of the first SAGEConv layer.
        out_features: Output size of the second SAGEConv layer
            (the number of classes).
        dropout: Probability of zeroing a hidden activation during
            training. Defaults to 0.5, the value previously hard-coded.
    """

    def __init__(self, in_features, hidden_features, out_features, dropout=0.5):
        super().__init__()
        self.dropout = dropout
        self.conv1 = SAGEConv(in_features, hidden_features)
        self.conv2 = SAGEConv(hidden_features, out_features)

    def forward(self, x, edge_index):
        """Compute log-probabilities over classes for every node.

        Args:
            x: Node feature tensor; assumed to be (num_nodes, in_features)
                -- TODO confirm against the caller.
            edge_index: Graph connectivity passed through to both SAGEConv
                layers.

        Returns:
            Tensor of log-softmax scores taken over dimension 1.
        """
        x = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

# Build the network: the input width and the number of output classes are
# taken from the dataset; the hidden layer has 128 units.
model_kwargs = dict(
    in_features=dataset.num_features,
    hidden_features=128,
    out_features=dataset.num_classes,
)
model = GraphSAGE(**model_kwargs)

In [4]:
# =============================================
# 3. Training Setup
# =============================================

# Pick the compute device, then place both the network and the graph on it.
use_gpu = torch.cuda.is_available()
device = torch.device('cuda' if use_gpu else 'cpu')
model = model.to(device)
data = data.to(device)

# Negative log-likelihood loss: consumes the log-probabilities that the
# model's forward pass produces via log_softmax.
criterion = nn.NLLLoss()

# Adam optimizer; weight_decay adds an L2 penalty on the parameters.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01, weight_decay=5e-4)

In [5]:
# =============================================
# 4. Training Loop
# =============================================

def train():
    """Run one optimisation step on the full graph and return the loss.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``data``. The forward pass covers every node, but the loss is computed
    only on the nodes selected by ``data.train_mask``.

    Returns:
        The training loss for this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    train_nodes = data.train_mask
    log_probs = model(data.x, data.edge_index)
    step_loss = criterion(log_probs[train_nodes], data.y[train_nodes])

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

In [6]:
# =============================================
# 5. Evaluation
# =============================================
def test():
    """Evaluate the model on the train, validation and test splits.

    Uses the module-level ``model`` and ``data``. The model is switched to
    evaluation mode (which disables dropout) and a single forward pass over
    the whole graph is run with gradient tracking turned off.

    Returns:
        Tuple ``(train_acc, val_acc, test_acc)`` of accuracies, each computed
        with ``accuracy_score`` on the nodes selected by the matching mask.
    """
    model.eval()

    # Inference only: without no_grad() the forward pass would record an
    # autograd graph that is never used, wasting memory and time.
    with torch.no_grad():
        out = model(data.x, data.edge_index)

    pred = out.argmax(dim=1)

    def split_accuracy(mask):
        # accuracy_score works on host data, so move the tensors to the CPU.
        return accuracy_score(data.y[mask].cpu(), pred[mask].cpu())

    # Calculate accuracy for each split
    train_acc = split_accuracy(data.train_mask)
    val_acc = split_accuracy(data.val_mask)
    test_acc = split_accuracy(data.test_mask)

    return train_acc, val_acc, test_acc


In [None]:
# Training loop
# Runs 100 epochs; each epoch is one call to train().
# Metrics are reported on every 10th epoch (0, 10, ..., 90).
losses = []
for epoch in range(100):
    loss = train()
    losses.append(loss)

    # Only evaluate on reporting epochs.
    if epoch % 10 != 0:
        continue

    train_acc, val_acc, test_acc = test()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, '
          f'Train: {train_acc:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}')

Epoch: 000, Loss: 1.9467, Train: 0.4714, Val: 0.3960, Test: 0.4050
Epoch: 010, Loss: 1.2394, Train: 1.0000, Val: 0.7640, Test: 0.7580
Epoch: 020, Loss: 0.2966, Train: 1.0000, Val: 0.7900, Test: 0.7990
Epoch: 030, Loss: 0.0969, Train: 1.0000, Val: 0.7700, Test: 0.7980
Epoch: 040, Loss: 0.0965, Train: 1.0000, Val: 0.7660, Test: 0.7910
Epoch: 050, Loss: 0.0765, Train: 1.0000, Val: 0.7640, Test: 0.7920
Epoch: 060, Loss: 0.0758, Train: 1.0000, Val: 0.7700, Test: 0.7870
Epoch: 070, Loss: 0.0722, Train: 1.0000, Val: 0.7720, Test: 0.7980


In [None]:
# Plot training loss (one point per epoch, taken from `losses`)
fig, ax = plt.subplots()
ax.plot(losses)
ax.set_title("Training Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
plt.show()

In [None]:
# Final evaluation: score the model once more on all three splits.
train_acc, val_acc, test_acc = test()
final_report = f'Final results: Train: {train_acc:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}'
print(final_report)