In [None]:
# https://pytorch-geometric.readthedocs.io/en/2.6.1/get_started/introduction.html
# Author: Ziang Li. 08/28/2025

# How to Construct a graph

<div style="background-color:white; display:inline-block;">
  <img src="https://pytorch-geometric.readthedocs.io/en/2.6.1/_images/graph.svg" width="400">
</div>
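
We show a simple example of an unweighted and undirected graph with three nodes and two undirected edges. Because `edge_index` stores every undirected edge once per direction, it holds four entries. Each node contains exactly one feature: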


In [1]:
import torch
from torch_geometric.data import Data

# Undirected edges 0-1 and 1-2, written as (source, target) pairs in both
# directions and then transposed into the [2, num_edges] layout.
edge_index = torch.tensor([[0, 1],
                           [1, 0],
                           [1, 2],
                           [2, 1]], dtype=torch.long).t().contiguous()

# One scalar feature per node, giving a [3, 1] feature matrix.
x = torch.tensor([-1, 0, 1], dtype=torch.float).unsqueeze(-1)

data = Data(x=x, edge_index=edge_index)
data

Data(x=[3, 1], edge_index=[2, 4])

In [2]:
# Print a few structural properties of the toy graph, one value per line:
# node count, isolated nodes, self-loops, directedness.
for graph_property in (
    data.num_nodes,
    data.has_isolated_nodes(),
    data.has_self_loops(),
    data.is_directed(),
):
    print(graph_property)

# Optional: transfer the data object to the GPU.
# device = torch.device('cuda')
# data = data.to(device)

3
False
False
False


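`Data` only stores tensors (node features and edge indices); it is ***not a full graph*** object and does not check on its own that those tensors are consistent with each other. Call `validate()` to check them: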

In [5]:
import torch
from torch_geometric.data import Data
# Deliberately malformed graph: the last edge starts at node 10, but x only
# defines the three nodes 0, 1 and 2.
edge_index = torch.tensor([[0, 1, 1, 10],
                           [1, 0, 2, 1]], dtype=torch.long)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

data = Data(x=x, edge_index=edge_index)

# validate() is the call that reports the inconsistency. The error is the point
# of this cell, so catch and print it instead of letting it abort a
# top-to-bottom run of the notebook.
try:
    data.validate(raise_on_error=True)
except ValueError as err:
    print(f'ValueError: {err}')

ValueError: 'edge_index' contains larger indices than the number of nodes (3) in 'Data' (found 10)

We can also construct a `Data` object from a NetworkX graph:

In [8]:
import torch
import networkx as nx
import numpy as np
from torch_geometric.utils import from_networkx

# Adjacency matrix of a triangle: every node is linked to the other two,
# with zeros on the diagonal (no self-loops).
A = np.ones((3, 3), dtype=int) - np.eye(3, dtype=int)

# NetworkX graph -> PyG Data object.
G = nx.from_numpy_array(A)
data = from_networkx(G)

# Besides edge_index and num_nodes, the result carries a per-edge `weight`
# attribute taken from the matrix entries (see the printed output).
print(data)


Data(edge_index=[2, 6], weight=[6], num_nodes=3)


# Train your first GNN

In [1]:
from torch_geometric.datasets import Planetoid

# Load the Cora citation graph (stored under /tmp/Cora).
dataset = Planetoid(root='/tmp/Cora', name='Cora')

# Cora is a single graph, so index the dataset to get it. `dataset.data` exposes
# the internal storage of the dataset and triggers a warning in recent PyG.
dataset[0].num_nodes, dataset[0].num_edges, dataset.num_classes



(2708, 10556, 7)

For the node classification task, we define a two-layer GCN:

In [19]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Args:
        in_channels: Size of each input node feature vector. When omitted,
            ``dataset.num_node_features`` of the notebook-level ``dataset``
            is used, so ``GCN()`` keeps working as before.
        hidden_channels: Width of the hidden layer (default: 16).
        out_channels: Number of output classes. When omitted,
            ``dataset.num_classes`` of the notebook-level ``dataset`` is used.
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None):
        super().__init__()
        # Only fall back to the global dataset when a size is not given, so the
        # model can also be built for other datasets.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes

        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return per-node log-probabilities over the classes.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is only active in training mode (model.train()).
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        # log_softmax pairs with F.nll_loss in the training loop.
        return F.log_softmax(x, dim=1)

In [20]:
def train_model(model, data, epochs=200, lr=0.01, weight_decay=5e-4):
    """Train a node classifier and print its accuracy on the test nodes.

    Args:
        model: Module called as ``model(data)`` that returns per-node
            log-probabilities.
        data: Graph object providing ``y``, ``train_mask`` and ``test_mask``
            (plus whatever ``model`` reads) and supporting ``.to(device)``.
        epochs: Number of full-batch optimisation steps (default: 200).
        lr: Adam learning rate (default: 0.01).
        weight_decay: Adam weight decay (default: 5e-4).

    Returns:
        None. The test accuracy is printed as ``Accuracy: <value>``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Move the model as well as the data, so the two can never end up on
    # different devices regardless of what the caller did beforehand.
    model = model.to(device)
    data = data.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # ===== Training loop =====
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        out = model(data)
        # Only the training nodes contribute to the loss.
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

    # ===== Evaluation =====
    model.eval()
    # No gradients are needed for inference; skip building the autograd graph.
    with torch.no_grad():
        pred = model(data).argmax(dim=1)
    correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
    acc = int(correct) / int(data.test_mask.sum())
    print(f'Accuracy: {acc:.4f}')


In [24]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Train on the Cora graph itself (dataset[0]) rather than on the dataset
# wrapper: the training code reads x, edge_index, y and the masks of one graph.
train_model(GCN().to(device), dataset[0])

Accuracy: 0.8090


In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import add_self_loops, degree

# ===== Define a simple message passing layer (like GCNConv) =====
class SimpleMessagePassing(nn.Module):
    """Mean-aggregation message passing layer followed by a linear map.

    Each node averages the features of its in-neighbours and of itself (a
    self-loop is added), i.e. it computes ``Linear(D^-1 (A + I) X)``. Unlike
    GCNConv, which normalises symmetrically, this layer uses the simpler
    row normalisation ``D^-1``.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output node feature vector.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.linear = nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index):
        """Aggregate neighbour features and apply the linear transformation.

        Args:
            x: Node feature matrix of shape [num_nodes, in_channels].
            edge_index: Edge list of shape [2, num_edges] in PyG layout, where
                ``edge_index[0]`` holds source nodes and ``edge_index[1]``
                holds target nodes.

        Returns:
            Tensor of shape [num_nodes, out_channels].
        """
        num_nodes = x.size(0)

        # Add a self-loop to each node so it keeps its own features.
        edge_index, _ = add_self_loops(edge_index, num_nodes=num_nodes)

        # PyG convention: messages flow from edge_index[0] (source) to
        # edge_index[1] (target). For undirected graphs both directions are
        # present, so the order only matters for directed graphs.
        src, dst = edge_index

        # In-degree of every node (number of incoming edges, self-loop included).
        deg = degree(dst, num_nodes=num_nodes, dtype=x.dtype)  # shape: [num_nodes]
        deg_inv = deg.pow(-1)  # D^(-1)
        deg_inv[deg_inv == float('inf')] = 0  # guard against divide-by-zero

        # Each message is scaled by 1 / in-degree of the node that receives it.
        norm = deg_inv[dst]  # shape: [num_edges]

        # Messages sent along the edges, read from the source nodes.
        messages = norm.unsqueeze(1) * x[src]  # shape: [num_edges, in_channels]

        # Sum the incoming messages at every target node.
        out = torch.zeros_like(x).index_add(0, dst, messages)  # [num_nodes, in_channels]

        # Apply linear transformation
        return self.linear(out)

# ===== Define a simple 2-layer GCN =====
class SimpleGCN(nn.Module):
    """Two stacked SimpleMessagePassing layers for node classification.

    Args:
        dataset: Object providing ``num_node_features`` and ``num_classes``.
        hidden: Width of the hidden representation (default: 16).
    """

    def __init__(self, dataset, hidden=16):
        super().__init__()
        in_dim, out_dim = dataset.num_node_features, dataset.num_classes
        self.conv1 = SimpleMessagePassing(in_dim, hidden)
        self.conv2 = SimpleMessagePassing(hidden, out_dim)

    def forward(self, data):
        """Return per-node log-probabilities over the classes."""
        features, edges = data.x, data.edge_index
        hidden_repr = F.relu(self.conv1(features, edges))
        logits = self.conv2(hidden_repr, edges)
        # Log-probabilities, as expected by F.nll_loss.
        return F.log_softmax(logits, dim=1)



In [17]:
# ===== Training setup =====
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleGCN(dataset).to(device)
# Train on the Cora graph. The notebook-level `data` variable was last assigned
# the 3-node NetworkX example, which has no features, labels or masks, so using
# it here breaks a fresh top-to-bottom run.
train_model(model, dataset[0])

Accuracy: 0.8150
