Import everything you need — including the model class (here it's GCNConv).

In [None]:
import torch
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, mean_squared_error
from torch.nn import Linear
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv  # Replace with GATConv, SAGEConv, etc.
from torch_geometric.nn import global_mean_pool

# 🔁 Load datasets
task = "classification"  # or "regression"

# Directory holding the pre-computed random split for the selected task.
split_dir = f"../4_train_test_split/random_split/{task}"

# The split files are expected to hold pickled graph objects rather than plain
# tensors (they are indexed and read via `.x` / `.y` further down). Since
# PyTorch 2.6 `torch.load` defaults to `weights_only=True`, which refuses to
# unpickle such objects, so full unpickling is requested explicitly.
# NOTE(security): `weights_only=False` executes arbitrary pickle code; only
# load split files that you generated yourself or otherwise trust.
train_data = torch.load(f"{split_dir}/{task}_train.pt", weights_only=False)
val_data = torch.load(f"{split_dir}/{task}_val.pt", weights_only=False)
test_data = torch.load(f"{split_dir}/{task}_test.pt", weights_only=False)

# Only the training split is shuffled; validation/test order stays fixed.
batch_size = 32
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)
test_loader = DataLoader(test_data, batch_size=batch_size)

# 🧱 Define Model (GCN or MPNN or GraphSAGE, etc.)
class GCN(torch.nn.Module):
    """Two-layer GCN that emits one output row per graph.

    The surrounding pipeline compares the model output against one label per
    graph, so the node embeddings produced by the second convolution are
    mean-pooled over the nodes of each graph before being returned.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Width of the hidden graph-convolution layer.
        out_channels: Size of the per-graph output (number of classes for
            classification, 1 for regression).
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return a ``[num_graphs, out_channels]`` tensor for ``data``.

        Args:
            data: A graph or mini-batch of graphs exposing ``x``,
                ``edge_index`` and (for mini-batches) ``batch``.
        """
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)

        # `batch` maps every node to the index of the graph it belongs to.
        # A single un-batched graph has no such vector, so treat all of its
        # nodes as belonging to graph 0.
        batch = getattr(data, "batch", None)
        if batch is None:
            batch = x.new_zeros(x.size(0), dtype=torch.long)

        # Collapse node-level outputs into one row per graph so the result
        # lines up with the graph-level labels.
        return global_mean_pool(x, batch)

# 🧮 Loss and evaluation setup
# Node feature dimensionality is read from the first training graph.
in_channels = train_data[0].x.size(1)

if task == "classification":
    # CrossEntropyLoss needs every target id to be < number of logits, so the
    # output layer is sized from the largest label id rather than from the
    # count of distinct labels (which is too small when ids are not
    # contiguous from 0, e.g. {0, 2}).
    num_classes = max(int(data.y.item()) for data in train_data) + 1
    out_channels = num_classes
    criterion = torch.nn.CrossEntropyLoss()
else:
    # Regression predicts a single scalar per graph.
    out_channels = 1
    criterion = torch.nn.MSELoss()

model = GCN(in_channels=in_channels, hidden_channels=64, out_channels=out_channels)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 🏋️ Training loop
def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion``, ``device``
    and ``task``.

    Returns:
        The mean training loss per target over the epoch, or ``nan`` when the
        loader yields no batches.
    """
    model.train()
    loss_sum, num_targets = 0.0, 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch)

        # Flatten labels so a [B, 1] label tensor cannot silently broadcast
        # against a [B] prediction inside the loss.
        target = batch.y.view(-1)
        if task == "classification":
            # Keep the [B, C] logits intact: a bare squeeze() would drop the
            # batch dimension for a batch of one graph (or the class
            # dimension for a single class).
            loss = criterion(out, target.long())
        else:
            # Drop only the trailing singleton dimension of the [B, 1]
            # output, and match the prediction dtype for MSELoss.
            loss = criterion(out.squeeze(-1), target.to(out.dtype))

        loss.backward()
        optimizer.step()

        # The criteria average over the batch, so re-weight by batch size to
        # obtain a proper per-target mean over the whole epoch.
        loss_sum += loss.item() * target.size(0)
        num_targets += target.size(0)

    return loss_sum / num_targets if num_targets else float("nan")

# 📈 Evaluation
def evaluate(loader):
    """Score the module-level ``model`` on every batch of ``loader``.

    Args:
        loader: Iterable of mini-batches accepted by ``model``.

    Returns:
        Accuracy when ``task == "classification"``, otherwise the mean
        squared error between labels and predictions.
    """
    model.eval()
    preds, labels = [], []
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            out = model(batch)
            if task != "classification":
                # Drop only the trailing singleton dimension; a bare
                # squeeze() turns a batch of one graph into a 0-dim tensor
                # that torch.cat cannot concatenate.
                out = out.squeeze(-1)
            preds.append(out.cpu())
            # Flatten so [B] and [B, 1] label layouts concatenate alike.
            labels.append(batch.y.view(-1).cpu())
    preds = torch.cat(preds)
    labels = torch.cat(labels)

    if task == "classification":
        # Logits stay [N, C] (never squeezed), so argmax over dim 1 is valid
        # even when a batch contained a single graph.
        pred_classes = preds.argmax(dim=1)
        return accuracy_score(labels.long().numpy(), pred_classes.numpy())
    return mean_squared_error(labels.numpy(), preds.numpy())

# 🚀 Run training
# Label printed next to the reported metric; it depends only on the task.
metric_name = "Accuracy" if task == "classification" else "MSE"

# Train for 100 epochs, reporting the validation metric after each one.
for epoch in range(1, 101):
    train()
    metric = evaluate(val_loader)
    print(f"Epoch {epoch:03d} - {metric_name}: {metric:.4f}")

# ✅ Final test evaluation
test_metric = evaluate(test_loader)
print(f"\n🧪 Test {metric_name}: {test_metric:.4f}")


Load the graph datasets (the random-split `.pt` files) and specify which task you are running.

In [None]:
task = "classification"  # or "regression"

# Directory holding the pre-computed random split for the selected task.
split_dir = f"../4_train_test_split/random_split/{task}"

# The split files are expected to hold pickled graph objects rather than plain
# tensors (later cells index them and read `.x` / `.y`). Since PyTorch 2.6
# `torch.load` defaults to `weights_only=True`, which refuses to unpickle such
# objects, so full unpickling is requested explicitly.
# NOTE(security): `weights_only=False` executes arbitrary pickle code; only
# load split files that you generated yourself or otherwise trust.
train_data = torch.load(f"{split_dir}/{task}_train.pt", weights_only=False)
val_data = torch.load(f"{split_dir}/{task}_val.pt", weights_only=False)
test_data = torch.load(f"{split_dir}/{task}_test.pt", weights_only=False)

# Only the training split is shuffled; validation/test order stays fixed.
batch_size = 32
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)
test_loader = DataLoader(test_data, batch_size=batch_size)

Define the model you want to use (GCN or MPNN or GraphSAGE, etc.)

In [None]:
class GCN(torch.nn.Module):
    """Two-layer GCN that emits one output row per graph.

    The surrounding pipeline compares the model output against one label per
    graph, so the node embeddings produced by the second convolution are
    mean-pooled over the nodes of each graph before being returned.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Width of the hidden graph-convolution layer.
        out_channels: Size of the per-graph output (number of classes for
            classification, 1 for regression).
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return a ``[num_graphs, out_channels]`` tensor for ``data``.

        Args:
            data: A graph or mini-batch of graphs exposing ``x``,
                ``edge_index`` and (for mini-batches) ``batch``.
        """
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)

        # `batch` maps every node to the index of the graph it belongs to.
        # A single un-batched graph has no such vector, so treat all of its
        # nodes as belonging to graph 0.
        batch = getattr(data, "batch", None)
        if batch is None:
            batch = x.new_zeros(x.size(0), dtype=torch.long)

        # Collapse node-level outputs into one row per graph so the result
        # lines up with the graph-level labels.
        return global_mean_pool(x, batch)

Set up the loss, the optimizer, and the training and evaluation functions.

In [None]:
# Node feature dimensionality is read from the first training graph.
in_channels = train_data[0].x.size(1)

if task == "classification":
    # CrossEntropyLoss needs every target id to be < number of logits, so the
    # output layer is sized from the largest label id rather than from the
    # count of distinct labels (which is too small when ids are not
    # contiguous from 0, e.g. {0, 2}).
    num_classes = max(int(data.y.item()) for data in train_data) + 1
    out_channels = num_classes
    criterion = torch.nn.CrossEntropyLoss()
else:
    # Regression predicts a single scalar per graph.
    out_channels = 1
    criterion = torch.nn.MSELoss()

model = GCN(in_channels=in_channels, hidden_channels=64, out_channels=out_channels)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 🏋️ Training loop
def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion``, ``device``
    and ``task``.

    Returns:
        The mean training loss per target over the epoch, or ``nan`` when the
        loader yields no batches.
    """
    model.train()
    loss_sum, num_targets = 0.0, 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch)

        # Flatten labels so a [B, 1] label tensor cannot silently broadcast
        # against a [B] prediction inside the loss.
        target = batch.y.view(-1)
        if task == "classification":
            # Keep the [B, C] logits intact: a bare squeeze() would drop the
            # batch dimension for a batch of one graph (or the class
            # dimension for a single class).
            loss = criterion(out, target.long())
        else:
            # Drop only the trailing singleton dimension of the [B, 1]
            # output, and match the prediction dtype for MSELoss.
            loss = criterion(out.squeeze(-1), target.to(out.dtype))

        loss.backward()
        optimizer.step()

        # The criteria average over the batch, so re-weight by batch size to
        # obtain a proper per-target mean over the whole epoch.
        loss_sum += loss.item() * target.size(0)
        num_targets += target.size(0)

    return loss_sum / num_targets if num_targets else float("nan")

# 📈 Evaluation
def evaluate(loader):
    """Score the module-level ``model`` on every batch of ``loader``.

    Args:
        loader: Iterable of mini-batches accepted by ``model``.

    Returns:
        Accuracy when ``task == "classification"``, otherwise the mean
        squared error between labels and predictions.
    """
    model.eval()
    preds, labels = [], []
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            out = model(batch)
            if task != "classification":
                # Drop only the trailing singleton dimension; a bare
                # squeeze() turns a batch of one graph into a 0-dim tensor
                # that torch.cat cannot concatenate.
                out = out.squeeze(-1)
            preds.append(out.cpu())
            # Flatten so [B] and [B, 1] label layouts concatenate alike.
            labels.append(batch.y.view(-1).cpu())
    preds = torch.cat(preds)
    labels = torch.cat(labels)

    if task == "classification":
        # Logits stay [N, C] (never squeezed), so argmax over dim 1 is valid
        # even when a batch contained a single graph.
        pred_classes = preds.argmax(dim=1)
        return accuracy_score(labels.long().numpy(), pred_classes.numpy())
    return mean_squared_error(labels.numpy(), preds.numpy())


🚀 Model training

In [None]:
# Label printed next to the reported metric; it depends only on the task.
metric_name = "Accuracy" if task == "classification" else "MSE"

# Train for 100 epochs, reporting the validation metric after each one.
for epoch in range(1, 101):
    train()
    metric = evaluate(val_loader)
    print(f"Epoch {epoch:03d} - {metric_name}: {metric:.4f}")

# ✅ Final test evaluation
test_metric = evaluate(test_loader)
print(f"\n🧪 Test {metric_name}: {test_metric:.4f}")