
# End-to-end: `uv` environment (Python 3.12) + CUDA PyTorch + PyG + Link Prediction (GCN)

## 0) Install `uv` (cross-platform hints)

In [None]:

# Install the `uv` tool with its install script, then print the installed version.
# Only the curl line is active; the wget and PowerShell variants are kept as
# commented-out alternatives for other setups.
#
# For Linux / macOS:
# If curl is available:
!curl -LsSf https://astral.sh/uv/install.sh | sh
# If wget is available (uncomment, and comment out the curl line above):
# !wget -qO- https://astral.sh/uv/install.sh | sh
#
# For Windows (PowerShell) -- run this in a PowerShell terminal, not in this cell:
# Set-ExecutionPolicy Bypass -Scope Process -Force; `
#   [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; `
#   irm https://astral.sh/uv/install.ps1 | iex
#
# After installing, you may need to restart your shell for PATH changes to take effect.
# NOTE(review): `!` commands run in a subshell started by the notebook kernel, so a PATH
# change made by the installer is presumably not visible to the check below until the
# kernel / Jupyter server is restarted -- confirm on your machine.
# Test it by checking the version.
!uv --version



## 1) Create a Python 3.12 project named `test`
This scaffolds the project folder and a `pyproject.toml` you can pin dependencies into. The virtual environment itself is created by the `uv venv` command that the cell prints at the end.


In [None]:

import pathlib, textwrap

# Scaffold a minimal `test/` project in the current working directory.
# Nothing is written when the folder is already present.
project_dir = pathlib.Path("test").resolve()

# pyproject.toml template; dedent() strips the indentation used here for readability.
pyproject_toml = textwrap.dedent("""
    [project]
    name = "test"
    version = "0.1.0"
    description = "GCN link prediction demo (PyTorch + PyG)"
    requires-python = ">=3.12"

    dependencies = [
        # We will add heavy deps via explicit uv commands below to ensure correct wheels.
    ]

    [tool.uv]
""")

# Path relative to the project root -> file content, listed in write order.
scaffold_files = {
    "src/test/__init__.py": "",
    "README.md": "# test\n\nProject created by the workshop tutorial notebook.",
    ".gitignore": ".venv\n__pycache__\n*.pyc\n",
    "pyproject.toml": pyproject_toml,
}

if not project_dir.exists():
    for relative_path, content in scaffold_files.items():
        target = project_dir / relative_path
        # Creates the project root and src/test/ on demand.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content)
    print(f"Created project at: {project_dir}")
else:
    print(f"Project already exists at: {project_dir}")

# Follow-up commands for creating the interpreter and the virtual environment.
for hint in (
    "If running locally, now run:",
    " uv python install 3.12",
    " cd test && uv venv --python 3.12 .venv",
):
    print(hint)



## 2) Install CUDA-enabled PyTorch and PyTorch Geometric with `uv`

The default setup here is **PyTorch `2.4.1` with `cu124` wheels**, together with a **PyTorch Geometric** installation built for that exact torch/CUDA combination.

You can use the `cu124` PyTorch wheels as long as your **driver supports CUDA 12.x** (which the Liseda-cluster's drivers should!). For source builds or conda-based CUDA 12.7 environments, adapt the commands accordingly.


In [None]:

import os, pathlib

# Build (and print) the shell commands that install the CUDA build of PyTorch, scikit-learn
# and PyTorch Geometric into the project's environment. Nothing is installed by this cell;
# the commands are meant to be copied into a terminal.
project_dir = pathlib.Path("test").resolve()
assert project_dir.exists(), "Project folder not found. Run the previous cell first."

# Versions can be overridden through environment variables before starting the kernel.
TORCH_VERSION = os.environ.get("TORCH_VERSION", "2.4.1")
CUDA_TAG     = os.environ.get("TORCH_CUDA_TAG", "cu124")  # e.g., cu121, cu124
PYG_TAG      = os.environ.get("PYG_TORCH_TAG", f"{TORCH_VERSION}+{CUDA_TAG}")  # e.g., 2.4.1+cu124

print("Planned installs:")
print(f"  torch=={TORCH_VERSION} ({CUDA_TAG})")
print(f"  PyG wheels tag: torch-{PYG_TAG}")

# PyTorch CUDA wheels live on a dedicated index keyed by the CUDA tag.
torch_index = f"https://download.pytorch.org/whl/{CUDA_TAG}"
torch_pkgs  = f"torch=={TORCH_VERSION} torchvision torchaudio"
cmd1 = f'cd "{project_dir}" && uv pip install --index-url {torch_index} {torch_pkgs}'
cmd2 = f'cd "{project_dir}" && uv pip install scikit-learn'

# The pip distribution of PyTorch Geometric is `torch_geometric` (the name the notebook
# imports later); `pyg` is not the PyPI name, and the data.pyg.org wheel page only hosts
# the compiled extension packages.
cmd3 = f'cd "{project_dir}" && uv pip install torch_geometric'

# Optional compiled extensions, built per torch/CUDA combination. GCNConv does not need them.
pyg_find_links = f"https://data.pyg.org/whl/torch-{PYG_TAG}.html"
pyg_extensions = "pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv"
cmd4 = f'cd "{project_dir}" && uv pip install {pyg_extensions} -f {pyg_find_links}'

print("\nRun locally in a terminal:")
print(cmd1)
print(cmd2)
print(cmd3)

print("\nOptional: compiled PyG extensions (not required for this notebook):")
print(cmd4)

print("\nVerify CUDA/GPU after installation:")
print(f'  cd "{project_dir}" && uv run python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"')


### (Optional) Quick GPU check (after installing PyTorch)

In [None]:

# Example (run in your system shell):
#   cd test
#   uv run python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"



## 3) GNNs Use-Case: Link Prediction with PyG's Planetoid Dataset

Make sure to run this **after** you have installed all dependencies into the `test/.venv`.  
If you're running the notebook kernel from elsewhere, it’s okay — these cells will still demonstrate the full pipeline (assuming the required packages are importable).


In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# The PyG imports are wrapped so that a missing install yields an actionable hint
# before the original error propagates.
try:
    import torch_geometric.transforms as T
    from torch_geometric.datasets import Planetoid
    from torch_geometric.nn import GCNConv
except Exception:
    print(
        "PyTorch Geometric not available in the current kernel. "
        "Run the uv installation commands and then restart the kernel using the 'test/.venv' interpreter."
    )
    raise

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)


In [None]:

# Seed Python's `random`, NumPy and PyTorch before any stochastic step so that a
# "Restart Kernel -> Run All" reproduces the same edge split and model initialisation.
# (GPU kernels may still introduce small run-to-run numerical noise.)
from torch_geometric import seed_everything

SEED = 42
seed_everything(SEED)

# Cora citation graph from the Planetoid collection; downloaded into data/Planetoid on first use.
dataset = Planetoid(root="data/Planetoid", name="Cora")
data = dataset[0]

# Split the edges into train / validation / test sets for link prediction.
transform = T.RandomLinkSplit(
    num_val=0.05,                     # fraction of edges held out for validation
    num_test=0.10,                    # fraction of edges held out for testing
    is_undirected=True,               # treat the graph as undirected when splitting
    add_negative_train_samples=True,  # also attach sampled negative edges to the training split
)
train_data, val_data, test_data = transform(data)

# Display the three splits as the cell output.
train_data, val_data, test_data


In [None]:

class GCN(nn.Module):
    """Two-layer graph convolutional encoder.

    Applies ``GCNConv -> ReLU -> dropout -> GCNConv`` and returns the output of the
    second convolution.

    Args:
        in_dim: Size passed to the first convolution as its input dimension.
        hidden_dim: Output size of the first convolution / input size of the second.
        out_dim: Output size of the second convolution.
        dropout: Dropout probability applied between the two convolutions
            (only active while the module is in training mode).
    """

    def __init__(self, in_dim, hidden_dim, out_dim, dropout=0.5):
        super().__init__()
        self.dropout = dropout
        self.conv1 = GCNConv(in_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, out_dim)

    def forward(self, x, edge_index):
        """Encode the inputs ``x`` using the connectivity given by ``edge_index``."""
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return self.conv2(hidden, edge_index)

def link_logits(z, edge_label_index):
    """Score candidate edges by the dot product of their endpoint embeddings.

    Args:
        z: Embedding tensor indexed by node along its first dimension.
        edge_label_index: Index tensor whose row 0 holds source nodes and row 1
            holds destination nodes of the edges to score.

    Returns:
        One raw (pre-sigmoid) score per candidate edge.
    """
    source_nodes = edge_label_index[0]
    target_nodes = edge_label_index[1]
    # Element-wise product reduced over the feature axis == per-edge dot product.
    return z[source_nodes].mul(z[target_nodes]).sum(dim=-1)


In [None]:
## GCN parameters

# Encoder sizes: the input width is taken from the dataset's feature count.
in_dim = dataset.num_features
hidden_dim = 128
out_dim = 64
model = GCN(in_dim=in_dim, hidden_dim=hidden_dim, out_dim=out_dim).to(device)

# Loss: BCEWithLogitsLoss takes the raw (pre-sigmoid) edge scores and applies the
# sigmoid internally, so the model output is passed to it unnormalised.
criterion = nn.BCEWithLogitsLoss()

# Optimiser: Adam with L2 regularisation via weight decay.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

In [None]:

def run_split(split_data, train=False):
    """Run the model on one data split, either as a training step or as an evaluation.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion`` and ``device``.

    Args:
        split_data: Split object providing ``x``, ``edge_index``, ``edge_label_index``
            and ``edge_label``.
        train: When True, perform a single optimisation step on this split;
            otherwise evaluate without tracking gradients.

    Returns:
        The loss as a ``float`` when ``train`` is True, otherwise the tuple
        ``(accuracy, f1, roc_auc)`` computed on the split's labelled edges.
    """
    x = split_data.x.to(device)
    edge_index = split_data.edge_index.to(device)
    edge_label_index = split_data.edge_label_index.to(device)
    # BCEWithLogitsLoss needs floating-point targets.
    edge_label = split_data.edge_label.to(device).float()

    if train:
        model.train()
        optimizer.zero_grad()
        z = model(x, edge_index)
        logits = link_logits(z, edge_label_index)
        loss = criterion(logits, edge_label)
        loss.backward()
        optimizer.step()
        return float(loss.item())

    model.eval()
    with torch.no_grad():
        z = model(x, edge_index)
        logits = link_logits(z, edge_label_index)
        probs = torch.sigmoid(logits).cpu().numpy()
        # Hard predictions use a 0.5 probability threshold; ROC-AUC uses the raw probabilities.
        preds = (probs >= 0.5).astype("int32")
        labels = edge_label.cpu().numpy()
        acc = accuracy_score(labels, preds)
        f1 = f1_score(labels, preds)
        roc = roc_auc_score(labels, probs)
        return acc, f1, roc


In [None]:

# Train for a fixed number of epochs and remember the metrics of the epoch with the
# highest validation F1 (the test metrics are recorded from that same epoch).
epochs = 100
best_valid = -1.0
best = {"valid": (0, 0, 0), "test": (0, 0, 0)}

for epoch in range(1, epochs + 1):
    loss = run_split(train_data, train=True)
    val_metrics = run_split(val_data, train=False)
    test_metrics = run_split(test_data, train=False)
    val_acc, val_f1, val_roc = val_metrics
    test_acc, test_f1, test_roc = test_metrics

    # Model selection is driven by validation F1 only.
    if val_f1 > best_valid:
        best_valid = val_f1
        best["valid"] = val_metrics
        best["test"] = test_metrics

    # Log the first epoch and then every tenth one.
    if epoch == 1 or epoch % 10 == 0:
        print(f"Epoch {epoch:03d} | loss={loss:.4f} | val: acc={val_acc:.4f} f1={val_f1:.4f} roc={val_roc:.4f}"
              f" | test: acc={test_acc:.4f} f1={test_f1:.4f} roc={test_roc:.4f}")


In [None]:

# Report the validation/test metrics stored for the best epoch (selected by validation F1).
summary_lines = [
    "\nBest (by val F1)",
    "Val:  acc={:.4f}, f1={:.4f}, roc-auc={:.4f}".format(*best["valid"]),
    "Test: acc={:.4f}, f1={:.4f}, roc-auc={:.4f}".format(*best["test"]),
]
print(*summary_lines, sep="\n")
