## Importing Libraries

In [None]:
import os
import time
import torch
import torch.nn.functional as F
import torch_directml
import wandb
import numpy as np
from tqdm import tqdm
import h5py
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool

✅ Using device: privateuseone:0


Creating Graphs: 100%|██████████| 872666/872666 [00:46<00:00, 18712.51it/s]


Epoch 1, Batch 100: Loss = 0.6793, Accuracy = 0.6875
Epoch 1, Batch 200: Loss = 0.7024, Accuracy = 0.5625
Epoch 1, Batch 300: Loss = 0.6968, Accuracy = 0.5625
Epoch 1, Batch 400: Loss = 0.6698, Accuracy = 0.5938
Epoch 1, Batch 500: Loss = 0.6246, Accuracy = 0.6562
Epoch 1, Batch 600: Loss = 0.6981, Accuracy = 0.5000
Epoch 1, Batch 700: Loss = 0.6163, Accuracy = 0.6875
Epoch 1, Batch 800: Loss = 0.7367, Accuracy = 0.5312
Epoch 1, Batch 900: Loss = 0.5997, Accuracy = 0.6250
Epoch 1, Batch 1000: Loss = 0.6382, Accuracy = 0.6250
Epoch 1, Batch 1100: Loss = 0.6256, Accuracy = 0.6250
Epoch 1, Batch 1200: Loss = 0.5501, Accuracy = 0.7812
Epoch 1, Batch 1300: Loss = 0.6225, Accuracy = 0.6875
Epoch 1, Batch 1400: Loss = 0.6035, Accuracy = 0.6875
Epoch 1, Batch 1500: Loss = 0.5577, Accuracy = 0.7500
Epoch 1, Batch 1600: Loss = 0.5302, Accuracy = 0.7188
Epoch 1, Batch 1700: Loss = 0.6545, Accuracy = 0.7500
Epoch 1, Batch 1800: Loss = 0.6947, Accuracy = 0.5938
Epoch 1, Batch 1900: Loss = 0.6130, A

0,1
Batch,▃▄▄▅▆▂█▃▃▄▁▇▇▁▄▅▅▅▆▃▇▇█▂▃▃▆▂▄▄▂▅▂▃▄▇█▂▄▆
Batch Accuracy,▄▇▅▄▅▆▅▆▆▅▅▅▄▅▄▆▆▆▆▅▅▅▁▅▄▆▅▄▆▄▇▃▅▅▄▇▄▂█▄
Batch Loss,▂▅▅▄▆█▅▅▅▆▃▅▆▅▄▃▄▅▄▁▆▄▁▆▅█▁▄▁▅▄▄▅▆▆▇▃▅▃▅
Epoch,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇████
Epoch Time (s),▄▁▇▃▄▇▇██▅▂
Train Accuracy,▂▁▇████████
Train Loss,▇█▂▁▁▁▂▂▁▁▂
Validation Accuracy,▁▂▁▃▃▅█▄▂▆▃

0,1
Batch,21800.0
Batch Accuracy,0.59375
Batch Loss,0.63602
Epoch,10.0
Epoch Time (s),488.05425
Train Accuracy,0.69972
Train Loss,0.59765
Validation Accuracy,0.70323


## Initializing Weights & Biases and setting up the GPU

In [None]:

# Initialize Weights & Biases (W&B) for experiment tracking.
# The recorded hyperparameters must mirror what the notebook actually runs:
# the training loop iterates 10 epochs with a batch size of 32.
wandb.init(project="higgs_gnn", name="gnn_training_final", config={"epochs": 10, "batch_size": 32})

# Set device to AMD GPU using DirectML
device = torch_directml.device()
print(f"✅ Using device: {device}")


## Loading the dataset and creating the graph data

In [None]:

# Load HDF5 dataset.
# The file location can be overridden with the HIGGS_JET_HDF5 environment
# variable so the notebook runs on other machines; the default is the
# original local path.
hdf5_file = os.environ.get(
    "HIGGS_JET_HDF5",
    r"C:\Users\vudut\OneDrive\Desktop\Python\MINI Project\Data Sets\jet-images_Mass60-100_pT250-300_R1.25_Pix25.hdf5",
)
with h5py.File(hdf5_file, "r") as f:
    # Materialize each dataset as an in-memory NumPy array before the file closes
    jet_pt = np.array(f["jet_pt"])
    jet_eta = np.array(f["jet_eta"])
    jet_phi = np.array(f["jet_phi"])
    jet_mass = np.array(f["jet_mass"])
    signal = np.array(f["signal"])  # Labels: 1 = Signal, 0 = Background

# Normalize node features (jet_pt, jet_eta, jet_phi, jet_mass)
features = np.stack([jet_pt, jet_eta, jet_phi, jet_mass], axis=1)  # Shape: [N, 4]
assert len(signal) == len(features), "label/feature count mismatch in HDF5 file"

feature_mean = features.mean(axis=0)
feature_std = features.std(axis=0)
# A constant column has zero std; divide by 1 instead so it becomes 0 rather than NaN
feature_std = np.where(feature_std == 0, 1.0, feature_std)
features = (features - feature_mean) / feature_std  # Standardization


In [None]:

# Build one single-node graph per sample; its only edge is the self-loop 0 -> 0
graphs = []
for idx, feature_row in tqdm(enumerate(features), total=len(features), desc="Creating Graphs"):
    node_features = torch.tensor(feature_row, dtype=torch.float).unsqueeze(0)
    self_loop = torch.tensor([[0], [0]], dtype=torch.long)
    label = torch.tensor([int(signal[idx])], dtype=torch.long)
    graphs.append(Data(x=node_features, edge_index=self_loop, y=label))


## Creating Dataloaders

In [None]:

# Split the graph list 80/20 (in order) into training and validation loaders
batch_size = 32
split_index = int(0.8 * len(graphs))
loader_options = dict(batch_size=batch_size, num_workers=0, pin_memory=True)

# Only the training set is reshuffled each epoch
train_loader = DataLoader(graphs[:split_index], shuffle=True, **loader_options)
val_loader = DataLoader(graphs[split_index:], shuffle=False, **loader_options)


## Creating a GNN model

In [None]:

# Define the GNN model
class GNN(torch.nn.Module):
    """Two-layer GCN followed by mean pooling and a linear classification head.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Output width of the first GCN layer.
        out_channels: Output width of the second GCN layer (input to the head).
        num_classes: Number of logits produced per graph.
        dropout: Dropout probability applied after each GCN layer in training.

    The defaults reproduce the original fixed architecture (4 -> 16 -> 32 -> 2,
    dropout 0.5), so ``GNN()`` behaves as before.
    """

    def __init__(self, in_channels=4, hidden_channels=16, out_channels=32, num_classes=2, dropout=0.5):
        super().__init__()
        self.dropout = dropout
        # add_self_loops=False: the layers are told not to insert self-loops themselves
        self.conv1 = GCNConv(in_channels, hidden_channels, add_self_loops=False)
        self.conv2 = GCNConv(hidden_channels, out_channels, add_self_loops=False)
        self.fc = torch.nn.Linear(out_channels, num_classes)

    def forward(self, data):
        """Return per-graph class logits for a batched ``data`` object.

        ``data`` must expose ``x``, ``edge_index`` and ``batch`` attributes.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        # Average node embeddings within each graph of the batch
        x = global_mean_pool(x, batch)
        return self.fc(x)

# Build the network on the selected device, then its optimizer and loss
model = GNN()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()


## Training the GNN model

In [None]:

# Training loop with epoch timing
NUM_EPOCHS = 10   # full passes over the training set
LOG_EVERY = 100   # print/log batch-level metrics every this many batches

for epoch in range(NUM_EPOCHS):
    start_time = time.time()
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    batch_counter = 0

    for batch in train_loader:
        batch_counter += 1
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = criterion(out, batch.y)
        loss.backward()
        optimizer.step()

        # Read each scalar back from the device once and reuse it for both the
        # running totals and the periodic log (every .item() forces a sync).
        batch_loss = loss.item()
        pred = out.argmax(dim=1)
        batch_correct = (pred == batch.y).sum().item()
        batch_samples = batch.y.size(0)

        total_loss += batch_loss
        correct += batch_correct
        total += batch_samples

        if batch_counter % LOG_EVERY == 0:
            batch_acc = batch_correct / batch_samples
            print(f"Epoch {epoch+1}, Batch {batch_counter}: Loss = {batch_loss:.4f}, Accuracy = {batch_acc:.4f}")
            wandb.log({"Batch Loss": batch_loss, "Batch Accuracy": batch_acc, "Epoch": epoch+1, "Batch": batch_counter})

    epoch_time = time.time() - start_time
    # max(..., 1) keeps an empty loader from raising ZeroDivisionError
    avg_loss = total_loss / max(batch_counter, 1)  # mean of per-batch losses
    train_acc = correct / max(total, 1)
    print(f"Epoch {epoch+1}: Train Loss = {avg_loss:.4f}, Train Accuracy = {train_acc:.4f}, Time = {epoch_time:.2f}s")
    wandb.log({
        "Train Loss": avg_loss,
        "Train Accuracy": train_acc,
        "Epoch Time (s)": epoch_time,
        "Epoch": epoch+1
    })

    # Validation pass: eval mode, no gradient tracking
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in val_loader:
            batch = batch.to(device)
            out = model(batch)
            pred = out.argmax(dim=1)
            correct += (pred == batch.y).sum().item()
            total += batch.y.size(0)
    val_acc = correct / max(total, 1)
    print(f"Validation Accuracy: {val_acc:.4f}")
    wandb.log({"Validation Accuracy": val_acc, "Epoch": epoch+1})


## Saving the model

In [None]:

# Write the learned parameters to disk, then close the W&B run
trained_weights = model.state_dict()
torch.save(trained_weights, "higgs_gnn_model_final.pth")
wandb.finish()
