In [70]:
import os
import torch
import torch.nn.functional as F
import torch_directml
import wandb
import numpy as np
from tqdm import tqdm
import h5py
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.utils import dense_to_sparse
from torch_geometric.utils import dense_to_sparse, add_self_loops


In [71]:
# ✅ Initialize Weights & Biases (W&B)
wandb.init(project="higgs_gnn", name="gnn_training_final", config={"epochs": 20, "batch_size": 64})

In [72]:
# ✅ Set device to AMD GPU (DirectML)
device = torch_directml.device()
print(f"✅ Using device: {device}")

✅ Using device: privateuseone:0


In [73]:
# Load HDF5 dataset
import h5py
hdf5_file = r"C:\Users\vudut\OneDrive\Desktop\Python\MINI Project\Data Sets\jet-images_Mass60-100_pT250-300_R1.25_Pix25.hdf5"

with h5py.File(hdf5_file, "r") as f:
    jet_pt = np.array(f["jet_pt"])
    jet_eta = np.array(f["jet_eta"])
    jet_phi = np.array(f["jet_phi"])  # Added
    jet_mass = np.array(f["jet_mass"])  # Added
    signal = np.array(f["signal"])  # Labels: 1 = Signal, 0 = Background

In [74]:
# Normalize features
features = np.stack([jet_pt, jet_eta, jet_phi, jet_mass], axis=1)  # Expanded feature set
features = (features - features.mean(axis=0)) / features.std(axis=0)  # Standardization

In [75]:
# Create Graph Data (Fully Connected Graph)
graphs = []
for i in tqdm(range(len(features)), desc="Creating Graphs"):
    # Node features
    x = torch.tensor(features[i], dtype=torch.float).unsqueeze(0)  
    # Label
    y = torch.tensor([int(signal[i])], dtype=torch.long)
    
    # Create edge index for a single node (self-loop)
    # Using torch.zeros instead of just [0,0] to ensure proper device placement
    edge_index = torch.zeros((2, 1), dtype=torch.long)
    
    # Create graph object
    graph = Data(x=x, edge_index=edge_index, y=y)
    graphs.append(graph)

Creating Graphs: 100%|██████████| 872666/872666 [01:15<00:00, 11514.41it/s]


In [76]:
# Create DataLoader with additional parameters for stability
batch_size = 32  # Reduced batch size for better stability
train_loader = DataLoader(
    graphs[:int(0.8 * len(graphs))],
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,  # Disable multi-processing to avoid potential issues
    pin_memory=True,  # Enable faster data transfer to GPU
    drop_last=True  # Drop the last incomplete batch to ensure consistent batch sizes
)

val_loader = DataLoader(
    graphs[int(0.8 * len(graphs)):],
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
    drop_last=False  # Keep all validation samples
)



In [77]:
class GNN(torch.nn.Module):
    def __init__(self):
        super(GNN, self).__init__()
        # Initialize with add_self_loops=True since we want to include self-loops
        self.conv1 = GCNConv(4, 16, add_self_loops=True)
        self.conv2 = GCNConv(16, 32, add_self_loops=True)
        self.fc = torch.nn.Linear(32, 2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        
        # Ensure tensors are on the correct device
        x = x.to(edge_index.device)
        
        # Get batch information
        batch = data.batch if hasattr(data, 'batch') else torch.zeros(x.size(0), dtype=torch.long, device=x.device)
        
        # Apply convolutions
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        
        # Global pooling and classification
        x = global_mean_pool(x, batch)
        return self.fc(x)

In [78]:
# Training Loop
model = GNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()

for epoch in range(20):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    
    for batch in train_loader:
        try:
            # Move batch to device
            batch = batch.to(device)
            
            optimizer.zero_grad()
            out = model(batch)
            loss = criterion(out, batch.y)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            pred = out.argmax(dim=1)
            correct += (pred == batch.y).sum().item()
            total += batch.y.size(0)
            
        except RuntimeError as e:
            print(f"Error in batch: {e}")
            continue
    
    if total > 0:
        train_acc = correct / total
        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch}: Loss = {avg_loss:.4f}, Accuracy = {train_acc:.4f}")
        
        wandb.log({
            "epoch": epoch,
            "train_loss": avg_loss,
            "train_accuracy": train_acc
        })

Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is incorrect.
Error in batch: The parameter is

KeyboardInterrupt: 

In [1]:
# Import necessary libraries
import os
import torch
import torch.nn.functional as F
import torch_directml
import wandb
import numpy as np
from tqdm import tqdm
import h5py
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool

# Initialize Weights & Biases (W&B) for experiment tracking
wandb.init(project="higgs_gnn", name="gnn_training_fixed", config={"epochs": 20, "batch_size": 32})

# Set device to AMD GPU using DirectML
device = torch_directml.device()
print(f"✅ Using device: {device}")

# Load HDF5 dataset
hdf5_file = r"C:\Users\vudut\OneDrive\Desktop\Python\MINI Project\Data Sets\jet-images_Mass60-100_pT250-300_R1.25_Pix25.hdf5"  # Ensure this path is correct
with h5py.File(hdf5_file, "r") as f:
    jet_pt = np.array(f["jet_pt"])
    jet_eta = np.array(f["jet_eta"])
    jet_phi = np.array(f["jet_phi"])
    jet_mass = np.array(f["jet_mass"])
    signal = np.array(f["signal"])  # Labels: 1 = Signal, 0 = Background

# Normalize node features (jet_pt, jet_eta, jet_phi, jet_mass)
features = np.stack([jet_pt, jet_eta, jet_phi, jet_mass], axis=1)  # Shape: [N, 4]
features = (features - features.mean(axis=0)) / features.std(axis=0)  # Standardization

# Create graph data with explicit self-loops
graphs = []
for i in tqdm(range(len(features)), desc="Creating Graphs"):
    # Node features: Shape [1, 4] for a single node per graph
    x = torch.tensor(features[i], dtype=torch.float).unsqueeze(0)
    # Self-loop edge_index: Shape [2, 1] indicating a self-loop for the node
    edge_index = torch.tensor([[0], [0]], dtype=torch.long)
    # Label: Shape [1]
    y = torch.tensor([int(signal[i])], dtype=torch.long)
    graphs.append(Data(x=x, edge_index=edge_index, y=y))

# Create DataLoaders for training and validation
batch_size = 32  # Reduced from 64 to improve stability
train_loader = DataLoader(
    graphs[:int(0.8 * len(graphs))],
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,  # Set to 0 to avoid multiprocessing issues
    pin_memory=True  # Faster data transfer to GPU
)
val_loader = DataLoader(
    graphs[int(0.8 * len(graphs)):],
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=True
)

# Define the GNN model with add_self_loops=False since self-loops are in the data
class GNN(torch.nn.Module):
    def __init__(self):
        super(GNN, self).__init__()
        self.conv1 = GCNConv(4, 16, add_self_loops=False)  # Input channels = 4 features
        self.conv2 = GCNConv(16, 32, add_self_loops=False)
        self.fc = torch.nn.Linear(32, 2)  # Output 2 classes

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        x = global_mean_pool(x, batch)  # Graph-level representation
        return self.fc(x)

# Initialize model, optimizer, and loss function
model = GNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()

# Training loop
for epoch in range(20):
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = criterion(out, batch.y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        correct += (out.argmax(dim=1) == batch.y).sum().item()
        total += batch.y.size(0)

    train_acc = correct / total
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}: Train Loss {avg_loss:.4f}, Train Acc {train_acc:.4f}")
    wandb.log({"Train Loss": avg_loss, "Train Accuracy": train_acc})

    # Validation step
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in val_loader:
            batch = batch.to(device)
            out = model(batch)
            correct += (out.argmax(dim=1) == batch.y).sum().item()
            total += batch.y.size(0)

    val_acc = correct / total
    print(f"Validation Accuracy: {val_acc:.4f}")
    wandb.log({"Validation Accuracy": val_acc})

# Save the trained model and finish W&B
torch.save(model.state_dict(), "higgs_gnn_model_fixed.pth")
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mvuduthasaipraneetham[0m ([33mpraneetham[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


✅ Using device: privateuseone:0


Creating Graphs: 100%|██████████| 872666/872666 [00:38<00:00, 22426.96it/s]
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


KeyboardInterrupt: 

In [1]:
# Import necessary libraries
import os
import torch
import torch.nn.functional as F
import torch_directml
import wandb
import numpy as np
from tqdm import tqdm
import h5py
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool

# Initialize Weights & Biases (W&B) for experiment tracking
wandb.init(project="higgs_gnn", name="gnn_training_final", config={"epochs": 20, "batch_size": 32})

# Set device to AMD GPU using DirectML
device = torch_directml.device()
print(f"✅ Using device: {device}")

# Load HDF5 dataset
hdf5_file = r"C:\Users\vudut\OneDrive\Desktop\Python\MINI Project\Data Sets\jet-images_Mass60-100_pT250-300_R1.25_Pix25.hdf5"
with h5py.File(hdf5_file, "r") as f:
    jet_pt = np.array(f["jet_pt"])
    jet_eta = np.array(f["jet_eta"])
    jet_phi = np.array(f["jet_phi"])
    jet_mass = np.array(f["jet_mass"])
    signal = np.array(f["signal"])  # Labels: 1 = Signal, 0 = Background

# Normalize node features (jet_pt, jet_eta, jet_phi, jet_mass)
features = np.stack([jet_pt, jet_eta, jet_phi, jet_mass], axis=1)  # Shape: [N, 4]
features = (features - features.mean(axis=0)) / features.std(axis=0)  # Standardization

# Create graph data with explicit self-loops
graphs = []
for i in tqdm(range(len(features)), desc="Creating Graphs"):
    # Node features: Shape [1, 4] for a single node per graph
    x = torch.tensor(features[i], dtype=torch.float).unsqueeze(0)
    # Self-loop edge_index: Shape [2, 1] indicating a self-loop for the node
    edge_index = torch.tensor([[0], [0]], dtype=torch.long)
    # Label: Shape [1]
    y = torch.tensor([int(signal[i])], dtype=torch.long)
    graphs.append(Data(x=x, edge_index=edge_index, y=y))

# Create DataLoaders for training and validation
batch_size = 32  # Reduced from 64 to improve stability
train_loader = DataLoader(
    graphs[:int(0.8 * len(graphs))],
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,  # Set to 0 to avoid multiprocessing issues
    pin_memory=True  # Faster data transfer to GPU
)
val_loader = DataLoader(
    graphs[int(0.8 * len(graphs)):],
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=True
)

# Define the GNN model with add_self_loops=False since self-loops are in the data
class GNN(torch.nn.Module):
    def __init__(self):
        super(GNN, self).__init__()
        self.conv1 = GCNConv(4, 16, add_self_loops=False)  # Input channels = 4 features
        self.conv2 = GCNConv(16, 32, add_self_loops=False)
        self.fc = torch.nn.Linear(32, 2)  # Output 2 classes

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        x = global_mean_pool(x, batch)  # Graph-level representation
        return self.fc(x)

# Initialize model, optimizer, and loss function
model = GNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()

# Training loop with live progress monitoring
for epoch in range(20):
    model.train()  # Set model to training mode
    total_loss = 0
    correct = 0
    total = 0
    batch_counter = 0  # To track batches for periodic updates

    for batch in train_loader:
        batch_counter += 1
        batch = batch.to(device)
        optimizer.zero_grad()  # Clear previous gradients
        out = model(batch)     # Forward pass
        loss = criterion(out, batch.y)  # Compute loss
        loss.backward()        # Backward pass
        optimizer.step()       # Update weights

        # Accumulate metrics for epoch summary
        total_loss += loss.item()
        pred = out.argmax(dim=1)  # Get predictions
        correct += (pred == batch.y).sum().item()
        total += batch.y.size(0)

        # Print progress every 100 batches
        if batch_counter % 100 == 0:
            batch_loss = loss.item()
            batch_acc = (pred == batch.y).sum().item() / batch.y.size(0)
            print(f"Epoch {epoch+1}, Batch {batch_counter}: Loss = {batch_loss:.4f}, Accuracy = {batch_acc:.4f}")
            # Log to W&B
            wandb.log({"Batch Loss": batch_loss, "Batch Accuracy": batch_acc, "Epoch": epoch+1, "Batch": batch_counter})

    # Epoch summary
    avg_loss = total_loss / len(train_loader)
    train_acc = correct / total
    print(f"Epoch {epoch+1}: Train Loss = {avg_loss:.4f}, Train Accuracy = {train_acc:.4f}")
    # Log epoch metrics to W&B
    wandb.log({"Train Loss": avg_loss, "Train Accuracy": train_acc, "Epoch": epoch+1})

    # Validation step
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in val_loader:
            batch = batch.to(device)
            out = model(batch)
            pred = out.argmax(dim=1)
            correct += (pred == batch.y).sum().item()
            total += batch.y.size(0)
    val_acc = correct / total
    print(f"Validation Accuracy: {val_acc:.4f}")
    wandb.log({"Validation Accuracy": val_acc, "Epoch": epoch+1})

# Save the trained model and finish W&B
torch.save(model.state_dict(), "higgs_gnn_model_final.pth")
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


✅ Using device: privateuseone:0


Creating Graphs: 100%|██████████| 872666/872666 [00:43<00:00, 20167.28it/s]
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch 1, Batch 100: Loss = 0.7063, Accuracy = 0.4375
Epoch 1, Batch 200: Loss = 0.6800, Accuracy = 0.4688
Epoch 1, Batch 300: Loss = 0.6637, Accuracy = 0.6250
Epoch 1, Batch 400: Loss = 0.7106, Accuracy = 0.6250
Epoch 1, Batch 500: Loss = 0.6269, Accuracy = 0.7188
Epoch 1, Batch 600: Loss = 0.6375, Accuracy = 0.6562
Epoch 1, Batch 700: Loss = 0.6270, Accuracy = 0.5938
Epoch 1, Batch 800: Loss = 0.6933, Accuracy = 0.5938
Epoch 1, Batch 900: Loss = 0.6157, Accuracy = 0.6875
Epoch 1, Batch 1000: Loss = 0.7222, Accuracy = 0.5312
Epoch 1, Batch 1100: Loss = 0.5948, Accuracy = 0.7188
Epoch 1, Batch 1200: Loss = 0.6841, Accuracy = 0.5312
Epoch 1, Batch 1300: Loss = 0.5625, Accuracy = 0.8125
Epoch 1, Batch 1400: Loss = 0.5814, Accuracy = 0.6562
Epoch 1, Batch 1500: Loss = 0.5764, Accuracy = 0.6875
Epoch 1, Batch 1600: Loss = 0.6120, Accuracy = 0.6875
Epoch 1, Batch 1700: Loss = 0.6314, Accuracy = 0.6250
Epoch 1, Batch 1800: Loss = 0.6164, Accuracy = 0.6250
Epoch 1, Batch 1900: Loss = 0.5742, A

KeyboardInterrupt: 