## Task: Quarks and Gluons
The goal is to build a classification model that distinguishes quark (1) jets from gluon (0) jets, based on the following features of the particles in each jet:

p_T (transverse momentum of the particle)
Rapidity
Azimuthal angle (ϕ)
PDG ID (identifies the type of particle)
After preprocessing the data (wherever applicable) and implementing various classification models, I aim to compare their efficiency and accuracy in order to identify the best one for this problem statement.

In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [22]:
!pip install energyflow
import energyflow



In [23]:
import torch
import torch.nn as nn
class SimpleModel(nn.Module):
    """Minimal two-layer perceptron: 4 inputs -> 10 hidden units (ReLU) -> 1 output.

    No activation is applied after the final layer, so the output is a raw,
    unbounded score.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(4, 10)
        self.fc2 = nn.Linear(10, 1)

    def forward(self, x):
        """Return one score per sample for ``x``, whose last dimension must be 4."""
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

In [24]:
# NOTE(review): this line is a shell command, not Python; it only runs inside an
# IPython/Jupyter session (another cell in this file spells it `!pip install ...`).
pip install torch torchvision



In [25]:
# Instantiate the small MLP together with its loss function and optimiser.
model = SimpleModel()
# NOTE(review): MSE is a regression loss, whereas the task described at the top
# of this file is binary classification -- confirm this choice is intended.
criterion = nn.MSELoss()  # For regression problems
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [26]:
# Load the .npz archive; the following cells read its 'X' and 'y' entries.
data = np.load("/content/QG_ML.npz")

In [27]:
# Put each array from the archive into its own single-element list.
X_data = [data['X']]
Y_data = [data['y']]


In [28]:
# Turn the single-element lists into NumPy arrays; this adds a leading axis of
# length 1 in front of each wrapped array.
X_data = np.array(X_data)
Y_data = np.array(Y_data)

In [29]:
# Keep only the first entry along the leading axis; the recorded output of this
# cell shows the resulting shape as (100000, 137, 4).
X_data = X_data[0]
X_data.shape

(100000, 137, 4)

In [30]:
# Transpose the label array; the recorded output of this cell shows the
# resulting shape as (100000, 1).
y = Y_data.T
y.shape


(100000, 1)

In [31]:
# X = []

# for i in range(10000):

#   # array = tinku.DataFrame(X_data[i])
#   # array['4'] = y[i]

#   X_data_with_y = pratyush.concatenate((X_data, y[:, None].repeat(137, axis=1)), axis=2)
#   X.append(X_data_with_y)
#   # if (i < 1):

#   # array[] =  y[i]

#   # X.append(array)

# X_data_with_y[0]

In [32]:
size = 137  # not referenced anywhere else in this cell
X = []

# Reduce every jet (a 2-D array: rows x 4 feature columns) to one value per
# column: sqrt(sum(column ** 2) / (nonzero_count / 4)).
for jet in X_data:
    # Number of non-zero entries in each feature column.
    # NOTE(review): the counts are divided by 4 before being used as the
    # divisor, which makes every result twice the plain RMS over the non-zero
    # entries -- confirm this scaling is intended.
    nonzero_counts = np.count_nonzero(jet, axis=0) / 4

    # A column with no non-zero entries yields a zero divisor (0 / 0 -> nan
    # together with a NumPy runtime warning).
    sum_of_squares = np.sum(np.square(jet), axis=0)
    X.append(np.sqrt(sum_of_squares / nonzero_counts))

In [33]:
# NOTE(review): this rebinds X to the raw X_data, so the per-jet feature list
# built by the loop in the previous cell is discarded -- confirm this is intended.
X = X_data

In [34]:
# Make sure X is a NumPy array.
X = np.array(X)


In [38]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.model_selection import train_test_split


# float32 tensor copies of the labels and the inputs; every length-1 axis is
# removed from the labels.
y_tensor = torch.tensor(Y_data, dtype=torch.float32).squeeze()
X_data_tensor = torch.tensor(X_data, dtype=torch.float32)
print(y_tensor.shape)

# Hold out 20% of the samples for testing, with a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X_data_tensor,
    y_tensor,
    test_size=0.2,
    random_state=42,
)

# Mini-batches of 64; only the training loader shuffles.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

class JetClassifier(nn.Module):
    """Fully connected binary classifier over a flattened jet.

    Every sample is flattened to ``n_particles * n_features`` values and passed
    through two ReLU hidden layers (512 and 256 units) followed by a single
    sigmoid unit, so each output lies between 0 and 1.

    Args:
        n_particles: Number of particle rows per jet (default 137).
        n_features: Number of features per particle (default 4).
    """

    def __init__(self, n_particles=137, n_features=4):
        super().__init__()
        self.fc1 = nn.Linear(n_particles * n_features, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of sigmoid outputs for a batch ``x``."""
        # reshape (unlike view) also accepts non-contiguous input, e.g. a
        # transposed or sliced tensor.
        x = x.reshape(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.sigmoid(self.fc3(x))

# Binary cross-entropy on the model's sigmoid outputs, optimised with Adam.
model = JetClassifier()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)

num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    # The batch variable is named `inputs` so that it does not overwrite the
    # module-level `data` object loaded earlier in the file.
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        outputs = model(inputs)

        # Drop only the trailing axis: a bare squeeze() would also remove the
        # batch axis of a single-sample batch and break the shape match that
        # BCELoss requires.
        loss = criterion(outputs.squeeze(-1), labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    avg_train_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_train_loss:.4f}")

# Measure accuracy on the held-out set, thresholding the outputs at 0.5.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    # The batch variable is named `inputs` so that it does not overwrite the
    # module-level `data` object loaded earlier in the file.
    for inputs, labels in test_loader:
        outputs = model(inputs)
        # Drop only the trailing axis so a single-sample batch keeps shape (1,).
        predicted = (outputs.squeeze(-1) > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

accuracy = correct / total
print(f"Accuracy on test set: {accuracy * 100:.2f}%")


torch.Size([100000])
Epoch [1/100], Loss: 2.2013
Epoch [2/100], Loss: 1.1875
Epoch [3/100], Loss: 0.9644
Epoch [4/100], Loss: 0.8396
Epoch [5/100], Loss: 0.7567
Epoch [6/100], Loss: 0.6943
Epoch [7/100], Loss: 0.6448
Epoch [8/100], Loss: 0.6068
Epoch [9/100], Loss: 0.5726
Epoch [10/100], Loss: 0.5467
Epoch [11/100], Loss: 0.5215
Epoch [12/100], Loss: 0.5015
Epoch [13/100], Loss: 0.4835
Epoch [14/100], Loss: 0.4666
Epoch [15/100], Loss: 0.4517
Epoch [16/100], Loss: 0.4392
Epoch [17/100], Loss: 0.4265
Epoch [18/100], Loss: 0.4163
Epoch [19/100], Loss: 0.4053
Epoch [20/100], Loss: 0.3958
Epoch [21/100], Loss: 0.3865
Epoch [22/100], Loss: 0.3783
Epoch [23/100], Loss: 0.3704
Epoch [24/100], Loss: 0.3625
Epoch [25/100], Loss: 0.3559
Epoch [26/100], Loss: 0.3485
Epoch [27/100], Loss: 0.3420
Epoch [28/100], Loss: 0.3353
Epoch [29/100], Loss: 0.3297
Epoch [30/100], Loss: 0.3231
Epoch [31/100], Loss: 0.3177
Epoch [32/100], Loss: 0.3122
Epoch [33/100], Loss: 0.3071
Epoch [34/100], Loss: 0.3015
Ep