In [1]:
import jet
import uproot
import awkward as ak

import pennylane as qml
import pennylane.numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

In [2]:
def get_data_nsubjettiness(channel, num_events):
    """Load the tau1/tau2/tau3 N-subjettiness values of the fat jets in one channel.

    Args:
        channel: Channel name; used to build the ROOT file name.
        num_events: Event count; only used here to build the ROOT file name.

    Returns:
        Tensor obtained by stacking the ``tau1``, ``tau2`` and ``tau3`` columns
        and swapping the first two axes, i.e. one row per jet and one column
        per tau (presumably shape ``(n_jets, 3)`` -- this assumes each tau
        column is one-dimensional; TODO confirm against ``jet.JetEvents``).
    """
    # NOTE(review): the ROOT file handle is never closed explicitly; confirm
    # whether jet.JetEvents reads eagerly before wrapping this in a ``with``.
    events_tree = uproot.open(
        f"../ZprimeToZh_VS_QCD/Analyzer/root_files/{channel}_{num_events}.root"
    )['jets/Events']
    fat_jets = jet.JetEvents(events_tree, jet_type="fatjet", keep_by="pt")

    tau_columns = [
        torch.tensor(fat_jets.nsubjettiness[tau_name])
        for tau_name in ("tau1", "tau2", "tau3")
    ]
    # Stack as (tau, jet, ...) and then swap to (jet, tau, ...).
    return torch.stack(tau_columns).transpose(0, 1)

def get_data_pt_eta_phi(channel, num_events, num_particles, jet_type):
    """Build a fixed-width (pt, eta, phi) feature matrix from the hardest jet daughters.

    For each of the first ``num_events`` events the daughters are ordered by
    descending ``pt`` and the leading ``num_particles`` of them are kept. The
    features are interleaved per particle as
    ``[pt_0, eta_0, phi_0, pt_1, eta_1, phi_1, ...]``; events with fewer than
    ``num_particles`` daughters are zero-padded on the right.

    Args:
        channel: Channel name; used to build the ROOT file name.
        num_events: Number of events (rows) to return; also part of the file name.
        num_particles: Number of leading daughters kept per event.
        jet_type: Forwarded to ``jet.JetEvents``.

    Returns:
        Tensor of shape ``(num_events, 3 * num_particles)`` in torch's default
        floating-point dtype.

    Raises:
        IndexError: If the file provides fewer than ``num_events`` events.
    """
    root_path = f"../ZprimeToZh_VS_QCD/Analyzer/root_files/{channel}_{num_events}.root"
    # NOTE(review): the ROOT file handle is never closed explicitly; confirm
    # whether jet.JetEvents reads eagerly before wrapping this in a ``with``.
    root_file = uproot.open(root_path)
    root_events = root_file['jets/Events']
    jet_events = jet.JetEvents(root_events, jet_type, keep_by="pt")

    # Look the daughter collection up once instead of once per matrix element.
    daughters = jet_events.daughter
    all_pt = ak.Array(daughters["pt"])
    num_available = len(all_pt)
    if num_available < num_events:
        # Fail up front with a readable message instead of an index error
        # halfway through filling the matrix.
        raise IndexError(
            f"requested {num_events} events but only {num_available} are available in {root_path}"
        )

    trimmed_events = torch.zeros((num_events, 3 * num_particles))
    if num_events == 0 or num_particles == 0:
        return trimmed_events

    # Per-event indices that order the daughters by descending pt.
    idx_argsort = ak.argsort(all_pt[:num_events], ascending=False)

    for offset, field in enumerate(("pt", "eta", "phi")):
        ordered = ak.Array(daughters[field])[:num_events][idx_argsort]
        # Clip to the leading num_particles daughters and zero-fill short events,
        # which turns the jagged array into a regular (num_events, num_particles) one.
        padded = ak.pad_none(ordered, num_particles, axis=1, clip=True)
        dense = ak.to_numpy(ak.fill_none(padded, 0.0))
        # astype() yields a native-endian copy torch can ingest; the slice
        # assignment then casts to the dtype of trimmed_events.
        trimmed_events[:, offset::3] = torch.tensor(dense.astype("float64"))
    return trimmed_events


class JetData(Dataset):
    """Binary-classification dataset: signal rows are labelled 1, background rows 0.

    Attributes are ``x`` (signal rows followed by background rows) and ``y``
    (a column of ones followed by a column of zeros, one label per row).
    """

    def __init__(self, signal_events, background_events):
        """Concatenate both samples, build the labels and print the two shapes."""
        n_signal = len(signal_events)
        n_background = len(background_events)
        label_parts = [torch.ones(n_signal, 1), torch.zeros(n_background, 1)]
        self.x = torch.cat([signal_events, background_events], dim=0)
        self.y = torch.cat(label_parts, dim=0)
        print(self.x.shape, self.y.shape)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        features = self.x[index]
        label = self.y[index]
        return features, label

    def __len__(self):
        """Return the number of labelled rows."""
        return self.y.shape[0]

In [3]:
class ClassicalModel(torch.nn.Module):
    """Fully connected binary classifier that emits one raw logit per sample.

    The output is deliberately left without an activation: the training loop
    uses ``BCEWithLogitsLoss``, which applies the sigmoid itself, and
    classifies with the rule ``logit >= 0``.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of every hidden layer (unused when
            ``hidden_layers == 0``).
        hidden_layers: ``0`` builds a single linear layer (logistic
            regression). Otherwise the network is an input projection
            ``input_dim -> hidden_dim`` followed by ``hidden_layers`` blocks of
            ``hidden_dim -> hidden_dim``, each with a ReLU, and a final
            ``hidden_dim -> 1`` linear layer.
    """

    def __init__(self, input_dim, hidden_dim=0, hidden_layers=0):
        super().__init__()
        if hidden_layers == 0:
            # No ReLU here: clamping the logit to >= 0 would force
            # sigmoid(logit) >= 0.5, so class 0 could never be predicted.
            net = [nn.Linear(input_dim, 1)]
        else:
            net = [nn.Linear(input_dim, hidden_dim), nn.ReLU()]
            for _ in range(hidden_layers):
                net += [nn.Linear(hidden_dim, hidden_dim), nn.ReLU()]
            net.append(nn.Linear(hidden_dim, 1))
        # BCEWithLogitsLoss already contains a sigmoid function
        self.net = nn.Sequential(*net)

    def forward(self, x):
        """Return the raw logits produced by the network for the batch ``x``."""
        return self.net(x)

class HybridModel(nn.Module):
    """Placeholder model: registers no layers and defines no forward pass yet.

    The constructor takes the same arguments as ``ClassicalModel`` but does not
    use any of them so far.
    """

    def __init__(self, input_dim, hidden_dim=0, hidden_layers=0):
        nn.Module.__init__(self)

In [4]:
def train(model, data_loader, cf):
    """Train ``model`` with Adam and BCE-with-logits, printing metrics every epoch.

    Both the training and the test metrics are per-sample averages, so they
    stay comparable even when the last batch is smaller than the others.

    Args:
        model: Module mapping a batch of features to one raw logit per sample.
        data_loader: Mapping with ``"train"`` and ``"test"`` iterables that
            yield ``(x, y_true)`` batches.
        cf: Mapping with the keys ``"learning_rate"``, ``"weight_decay"`` and
            ``"num_epochs"``.

    Returns:
        None. One summary line is printed per epoch.
    """
    loss = nn.BCEWithLogitsLoss(reduction="mean")
    opt = torch.optim.Adam(model.parameters(), lr=cf["learning_rate"], weight_decay=cf["weight_decay"])
    for epoch in range(cf["num_epochs"]):
        train_loss, train_acc, train_seen = 0.0, 0.0, 0
        test_loss, test_acc, test_seen = 0.0, 0.0, 0

        model.train()
        for x, y_true in data_loader["train"]:
            opt.zero_grad()
            y_pred = model(x)
            batch_loss = loss(y_pred, y_true)
            batch_loss.backward()
            opt.step()
            # The loss is a batch mean, so weight it by the batch size to get
            # a per-sample average at the end of the epoch.
            train_loss += batch_loss.item() * len(x)
            # BCEWithLogitsLoss -> >=0 | Sigmoid + BCELoss -> >=0.5
            train_acc += torch.sum((y_pred.detach() >= 0) == y_true).item()
            train_seen += len(x)

        model.eval()
        # Evaluation needs no gradients; skipping the autograd graph saves
        # memory and time.
        with torch.no_grad():
            for x, y_true in data_loader["test"]:
                y_pred = model(x)
                batch_loss = loss(y_pred, y_true)
                test_loss += batch_loss.item() * len(x)
                # BCEWithLogitsLoss -> >=0 | Sigmoid + BCELoss -> >=0.5
                test_acc += torch.sum((y_pred >= 0) == y_true).item()
                test_seen += len(x)

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        train_loss /= max(train_seen, 1)
        train_acc /= max(train_seen, 1)
        test_loss /= max(test_seen, 1)
        test_acc /= max(test_seen, 1)
        print(f"Epoch {epoch} : train = ({train_loss:.2f}, {train_acc:.2f}) | test = ({test_loss:.2f}, {test_acc:.2f})")

In [5]:
# Events read per channel and leading daughters kept per jet.
num_events = 1000
num_particles = 5

# Train/test split, applied to the signal and background samples separately.
data_ratio = 0.9
num_train = int(data_ratio * num_events)
num_test = num_events - num_train

# Alternative channels: signal "ZprimeToZhToZinvhbb", background "QCD_HT1500to2000".
signal_channel = "ZprimeToZhToZlephbb"
background_channel = "QCD_HT2000toInf"

# Alternative feature set: get_data_nsubjettiness(<channel>, num_events).
signal_events, background_events = (
    get_data_pt_eta_phi(channel_name, num_events, num_particles, jet_type="fatjet")
    for channel_name in (signal_channel, background_channel)
)

# The first num_train rows of each sample go to training, the rest to testing.
data_train = JetData(signal_events[:num_train], background_events[:num_train])
data_test = JetData(signal_events[num_train:], background_events[num_train:])
data_loader = dict(
    train=DataLoader(data_train, batch_size=32, shuffle=True, drop_last=True),
    test=DataLoader(data_test, batch_size=32, shuffle=False, drop_last=False),
)

torch.Size([1800, 15]) torch.Size([1800, 1])
torch.Size([200, 15]) torch.Size([200, 1])


In [6]:
# Optimiser and schedule settings read by train().
cf = dict(learning_rate=1e-2, weight_decay=0, num_epochs=50)

# For the n-subjettiness features use instead:
#   ClassicalModel(input_dim=3, hidden_dim=128, hidden_layers=4)
c_model = ClassicalModel(
    input_dim=3 * num_particles,
    hidden_dim=16 * num_particles,
    hidden_layers=2,
)  # pt eta phi
train(c_model, data_loader, cf)

Epoch 0 : train = (1.62, 0.63) | test = (0.53, 0.70)
Epoch 1 : train = (0.49, 0.78) | test = (0.45, 0.82)
Epoch 2 : train = (0.40, 0.85) | test = (0.39, 0.83)
Epoch 3 : train = (0.30, 0.90) | test = (0.39, 0.85)
Epoch 4 : train = (0.29, 0.88) | test = (0.28, 0.88)
Epoch 5 : train = (0.24, 0.91) | test = (0.32, 0.86)
Epoch 6 : train = (0.24, 0.91) | test = (0.26, 0.90)
Epoch 7 : train = (0.21, 0.92) | test = (0.32, 0.87)
Epoch 8 : train = (0.22, 0.92) | test = (0.26, 0.90)
Epoch 9 : train = (0.20, 0.93) | test = (0.28, 0.90)
Epoch 10 : train = (0.21, 0.92) | test = (0.27, 0.89)
Epoch 11 : train = (0.20, 0.93) | test = (0.27, 0.88)
Epoch 12 : train = (0.22, 0.91) | test = (0.31, 0.88)
Epoch 13 : train = (0.23, 0.92) | test = (0.27, 0.90)
Epoch 14 : train = (0.22, 0.91) | test = (0.26, 0.91)
Epoch 15 : train = (0.21, 0.92) | test = (0.29, 0.89)
Epoch 16 : train = (0.21, 0.92) | test = (0.28, 0.89)
Epoch 17 : train = (0.20, 0.93) | test = (0.28, 0.90)
Epoch 18 : train = (0.20, 0.93) | test