In [1]:
import jet
import uproot
import awkward as ak

import pennylane as qml
import pennylane.numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

In [2]:
def get_data_nsubjettiness(channel, num_events, cut=None):
    expressions = ["fatjet_tau1", "fatjet_tau2", "fatjet_tau3"]
    jet_events = jet.JetEvents(channel, num_events, "fatjet", cut, expressions)
    trimmed_events = torch.stack((
        torch.tensor(jet_events.events["fatjet_tau1"]),
        torch.tensor(jet_events.events["fatjet_tau2"]),
        torch.tensor(jet_events.events["fatjet_tau3"]),
        torch.tensor(jet_events.events["fatjet_tau2"] / jet_events.events["fatjet_tau1"]),
        torch.tensor(jet_events.events["fatjet_tau3"] / jet_events.events["fatjet_tau2"]),
    ))
    trimmed_events = torch.transpose(trimmed_events, 0, 1)
    return trimmed_events

def get_data_jet_pt_eta(channel, num_events, jet_type, cut=None):
    expressions = [f"{jet_type}_pt", f"{jet_type}_eta"]
    jet_events = jet.JetEvents(channel, num_events, jet_type, cut, expressions)
    trimmed_events = torch.stack((
        torch.tensor(jet_events.events[f"{jet_type}_pt"]),
        torch.tensor(jet_events.events[f"{jet_type}_eta"]),
    ))
    trimmed_events = torch.transpose(trimmed_events, 0, 1)
    return trimmed_events

def get_data_daughter_pt_eta(channel, num_events, num_particles, jet_type, cut=None):
    expressions = [f"{jet_type}_daughter_{feature}" for feature in ["pt", "eta"]]
    jet_events = jet.JetEvents(channel, num_events, jet_type, cut, expressions)

    trimmed_events = torch.zeros((len(jet_events.events), 2 * num_particles))
    idx_argsort = ak.argsort(jet_events.events[f"{jet_type}_daughter_pt"], ascending=False)
    for i in range(len(jet_events.events)):
        for j in range(min(len(idx_argsort[i]), num_particles)):
            trimmed_events[i][2*j+0] = jet_events.events[f"{jet_type}_daughter_pt"][i][idx_argsort[i][j]]
            trimmed_events[i][2*j+1] = jet_events.events[f"{jet_type}_daughter_eta"][i][idx_argsort[i][j]]
    return trimmed_events

class JetData(Dataset):
    def __init__(self, signal_events, background_events):
        self.x = torch.cat((signal_events ,background_events))
        self.y = torch.cat((torch.ones((len(signal_events)), 1), torch.zeros((len(background_events)), 1)))
        print(self.x.shape, self.y.shape)
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return len(self.y)

In [3]:
class ClassicalModel(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim=0, hidden_layers=0):
        super().__init__()
        if hidden_layers == 0:
            net = [nn.Linear(input_dim, 1), nn.ReLU()]
        else:
            net = []
            net.append(nn.Linear(input_dim, hidden_dim))
            net.append(nn.ReLU())
            for _ in range(hidden_layers):
                net.append(nn.Linear(hidden_dim, hidden_dim))
                net.append(nn.ReLU())
            net.append(nn.Linear(hidden_dim, 1))
        # BCEWithLogitsLoss already contains a sigmoid function
        self.net = nn.Sequential(*net)
    def forward(self, x):
        y = self.net(x)
        return y

class HybridModel(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim=0, hidden_layers=0):
        super().__init__()

In [4]:
def train(model, data_loader, cf):
    loss = nn.BCEWithLogitsLoss(reduction="mean")
    opt = torch.optim.Adam(model.parameters(), lr=cf["learning_rate"], weight_decay=cf["weight_decay"])
    for epoch in range(cf["num_epochs"]):
        train_loss, train_acc, train_count = 0, 0, 0
        test_loss, test_acc = 0, 0
        model.train()
        for x, y_true in data_loader["train"]:
            opt.zero_grad()
            y_pred = model(x)
            batch_loss = loss(y_pred, y_true)
            batch_loss.backward()
            train_count += 1
            train_loss += batch_loss.detach()
            # BCEWithLogitsLoss -> >=0 | Sigmoid + BCELoss -> >=0.5
            train_acc += torch.sum((y_pred >= 0) == y_true) / len(x)
            opt.step()
        model.eval()
        for x, y_true in data_loader["test"]:
            y_pred = model(x)
            batch_loss = loss(y_pred, y_true)
            test_loss += batch_loss.detach() * len(x)
            # BCEWithLogitsLoss -> >=0 | Sigmoid + BCELoss -> >=0.5
            test_acc += torch.sum((y_pred >= 0) == y_true).item()
        train_loss /= train_count
        train_acc /= train_count
        test_loss /= len(data_loader["test"].dataset)
        test_acc /= len(data_loader["test"].dataset)
        print(f"Epoch {epoch} : train = ({train_loss:.2f}, {train_acc:.2f}) | test = ({test_loss:.2f}, {test_acc:.2f})")

In [19]:
jet_type  = "fatjet"
num_events = 1000
data_ratio = 0.9

# signal_channel = "ZprimeToZhToZinvhbb"
signal_channel = "ZprimeToZhToZlephbb"
# background_channel = "QCD_HT1500to2000"
background_channel = "QCD_HT2000toInf"
# cut = None
cut = f"({jet_type}_pt >= 800) & ({jet_type}_pt <= 1200)"

''' n-subjetiness '''
# signal_events = get_data_nsubjettiness(signal_channel, num_events, cut)
# background_events = get_data_nsubjettiness(background_channel, num_events, cut)
# input_dim = 5

''' jet/fatjet '''
# signal_events = get_data_jet_pt_eta(signal_channel, num_events, jet_type, cut)
# background_events = get_data_jet_pt_eta(background_channel, num_events, jet_type, cut)
# input_dim = 2

''' jet/fatjet daughter '''
num_particles = 5
signal_events = get_data_daughter_pt_eta(signal_channel, num_events, num_particles, jet_type, cut)
background_events = get_data_daughter_pt_eta(background_channel, num_events, num_particles, jet_type, cut)
input_dim = 2 * num_particles

In [20]:
cf = {
    "learning_rate":1E-2,
    "weight_decay":0,
    "num_epochs":50,
    "batch_size":32,
}

num_sig, num_bkg = len(signal_events), len(background_events)
num_data = min(num_sig, num_bkg)
num_train = int(data_ratio * num_data)
print(f"length signal = {num_sig} | length background = {num_bkg} | choose number of data = {num_data}")
num_test = num_data - num_train

data_train = JetData(signal_events[:num_train], background_events[:num_train])
data_test = JetData(signal_events[num_train:], background_events[num_train:])
data_loader = {
    "train":DataLoader(data_train, cf["batch_size"], shuffle=True, drop_last=True),
    "test":DataLoader(data_test, cf["batch_size"], shuffle=False, drop_last=False),
    }

c_model = ClassicalModel(input_dim, hidden_dim=20*input_dim, hidden_layers=4)
train(c_model, data_loader, cf)

length signal = 203 | length background = 849 | choose number of data = 203
torch.Size([364, 10]) torch.Size([364, 1])
torch.Size([688, 10]) torch.Size([688, 1])
Epoch 0 : train = (5.50, 0.52) | test = (0.85, 0.49)
Epoch 1 : train = (0.64, 0.63) | test = (1.27, 0.54)
Epoch 2 : train = (0.59, 0.69) | test = (0.55, 0.93)
Epoch 3 : train = (0.58, 0.70) | test = (0.53, 0.97)
Epoch 4 : train = (0.62, 0.65) | test = (0.62, 0.83)
Epoch 5 : train = (0.59, 0.71) | test = (0.75, 0.75)
Epoch 6 : train = (0.53, 0.74) | test = (0.66, 0.88)
Epoch 7 : train = (0.54, 0.74) | test = (0.62, 0.83)
Epoch 8 : train = (0.52, 0.75) | test = (0.56, 0.87)
Epoch 9 : train = (0.54, 0.73) | test = (0.57, 0.87)
Epoch 10 : train = (0.50, 0.75) | test = (0.73, 0.80)
Epoch 11 : train = (0.52, 0.75) | test = (0.53, 0.86)
Epoch 12 : train = (0.51, 0.76) | test = (0.42, 0.94)
Epoch 13 : train = (0.51, 0.74) | test = (0.48, 0.93)
Epoch 14 : train = (0.49, 0.77) | test = (0.40, 0.93)
Epoch 15 : train = (0.48, 0.77) | test