In [2]:
import os
import time
import random
import sys
import numpy as np # type: ignore
import matplotlib.pyplot as plt # type: ignore
from tqdm import tqdm # type: ignore

import torch # type: ignore
import torch.nn as nn # type: ignore
import torch.nn.functional as F # type: ignore
import torch.optim as optim # type: ignore
from torch.utils.data import Dataset, DataLoader # type: ignore

import uproot as ur # type: ignore

# Report the platform and the interpreter / library versions used for this run,
# so the recorded results can be tied to a specific environment.
print("System Platform: ", sys.platform)
print('System Python Version:', sys.version)
print('PyTorch version', torch.__version__)
print('Numpy version', np.__version__)

System Platform:  linux
System Python Version: 3.10.16 | packaged by conda-forge | (main, Dec  5 2024, 14:16:10) [GCC 13.3.0]
PyTorch version 2.6.0
Numpy version 2.1.3


Preliminary Data Manipulation

In [3]:
# ROOT files holding the background and signal training samples. The paths are
# relative, so they are resolved against the kernel's working directory.
# NOTE(review): the background file is tagged v5 and the signal file v6 — confirm
# that the version mismatch is intended.
background_data_file_name = "train_bkg_data_sideBands_lowQ_wPreselBDT_v5.root"
signal_data_file_name = "train_sig_rare_lowQ_wPreselBDT_v6.root"

# Open both files with uproot. The handles are kept at module level because later
# cells read the 'mytree' tree from them; they are not closed in the cells shown here.
background_data_file = ur.open(background_data_file_name)
signal_data_file = ur.open(signal_data_file_name)

In [4]:
# Branch expressions read from the tree; each one becomes one model input column.
features = [
    'Bprob',
    'BsLxy',
    'L2iso/L2pt',
    'Bcos',
    'Kiso/Kpt',
    'LKdz',
    'LKdr',
    'Passymetry',
    'Kip3d/Kip3dErr',
    'L1id',
    'L2id',
]
# Name of the branch that carries the per-event weight.
sample_weights = 'trig_wgt'
# Selection applied while reading, so only events passing it are returned.
preselection = '(KLmassD0 > 2.) & ((Mll>1.05) & (Mll<2.45))'

# Read the selected events as {expression: numpy array} dictionaries.
sig_dict = signal_data_file['mytree'].arrays(features, cut=preselection, library='np')
bkg_dict = background_data_file['mytree'].arrays(features, cut=preselection, library='np')

In [5]:
# Stack the per-branch arrays row-wise (one row per branch, in the dictionaries'
# iteration order); axis 1 is then the event axis used by the shape[1] lookups below.
signal = np.stack([*sig_dict.values()])
backgr = np.stack([*bkg_dict.values()])

# Signal events take their weight from the tree (same cut as the features, so the
# two reads select the same events); every background event gets unit weight.
sig_weight = signal_data_file['mytree'].arrays(sample_weights, cut=preselection, library='np')[sample_weights]
bkg_weight = np.ones(backgr.shape[1])

# Signal first, background second — the same ordering is used for X, y and weights.
# X is transposed so that rows are events and columns are features.
X = np.concatenate((signal, backgr), axis=1).T
# Labels: 1.0 for every signal event, followed by 0.0 for every background event.
y = np.repeat([1.0, 0.0], [signal.shape[1], backgr.shape[1]])
weights = np.concatenate((sig_weight, bkg_weight))

In [16]:
# Tiny hand-made example: a 3x3 integer feature matrix holding 1..9 row by row,
# with one label per row.
sample_X = np.arange(1, 10).reshape(3, 3)
sample_y = np.array([1, 1, 0])

Dataset & DataLoader Instantiation

In [None]:
class Transform:
    """Identity transform; base class for callables applied to a feature array."""

    def __call__(self, data: np.ndarray):
        """Return ``data`` unchanged (the very same object)."""
        return data

class Normalize(Transform):
    """Min-max scale an array along axis 0 so each column spans [0, 1]."""

    def __call__(self, data: np.ndarray):
        """Return a float64 copy of ``data`` rescaled column-wise to [0, 1].

        Args:
            data: Array whose first axis indexes samples.

        Returns:
            A new float64 array of the same shape. The input is not modified.
            A column whose values are all equal maps to 0.0 instead of NaN,
            and an empty input is returned as an empty float64 array.
        """
        # astype makes a copy, so the in-place operations below never touch
        # the caller's array.
        data = data.astype(np.float64)

        # np.min / np.max raise on a zero-size reduction; nothing to scale anyway.
        if data.size == 0:
            return data

        min_values = np.min(data, axis=0)
        max_values = np.max(data, axis=0)

        # A constant column has zero range; dividing by it would give 0/0 = NaN.
        # Divide by 1 there instead, which leaves the already-shifted zeros as is.
        value_range = max_values - min_values
        value_range = np.where(value_range == 0, 1.0, value_range)

        data -= min_values
        data /= value_range

        return data


In [None]:
class ParticlesDataset(Dataset):
    """Tensor-backed dataset that pairs each feature row with its class label.

    The transform is applied once, to the whole array, at construction time.
    Features are stored as float32 and labels as ``torch.long``.
    """

    def __init__(self, data: np.ndarray, labels: np.ndarray, transform: Transform = Transform()):
        transformed = transform(data)
        self.data = torch.tensor(transformed, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        # Number of samples is the length of the first axis of the feature tensor.
        return self.data.size(0)

    def __getitem__(self, index):
        sample = self.data[index]
        target = self.labels[index]
        return sample, target

    @property
    def classes(self):
        """Human-readable class names."""
        return ['Background', 'Signal']

    @property
    def features(self):
        """Names of the input features (a fixed list, independent of the data passed in)."""
        return [
            'Bprob',
            'BsLxy',
            'L2iso/L2pt',
            'Bcos',
            'Kiso/Kpt',
            'LKdz',
            'LKdr',
            'Passymetry',
            'Kip3d/Kip3dErr',
            'L1id',
            'L2id',
        ]
    

In [29]:
# Wrap the full feature matrix and labels in a dataset, min-max scaling the features
# through Normalize, then serve shuffled mini-batches of 32 samples.
dataset = ParticlesDataset(X, y, Normalize())
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

Shallow/Narrow Architecture:
This is a deliberately small, simple model that acts as a performance baseline: it has few layers and few nodes in each layer.
Below is the original starting point I used for this architecture; it may be subject to change.

Original Layers:

    Input ReLU Linear 12 -> 32

    Hidden ReLU Linear 32 -> 16

    Output Sigmoid Linear 16 -> 1

Deep/Narrow Architecture:
There will be more layers in this model, but the number of nodes per layer will remain relatively small. The number of nodes per layer will also remain balanced.
Below is the original starting point I used for this architecture; it may be subject to change.

Original Layers:

    Input ReLU Linear 12 -> 32

    Hidden ReLU Linear 32 -> 32

    Hidden ReLU Linear 32 -> 32

    Hidden ReLU Linear 32 -> 32

    Hidden ReLU Linear 32 -> 32

    Hidden ReLU Linear 32 -> 32

    Hidden ReLU Linear 32 -> 32

    Hidden ReLU Linear 32 -> 16

    Output Sigmoid Linear 16 -> 1

Shallow/Wide Architecture: There will be relatively fewer layers in this one, but more nodes in each one.
Below is the original starting point I used for this architecture; it may be subject to change.

Original Layers:

    Input ReLU Linear 12 -> 64

    Hidden ReLU Linear 64 -> 128

    Hidden ReLU Linear 128 -> 64

    Hidden ReLU Linear 64 -> 32

    Hidden ReLU Linear 32 -> 16

    Output Sigmoid Linear 16 -> 1