In [1]:
from dataloader import load_data
import torch
import torch.nn as nn
import utils
from 

### Let's open the training and validation files containing examples for top quarks (signal) and QCD jets (background)

In [2]:
# Directory that holds the dataset files read by load_data.
input_folder = '/global/cfs/cdirs/trn016/transformer'

# Training loader; num_evt is only passed for the training split.
train_data = load_data(
    'top',
    input_folder,
    batch=128,
    dataset_type='train',
    num_evt=100_000,
)

# Validation loader; no num_evt is passed here.
val_data = load_data(
    'top',
    input_folder,
    batch=128,
    dataset_type='val',
)

In [3]:
# Report the length of each loader (number of batches).
print(
    f"Loading {len(train_data)} batches of events for training and {len(val_data)} for validation"
)

Loading 781 batches of events for training and 3148 for validation


### We now need to create a model that will take the data as input and predict a label for each data entry. Let's create a config dictionary with the network parameters

In [4]:
# Network hyperparameters read by the model constructor.
config = dict(
    # Number of hidden (Linear + activation) pairs.
    num_layers=2,
    # Width of every hidden layer.
    hidden_dim=64,
    # Non-linearity; alternatives are listed at
    # https://docs.pytorch.org/docs/stable/nn.html#non-linear-activations-weighted-sum-nonlinearity
    activation=nn.ReLU(),
)

In [5]:
class SimpleDeepSets(nn.Module):
    """Per-particle MLP, mean over particles, then a linear classifier.

    The same stack of linear layers is applied to every particle. The
    resulting per-particle features are averaged along dimension 1 and mapped
    to ``num_classes`` outputs.

    Args:
        input_dim: Number of features per particle.
        config: Mapping with the keys ``"hidden_dim"`` (width of every hidden
            layer), ``"num_layers"`` (number of hidden Linear + activation
            pairs) and ``"activation"`` (an ``nn.Module`` used as a template
            for the activation of each hidden layer).
        num_classes: Size of the output layer.
    """

    def __init__(self, input_dim, config, num_classes=2):
        # Imported locally so that this cell stands on its own.
        import copy

        super().__init__()
        hidden_dim = config["hidden_dim"]
        self.input_layer = nn.Linear(input_dim, hidden_dim)

        layers = []
        for _ in range(config["num_layers"]):
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            # Each hidden layer gets its own copy of the activation. Reusing
            # the single instance from `config` would make every layer (and
            # the caller's config object) share the same weights whenever a
            # parametric activation such as nn.PReLU is chosen. For stateless
            # activations such as nn.ReLU the copy behaves identically.
            layers.append(copy.deepcopy(config["activation"]))
        self.hidden_layers = nn.ModuleList(layers)

        self.output_layer = nn.Linear(hidden_dim, num_classes)

    def forward(self, inputs):
        """Compute class scores for a batch of particle sets.

        Args:
            inputs: Rank-3 tensor indexed as ``[batch, particle, feature]``
                (axis 0 is presumably the batch). A particle whose feature at
                index 2 is exactly zero is treated as zero padding.

        Returns:
            The output of ``output_layer`` applied to the particle-averaged
            hidden features, one row per entry along axis 0.
        """
        # 1.0 for real particles, 0.0 for padded ones; trailing axis added so
        # the mask broadcasts over the feature dimension.
        zero_pad_mask = (inputs[:, :, 2] != 0).unsqueeze(-1).float()

        # Re-apply the mask after every layer: a Linear bias (or an activation
        # with f(0) != 0) would otherwise make padded rows non-zero again.
        x = self.input_layer(inputs) * zero_pad_mask
        for layer in self.hidden_layers:
            x = layer(x) * zero_pad_mask

        # Aggregate over particles. Padded rows are zero here but still count
        # in the denominator, so this is a mean over the padded length.
        x = x.mean(1)
        return self.output_layer(x)

In [6]:
# Four features per particle: delta eta, delta phi, log(pT), log(E).
model = SimpleDeepSets(input_dim=4,config=config)

### Now we are going to create the trainer object that will train the model, but first, let's set up the optimizer, the learning rate, the number of epochs, and the early-stopping patience

In [7]:
# Optimizer class (not an instance).
# NOTE(review): presumably utils.Trainer instantiates it with the model parameters and `lr` — confirm.
optimizer = torch.optim.Adam
# Learning rate.
lr = 5e-4
# Number of training epochs requested.
epochs = 100
# NOTE(review): `patience` is not passed to any call visible in this notebook —
# confirm how (or whether) utils.Trainer applies early stopping.
patience = 10 # Stop training after this many consecutive epochs without validation-loss improvement

In [8]:
# Bundle the data loaders, the model, the learning rate and the optimizer class into a trainer.
trainer = utils.Trainer(train_data,val_data,model,lr,optimizer)

### Let's train the model!

In [9]:
# Start training; the per-epoch train/validation losses appear in the cell output.
trainer.train(epochs)

Epoch 1: train loss=0.5654, validation loss=0.5278
Epoch 2: train loss=0.4296, validation loss=0.3745
Epoch 3: train loss=0.3680, validation loss=0.3725
Epoch 4: train loss=0.3621, validation loss=0.3590
Epoch 5: train loss=0.3584, validation loss=0.3524
Epoch 6: train loss=0.3533, validation loss=0.4112
Epoch 7: train loss=0.3494, validation loss=0.3438
Epoch 8: train loss=0.3460, validation loss=0.3429
Epoch 9: train loss=0.3420, validation loss=0.3390
Epoch 10: train loss=0.3416, validation loss=0.3400
Epoch 11: train loss=0.3405, validation loss=0.3366
Epoch 12: train loss=0.3408, validation loss=0.3367
Epoch 13: train loss=0.3386, validation loss=0.3366
Epoch 14: train loss=0.3382, validation loss=0.3365
Epoch 15: train loss=0.3375, validation loss=0.3435
Epoch 16: train loss=0.3376, validation loss=0.3353
Epoch 17: train loss=0.3372, validation loss=0.3354
Epoch 18: train loss=0.3365, validation loss=0.3376
Epoch 19: train loss=0.3365, validation loss=0.3348
Epoch 20: train loss=

### Now let's evaluate the model

In [10]:
# Load the held-out test split with the same batch size as the other loaders.
test_data = load_data(
    'top',
    input_folder,
    batch=128,
    dataset_type='test',
)

# Run the trainer's evaluation on the test split.
predictions, labels = trainer.evaluate(test_data)

In [11]:
# Print the summary metrics (AUC, accuracy, background rejection at fixed signal efficiency) shown in the cell output.
utils.print_metrics(predictions,labels)

AUC: 0.9226

ACC: 0.8860

Signal class 1 vs Background class 0:
Class 1 effS at 0.3000534457673921 1.0/effB = 20.925995024875622
Class 1 effS at 0.5000890762789869 1.0/effB = 15.16631610576923


### Try changing the hyperparameters, activation functions, layers, learning rate!