In [1]:
import torch
import yaml

# Local imports
import carl_utils.preprocessing as carl_ppro
import carl_utils.train as carl_train

import logging
# Only let ERROR-and-above records through from matplotlib's font manager logger.
font_manager_logger = logging.getLogger('matplotlib.font_manager')
font_manager_logger.setLevel(logging.ERROR)

# Use the first CUDA device when one is available, otherwise run on the CPU.
# (To force CPU for debugging, assign DEVICE = 'cpu' here instead.)
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")
print(DEVICE)

cuda:0


In [2]:
# Load the catalogue of available feature groups from the YAML config.
# - The context manager guarantees the file handle is closed after parsing.
# - safe_load builds only plain Python containers/scalars (all this file holds)
#   and, unlike yaml.CLoader, does not require PyYAML's LibYAML bindings.
with open("carl_features.yaml", "r") as features_file:
    total_features = yaml.safe_load(features_file)
total_features

{'Electron_set': {'set': True,
  'size': 4,
  'subfeatures': ['Electron_pt',
   'Electron_eta',
   'Electron_phi',
   'Electron_mass']},
 'FatJet_set': {'set': True,
  'size': 5,
  'subfeatures': ['FatJet_pt',
   'FatJet_eta',
   'FatJet_phi',
   'FatJet_mass',
   'FatJet_btagCSVV2']},
 'Jet_set': {'set': True,
  'size': 5,
  'subfeatures': ['Jet_pt',
   'Jet_eta',
   'Jet_phi',
   'Jet_mass',
   'Jet_btagCSVV2']},
 'MET_sumEt': {'set': False, 'size': 1, 'subfeatures': ['MET_sumEt']},
 'Muon_set': {'set': True,
  'size': 4,
  'subfeatures': ['Muon_pt', 'Muon_eta', 'Muon_phi', 'Muon_mass']},
 'Photon_set': {'set': True,
  'size': 4,
  'subfeatures': ['Photon_pt', 'Photon_eta', 'Photon_phi', 'Photon_mass']}}

In [3]:
# Feature groups to use in this study; each name must be a key of total_features.
subfeatures = ["Jet_set", "MET_sumEt", "Electron_set", "FatJet_set"]

# Restrict the full catalogue to the selected groups, preserving the order above.
features = {group_name: total_features[group_name] for group_name in subfeatures}
features

{'Jet_set': {'set': True,
  'size': 5,
  'subfeatures': ['Jet_pt',
   'Jet_eta',
   'Jet_phi',
   'Jet_mass',
   'Jet_btagCSVV2']},
 'MET_sumEt': {'set': False, 'size': 1, 'subfeatures': ['MET_sumEt']},
 'Electron_set': {'set': True,
  'size': 4,
  'subfeatures': ['Electron_pt',
   'Electron_eta',
   'Electron_phi',
   'Electron_mass']},
 'FatJet_set': {'set': True,
  'size': 5,
  'subfeatures': ['FatJet_pt',
   'FatJet_eta',
   'FatJet_phi',
   'FatJet_mass',
   'FatJet_btagCSVV2']}}

In [4]:
# Names of the weight quantities (presumably per-event weight branches in the
# input files -- TODO confirm). Not referenced by the other cells shown here.
weight_features = [
    "genWeight",
    "btagWeight_CSVV2",
]

## Instantiate the datasets and load the data

In [5]:
# Number of events to read from EACH sample (nominal and PS variation), so the
# two samples together provide 400k training and 100k validation events.
n_train_events = 400_000 // 2
n_val_events = 100_000 // 2

# Directory holding the input ROOT files; change this one line to relocate the data.
DATA_DIR = "/data/mdrnevich/AGC"


def load_deepsets_dataset(sample, split, sample_id, n_events):
    """Build a DeepSetsDataset for one sample/split combination.

    Args:
        sample: Sample tag used in the file name ("nominal" or "PS_var").
        split: Split tag used in the file name ("training" or "validation").
        sample_id: Third positional argument of DeepSetsDataset (0 for the
            nominal sample, 1 for the PS variation here). Presumably the class
            label -- confirm against carl_utils.preprocessing.
        n_events: Passed as ``stop_event`` to DeepSetsDataset.

    Returns:
        The constructed ``carl_ppro.DeepSetsDataset``, using the notebook-level
        ``features`` selection.
    """
    path = f"{DATA_DIR}/CMS_ttbar_{sample}_DeepSets_{split}_data_new2.root"
    return carl_ppro.DeepSetsDataset([path], features, sample_id, stop_event=n_events)


# Same construction order as before: nominal train/valid, then PS-var train/valid.
train_nominal_dataset = load_deepsets_dataset("nominal", "training", 0, n_train_events)
valid_nominal_dataset = load_deepsets_dataset("nominal", "validation", 0, n_val_events)

train_PS_var_dataset = load_deepsets_dataset("PS_var", "training", 1, n_train_events)
valid_PS_var_dataset = load_deepsets_dataset("PS_var", "validation", 1, n_val_events)

100%|██████████| 200000/200000 [01:40<00:00, 1997.50it/s]
100%|██████████| 50000/50000 [00:25<00:00, 1992.51it/s]
100%|██████████| 200000/200000 [01:38<00:00, 2032.87it/s]
100%|██████████| 50000/50000 [00:23<00:00, 2102.38it/s]


## Prepare the data

In [6]:
# Merge the nominal dataset with its PS-variation counterpart, once for the
# training split and once for the validation split (in that order).
train_generator_data, valid_generator_data = (
    carl_ppro.CombinedDataset(nominal_part, ps_var_part)
    for nominal_part, ps_var_part in (
        (train_nominal_dataset, train_PS_var_dataset),
        (valid_nominal_dataset, valid_PS_var_dataset),
    )
)

## Define the settings of the model and training

In [7]:
# Layer widths handed to the model under the "phi" and "mlp" keys below
# (presumably the per-object network and the classifier head of the Deep Sets
# model -- confirm in carl_utils.train).
hidden1, hidden2, hidden3 = 512, 256, 128
classify1 = classify2 = 1024
outputs = 1  # not referenced by the other cells shown here

model_settings = dict(
    features=features,
    phi=[hidden1, hidden2, hidden3],
    mlp=[classify1, classify2],
)

# Keyword arguments forwarded to carl_train.train via ** unpacking.
training_settings = dict(
    optimizer="Adam",
    learning_rate=1e-2,
    batch_size=1024,
    n_epochs=30,
    patience=6,
    device=DEVICE,
    # Two samples are loaded with n_train_events each, hence the factor of 2.
    n_events=n_train_events * 2,
)

## Perform the training

In [8]:
# Run the training: positional arguments are the model configuration and the
# combined training / validation datasets; every entry of training_settings is
# passed through as a keyword argument.
model = carl_train.train(
    model_settings,
    train_generator_data,
    valid_generator_data,
    **training_settings
)

Constructing the model
Loading the input data scaling


100%|██████████| 391/391 [00:06<00:00, 58.11it/s]


Training the model


  3%|▎         | 1/30 [03:50<1:51:29, 230.66s/it]                

Epoch: 01, Training Loss:   0.7038
           Validation Loss: 0.6872
New best model saved to: deepsets_model.zip


  7%|▋         | 2/30 [07:38<1:46:58, 229.22s/it]                

Epoch: 02, Training Loss:   0.6852
           Validation Loss: 0.6855
New best model saved to: deepsets_model.zip


 10%|█         | 3/30 [11:26<1:42:51, 228.58s/it]                

Epoch: 03, Training Loss:   0.6825
           Validation Loss: 0.6841
New best model saved to: deepsets_model.zip


 13%|█▎        | 4/30 [15:10<1:38:14, 226.72s/it]                

Epoch: 04, Training Loss:   0.6816
           Validation Loss: 0.6823
New best model saved to: deepsets_model.zip


 17%|█▋        | 5/30 [18:58<1:34:40, 227.21s/it]                

Epoch: 05, Training Loss:   0.6810
           Validation Loss: 0.6827
Stale epoch


 20%|██        | 6/30 [22:41<1:30:15, 225.65s/it]                

Epoch: 06, Training Loss:   0.6808
           Validation Loss: 0.6820
New best model saved to: deepsets_model.zip


 23%|██▎       | 7/30 [26:18<1:25:23, 222.76s/it]                

Epoch: 07, Training Loss:   0.6806
           Validation Loss: 0.6819
New best model saved to: deepsets_model.zip


 27%|██▋       | 8/30 [29:53<1:20:52, 220.57s/it]                

Epoch: 08, Training Loss:   0.6804
           Validation Loss: 0.6819
New best model saved to: deepsets_model.zip


 30%|███       | 9/30 [33:28<1:16:30, 218.58s/it]                

Epoch: 09, Training Loss:   0.6803
           Validation Loss: 0.6815
New best model saved to: deepsets_model.zip


 33%|███▎      | 10/30 [37:04<1:12:35, 217.79s/it]               

Epoch: 10, Training Loss:   0.6801
           Validation Loss: 0.6818
Stale epoch


 37%|███▋      | 11/30 [40:43<1:09:07, 218.27s/it]               

Epoch: 11, Training Loss:   0.6802
           Validation Loss: 0.6815
Stale epoch


 40%|████      | 12/30 [44:21<1:05:26, 218.13s/it]               

Epoch: 12, Training Loss:   0.6800
           Validation Loss: 0.6834
Stale epoch


 43%|████▎     | 13/30 [47:57<1:01:40, 217.66s/it]               

Epoch: 13, Training Loss:   0.6800
           Validation Loss: 0.6844
Stale epoch


 47%|████▋     | 14/30 [51:36<58:05, 217.85s/it]                 

Epoch: 14, Training Loss:   0.6797
           Validation Loss: 0.6813
New best model saved to: deepsets_model.zip


 50%|█████     | 15/30 [55:12<54:21, 217.42s/it]                 

Epoch: 15, Training Loss:   0.6797
           Validation Loss: 0.6808
New best model saved to: deepsets_model.zip


 53%|█████▎    | 16/30 [58:50<50:44, 217.43s/it]                 

Epoch: 16, Training Loss:   0.6796
           Validation Loss: 0.6873
Stale epoch


 57%|█████▋    | 17/30 [1:02:31<47:22, 218.63s/it]               

Epoch: 17, Training Loss:   0.6795
           Validation Loss: 0.6811
Stale epoch


 60%|██████    | 18/30 [1:06:09<43:42, 218.52s/it]               

Epoch: 18, Training Loss:   0.6794
           Validation Loss: 0.6812
Stale epoch


 63%|██████▎   | 19/30 [1:09:47<40:01, 218.31s/it]               

Epoch: 19, Training Loss:   0.6794
           Validation Loss: 0.6816
Stale epoch


 67%|██████▋   | 20/30 [1:13:28<36:31, 219.11s/it]               

Epoch: 20, Training Loss:   0.6793
           Validation Loss: 0.6821
Stale epoch


                                                                 

Epoch: 21, Training Loss:   0.6790
           Validation Loss: 0.6820
Stale epoch
Early stopping after 6 stale epochs
Finished training


