In [1]:
from trainlib.CombinedModel import CombinedModel
from trainlib.CombinedPreprocessor import CombinedPreprocessor
from trainlib.PCAWhiteningPreprocessor import PCAWhiteningPreprocessor
from trainlib.RNNPreprocessor import RNNPreprocessor
from trainlib.FileCollection import FileCollection
from trainlib.utils import read_data
from trainlib.generator import Generator
import trainlib.cuts as cuts
from keras import optimizers

Using TensorFlow backend.


Welcome to JupyROOT 6.10/09


In [2]:
# Branches holding a single value per event.
scalar_inputs = ["PFMET", "nCleanedJetsPt30"]

# Branches holding a variable-length list per event, grouped by object collection.
list_inputs = {
    "Jet": ["JetPt", "JetEta", "JetPhi"],
    "ExtraLep": ["ExtraLepPt", "ExtraLepEta", "ExtraLepPhi"],
}

# Model over both input kinds; the name "multiple_lists" is shared with the
# preprocessor. The trailing empty dict is the fourth constructor argument
# (its meaning is not visible from this notebook -- TODO confirm in CombinedModel).
mod = CombinedModel("multiple_lists", scalar_inputs, list_inputs, {})

ExtraLep
['ExtraLepPt', 'ExtraLepEta', 'ExtraLepPhi']
Jet
['JetPt', 'JetEta', 'JetPhi']


In [3]:
# Preprocessor built with the same name and input lists as the model.
# Presumably PCAWhiteningPreprocessor handles the scalar inputs and
# RNNPreprocessor the list inputs (argument pairing inferred from the
# positional order -- verify against CombinedPreprocessor); no event cut.
pre = CombinedPreprocessor(
    "multiple_lists",
    scalar_inputs,
    PCAWhiteningPreprocessor,
    list_inputs,
    RNNPreprocessor,
    cuts.no_cut,
)

PCA setup for 'multiple_lists_scalar_inputs': ['PFMET', 'nCleanedJetsPt30']
PCAWhiteningPreprocessor for stream 'multiple_lists_scalar_inputs'
for list input group 'ExtraLep': assigned periodic inputs ['ExtraLepPhi'] and nonperiodic inputs ['ExtraLepPt', 'ExtraLepEta'], sorting according to ExtraLepPt
PCA setup for 'multiple_lists_ExtraLep_base': ['ExtraLepPt', 'ExtraLepEta', 'ExtraLepPhi_sin', 'ExtraLepPhi_cos']
PCAWhiteningPreprocessor for stream 'multiple_lists_ExtraLep_base'
ListPreprocessor for stream 'multiple_lists_ExtraLep'
RNN preprocessor for stream 'multiple_lists_ExtraLep'
for list input group 'Jet': assigned periodic inputs ['JetPhi'] and nonperiodic inputs ['JetPt', 'JetEta'], sorting according to JetPt
PCA setup for 'multiple_lists_Jet_base': ['JetPt', 'JetEta', 'JetPhi_sin', 'JetPhi_cos']
PCAWhiteningPreprocessor for stream 'multiple_lists_Jet_base'
ListPreprocessor for stream 'multiple_lists_Jet'
RNN preprocessor for stream 'multiple_lists_Jet'
total processed columns:

In [4]:
# Input files for the two classes, each mapped to the cut applied when reading it.
# H1: VBFH125 sample.
H1_stream = {
    "/data_CMS/cms/wind/CJLST_NTuples/VBFH125/ZZ4lAnalysis.root": cuts.no_cut,
}
# H0: ggH125 sample.
H0_stream = {
    "/data_CMS/cms/wind/CJLST_NTuples/ggH125/ZZ4lAnalysis.root": cuts.no_cut,
}

In [5]:
# Generator with no preprocessor attached; it is only used to fit `pre`.
testgen = Generator(
    H1_stream,
    H0_stream,
    pre.processed_columns,
    preprocessor=None,
)

# The value returned by the training-data setup is forwarded as the setup length.
setup_len = testgen.setup_training_data()

# Fit the preprocessor on the generator's raw, scrambled output.
pre.setup_generator(
    testgen.raw_generator_scrambled(),
    len_setupdata=setup_len,
)

skimming /data_CMS/cms/wind/CJLST_NTuples/VBFH125/ZZ4lAnalysis.root
collection set up: 1 files, 62320 entries in total, 31160 of which will be used
skimming /data_CMS/cms/wind/CJLST_NTuples/ggH125/ZZ4lAnalysis.root
collection set up: 1 files, 110483 entries in total, 55241 of which will be used
H1 contains 31160 entries
H0 contains 55241 entries
using the following chunk sizes: (31 / 55)
setting up preprocessor on 86430 events
Index([u'ExtraLepPt', u'ExtraLepEta', u'ExtraLepPhi', u'JetPt', u'JetEta',
       u'JetPhi', u'PFMET', u'nCleanedJetsPt30'],
      dtype='object')
setting up scalar preprocessor
setting up PCA whitening on 86430 events
86430 remaining after the cuts
setting up list preprocessor for 'ExtraLep'
List: Index([u'ExtraLepPhi_sin', u'ExtraLepPhi_cos', u'ExtraLepPt', u'ExtraLepEta'], dtype='object')
86430 remaining after the cuts
setting up PCA whitening on 228 events
228 remaining after the cuts
found a maximum list length in the setup data of 2: will pad or truncate to

In [None]:
# Training generator: same input streams and columns, with `pre.process`
# attached as the preprocessor.
train_gen = Generator(
    H1_stream,
    H0_stream,
    pre.processed_columns,
    preprocessor=pre.process,
)
training_len = train_gen.setup_training_data()

# Validation generator: identical construction, but set up on the validation data.
val_gen = Generator(
    H1_stream,
    H0_stream,
    pre.processed_columns,
    preprocessor=pre.process,
)
validation_len = val_gen.setup_validation_data()

In [None]:
# SGD with momentum as the optimizer.
sgd = optimizers.SGD(lr=0.01, momentum=0.9)

# Compile the underlying Keras model: mean-squared-error loss, accuracy as the reported metric.
mod.model.compile(
    loss="mean_squared_error",
    optimizer=sgd,
    metrics=["accuracy"],
)

In [6]:
# Stand-alone file collection over the ggH125 file (the same path as in H0_stream),
# constructed with the bounds 0.0 and 1.0.
fcoll = FileCollection(
    {"/data_CMS/cms/wind/CJLST_NTuples/ggH125/ZZ4lAnalysis.root": cuts.no_cut},
    0.0,
    1.0,
)

# Read a small sample (arguments 0 and 10; the displayed frame has 10 rows)
# restricted to the columns the preprocessor consumes.
setup_data = read_data(fcoll, 0, 10, branches=pre.processed_columns)

skimming /data_CMS/cms/wind/CJLST_NTuples/ggH125/ZZ4lAnalysis.root
collection set up: 1 files, 110483 entries in total, 110483 of which will be used


In [7]:
# Sanity check: run the fitted preprocessor on the small sample and display the result.
pre.process(setup_data)

{'multiple_lists_ExtraLep': array([[[ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.]],
 
        [[ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.]],
 
        [[ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.]],
 
        [[ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.]],
 
        [[ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.]],
 
        [[ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.]],
 
        [[ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.]],
 
        [[ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.]],
 
        [[ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.]],
 
        [[ 0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.]]], dtype=float32),
 'multiple_lists_Jet': array([[[-0.17588344, -1.00814199,  0.45338827, -1.33961868],
         [ 0.        ,  0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        

In [8]:
# Display the raw (unprocessed) sample for comparison with the preprocessed output.
setup_data

Unnamed: 0,ExtraLepPt,ExtraLepEta,ExtraLepPhi,JetPt,JetEta,JetPhi,PFMET,nCleanedJetsPt30
0,[],[],[],[53.6443],[-2.29227],[2.54869],40.490429,1
1,[],[],[],"[36.2919, 21.1634]","[0.326643, -0.80063]","[2.83377, 1.15123]",40.09692,1
2,[],[],[],[],[],[],44.240479,0
3,[],[],[],"[55.005, 28.7824]","[1.21199, -2.61541]","[1.77548, -0.592928]",71.606529,1
4,[],[],[],[29.372],[3.82245],[-2.58757],21.410542,0
5,[],[],[],[21.1895],[-2.82564],[-1.86381],21.15958,0
6,[],[],[],[],[],[],28.228645,0
7,[],[],[],[21.9638],[-1.08136],[-2.47143],34.765644,0
8,[],[],[],[81.062],[3.98602],[1.68111],54.827316,1
9,[],[],[],"[31.6878, 28.9569, 27.7632]","[-2.06541, 1.83295, -0.914617]","[1.32394, 0.201612, 2.14285]",7.474819,1


In [None]:
# Train from the preprocessed training generator for 5 epochs of 128 steps each,
# validating on 128 steps drawn from the preprocessed validation generator.
mod.model.fit_generator(
    train_gen.preprocessed_generator(),
    steps_per_epoch=128,
    epochs=5,
    verbose=2,
    validation_data=val_gen.preprocessed_generator(),
    validation_steps=128,
)