Skip to content

Commit

Permalink
CMSSW implementation configuration, before test
Browse files Browse the repository at this point in the history
  • Loading branch information
jkiesele committed Jul 22, 2017
1 parent 61050e3 commit 5ef7c7a
Show file tree
Hide file tree
Showing 8 changed files with 176 additions and 39 deletions.
23 changes: 9 additions & 14 deletions Train/deepFlavour_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from training_base import training_base
from Losses import loss_NLL
from modelTools import fixLayersContaining
from modelTools import fixLayersContaining,printLayerInfosAndWeights

#also does all the parsing
train=training_base(testrun=False)
Expand All @@ -19,11 +19,11 @@
train.compileModel(learningrate=0.001,
loss=['categorical_crossentropy',loss_NLL],
metrics=['accuracy'],
loss_weights=[1., 0.0001])
loss_weights=[1., 0.000000000001])


print(train.keras_model.summary())
model,history = train.trainModel(nepochs=50,
model,history = train.trainModel(nepochs=1,
batchsize=10000,
stop_patience=300,
lr_factor=0.5,
Expand All @@ -34,23 +34,18 @@
maxqsize=100)


print('indentification training finished. Starting regression training...')

train.saveCheckPoint('IDonly')
exit()

train.keras_model=fixLayersContaining(train.keras_model, 'regression', invert=True)
print('fixing input norms...')
train.keras_model=fixLayersContaining(train.keras_model, 'input_batchnorm')
train.compileModel(learningrate=0.001,
loss=['categorical_crossentropy',loss_NLL],
metrics=['accuracy'],
loss_weights=[1., 1])

train.trainedepoches=0
print(train.keras_model.summary())
loss_weights=[1., 0.000000000001])


print(train.keras_model.summary())
#printLayerInfosAndWeights(train.keras_model)

model,history = train.trainModel(nepochs=30,
model,history = train.trainModel(nepochs=50,
batchsize=10000,
stop_patience=300,
lr_factor=0.5,
Expand Down
3 changes: 2 additions & 1 deletion convertFromRoot/convertFromRoot.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def main(argv=None):
from TrainData_deepCSV_PF_Reg import TrainData_deepCSV_PF_Reg
from TrainData_deepJet_Reg import TrainData_deepJet_Reg, TrainData_PF_Reg
from TrainData_deepCSV_PF_binned import TrainData_deepCSV_PF_Binned
from TrainData_deepFlavour import TrainData_deepFlavour_QGOnly_reg,TrainData_deepFlavour_FT,TrainData_deepFlavour_FT_reg,TrainData_deepFlavour_FT_map,TrainData_deepFlavour_FT_map_reg,TrainData_image
from TrainData_deepFlavour import TrainData_deepFlavour_FT_reg_noScale,TrainData_deepFlavour_QGOnly_reg,TrainData_deepFlavour_FT,TrainData_deepFlavour_FT_reg,TrainData_deepFlavour_FT_map,TrainData_deepFlavour_FT_map_reg,TrainData_image
from TrainData_FatJet import TrainData_FatJet_Test
from TrainData_PT_recur import TrainData_PT_recur, TrainData_QG_simple, TrainData_recurrent_fullTruth
from TrainData_deepCSV_int import TrainData_deepCSV_int,TrainData_deepCSV_conv
Expand All @@ -69,6 +69,7 @@ def main(argv=None):
TrainData_deepCSV_PF_Binned,
TrainData_deepFlavour_FT,
TrainData_deepFlavour_FT_reg,
TrainData_deepFlavour_FT_reg_noScale,
TrainData_deepFlavour_FT_map,
TrainData_deepFlavour_QGOnly_reg,
TrainData_image,
Expand Down
14 changes: 11 additions & 3 deletions modules/TrainData.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ def __init__(self):

self.referenceclass='isB'

self.truthclasses=['isB','isBB','isLeptonicB','isLeptonicB_C','isC','isUD','isS','isG','isUndefined']
self.truthclasses=['isB','isBB','isGBB','isLeptonicB','isLeptonicB_C','isC','isCC',
'isGCC','isUD','isS','isG','isUndefined']

self.allbranchestoberead=[]

Expand Down Expand Up @@ -634,13 +635,16 @@ def reduceTruth(self, tuple_in):
if tuple_in is not None:
b = tuple_in['isB'].view(numpy.ndarray)
bb = tuple_in['isBB'].view(numpy.ndarray)
gbb = tuple_in['isGBB'].view(numpy.ndarray)


bl = tuple_in['isLeptonicB'].view(numpy.ndarray)
blc = tuple_in['isLeptonicB_C'].view(numpy.ndarray)
lepb=bl+blc

c = tuple_in['isC'].view(numpy.ndarray)
cc = tuple_in['isCC'].view(numpy.ndarray)
gcc = tuple_in['isGCC'].view(numpy.ndarray)

ud = tuple_in['isUD'].view(numpy.ndarray)
s = tuple_in['isS'].view(numpy.ndarray)
Expand All @@ -649,7 +653,7 @@ def reduceTruth(self, tuple_in):
g = tuple_in['isG'].view(numpy.ndarray)
l = g + uds

return numpy.vstack((b,bb,lepb,c,l)).transpose()
return numpy.vstack((b,bb+gbb,lepb,c+cc+gcc,l)).transpose()



Expand All @@ -664,14 +668,18 @@ def reduceTruth(self, tuple_in):
self.reducedtruthclasses=['isB','isBB','isLeptB','isC','isUDS','isG']
if tuple_in is not None:
b = tuple_in['isB'].view(numpy.ndarray)

bb = tuple_in['isBB'].view(numpy.ndarray)
gbb = tuple_in['isGBB'].view(numpy.ndarray)


bl = tuple_in['isLeptonicB'].view(numpy.ndarray)
blc = tuple_in['isLeptonicB_C'].view(numpy.ndarray)
lepb=bl+blc

c = tuple_in['isC'].view(numpy.ndarray)
cc = tuple_in['isCC'].view(numpy.ndarray)
gcc = tuple_in['isGCC'].view(numpy.ndarray)

ud = tuple_in['isUD'].view(numpy.ndarray)
s = tuple_in['isS'].view(numpy.ndarray)
Expand All @@ -680,7 +688,7 @@ def reduceTruth(self, tuple_in):
g = tuple_in['isG'].view(numpy.ndarray)


return numpy.vstack((b,bb,lepb,c,uds,g)).transpose()
return numpy.vstack((b,bb+gbb,lepb,c+cc+gcc,uds,g)).transpose()


class TrainData_QGOnly(TrainData):
Expand Down
111 changes: 107 additions & 4 deletions modules/TrainData_deepFlavour.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ def __init__(self):
'Cpfcan_BtagPf_trackSip3dVal',
'Cpfcan_BtagPf_trackSip3dSig',
'Cpfcan_BtagPf_trackJetDistVal',
'Cpfcan_BtagPf_trackJetDistSig',
#'Cpfcan_BtagPf_trackJetDistSig',

'Cpfcan_ptrel',
'Cpfcan_drminsv',
'Cpfcan_fromPV',
#'Cpfcan_fromPV',
'Cpfcan_VTX_ass',
'Cpfcan_puppiw',
'Cpfcan_chi2',
Expand Down Expand Up @@ -209,11 +209,11 @@ def __init__(self):
'Cpfcan_BtagPf_trackSip3dVal',
'Cpfcan_BtagPf_trackSip3dSig',
'Cpfcan_BtagPf_trackJetDistVal',
'Cpfcan_BtagPf_trackJetDistSig',
#'Cpfcan_BtagPf_trackJetDistSig',

'Cpfcan_ptrel',
'Cpfcan_drminsv',
'Cpfcan_fromPV',
#'Cpfcan_fromPV',
'Cpfcan_VTX_ass',
'Cpfcan_puppiw',
'Cpfcan_chi2',
Expand Down Expand Up @@ -349,6 +349,109 @@ def readFromRootFile(self,filename,TupleMeanStd, weighter):
self.y=[alltruth,correctionfactor]


class TrainData_deepFlavour_FT_reg_noScale(TrainData_deepFlavour_FT_reg):
    # Same sample layout as TrainData_deepFlavour_FT_reg, but the input
    # features are zero-padded WITHOUT mean/std scaling: every
    # MeanNormZeroPad* call below receives None instead of TupleMeanStd.
    # Presumably the normalisation happens inside the model instead (e.g.
    # input BatchNormalization layers) — TODO confirm against the model code.

    def __init__(self):
        '''
        Constructor
        '''
        TrainData_deepFlavour_FT_reg.__init__(self)

    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        """Fill self.w, self.x and self.y from a single ROOT ntuple.

        Parameters
        ----------
        filename : str
            Path to the ROOT file; the tree is read from "deepntuplizer/tree".
        TupleMeanStd :
            Accepted for interface compatibility but deliberately NOT used
            for scaling — None is passed to the padding helpers instead.
        weighter :
            Provides createNotRemoveIndices()/getJetWeights(); which one is
            used depends on the self.remove / self.weight flags.
        """
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch

        sw=stopwatch()      # per-step timing
        swall=stopwatch()   # overall timing (started here; not read in this method)

        import ROOT

        fileTimeOut(filename,120) #give eos a minute to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')


        # split for convolutional network
        # NOTE: None in place of the mean/std tuple disables feature scaling —
        # that is the entire point of this _noScale variant.
        x_global = MeanNormZeroPad(filename,None,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]],self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename,None,
                                         self.branches[1],
                                         self.branchcutoffs[1],self.nsamples)

        x_npf = MeanNormZeroPadParticles(filename,None,
                                         self.branches[2],
                                         self.branchcutoffs[2],self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename,None,
                                        self.branches[3],
                                        self.branchcutoffs[3],self.nsamples)

        #x_reg = MeanNormZeroPad(filename,TupleMeanStd,
        #                        [self.branches[4]],
        #                        [self.branchcutoffs[4]],self.nsamples)

        print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        # Regression target inputs: generated jet pt (incl. neutrinos) and the
        # reconstructed, corrected jet pt.
        reg_truth=Tuple['gen_pt_WithNu'].view(numpy.ndarray)
        reco_pt=Tuple['jet_corr_pt'].view(numpy.ndarray)

        # Per-jet correction factor gen/reco, used as the regression truth.
        correctionfactor=numpy.zeros(self.nsamples)
        for i in range(self.nsamples):
            correctionfactor[i]=reg_truth[i]/reco_pt[i]

        if self.remove:
            notremoves=weighter.createNotRemoveIndices(Tuple)
            undef=Tuple['isUndefined']
            notremoves-=undef   # also drop jets whose truth flavour is undefined
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights=weighter.getJetWeights(Tuple)
        elif self.remove:
            weights=notremoves
        else:
            print('neither remove nor weight')
            weights=numpy.empty(self.nsamples)
            weights.fill(1.)

        truthtuple = Tuple[self.truthclasses]
        #print(self.truthclasses)
        alltruth=self.reduceTruth(truthtuple)



        #print(alltruth.shape)
        if self.remove:
            print('remove')
            # Apply the same keep-mask to every parallel array so rows stay aligned.
            weights=weights[notremoves > 0]
            x_global=x_global[notremoves > 0]
            x_cpf=x_cpf[notremoves > 0]
            x_npf=x_npf[notremoves > 0]
            x_sv=x_sv[notremoves > 0]
            alltruth=alltruth[notremoves > 0]

            reco_pt=reco_pt[notremoves > 0]
            correctionfactor=correctionfactor[notremoves > 0]

            newnsamp=x_global.shape[0]
            print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
            self.nsamples = newnsamp

        print(x_global.shape,self.nsamples)

        # One weight vector per output head (classification + regression).
        self.w=[weights,weights]
        self.x=[x_global,x_cpf,x_npf,x_sv,reco_pt]
        self.y=[alltruth,correctionfactor]


class TrainData_deepFlavour_QGOnly_reg(TrainData_QGOnly):
'''
Expand Down
6 changes: 6 additions & 0 deletions modules/modelTools.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@

def printLayerInfosAndWeights(model):
    """Dump every layer of *model* to stdout.

    For each layer, prints the configuration dict returned by
    ``get_config()`` followed by the weight list returned by
    ``get_weights()``.
    """
    for current_layer in model.layers:
        layer_config = current_layer.get_config()
        layer_weights = current_layer.get_weights()
        print(layer_config)
        print(layer_weights)


def fixLayersContaining(m, fixOnlyContaining, invert=False):
Expand Down
22 changes: 11 additions & 11 deletions modules/models/convolutional.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,15 @@ def model_deepFlavourReference(Inputs,nclasses,nregclasses,dropoutRate=0.1):
that do not include 'regression' and the training can be repeated focusing on the regression part
(check function fixLayersContaining with invert=True)
"""


cpf,npf,vtx = block_deepFlavourConvolutions(charged=Inputs[1],
neutrals=Inputs[2],
vertices=Inputs[3],
globalvars = BatchNormalization(momentum=0.6,name='globals_input_batchnorm') (Inputs[0])
cpf = BatchNormalization(momentum=0.6,name='cpf_input_batchnorm') (Inputs[1])
npf = BatchNormalization(momentum=0.6,name='npf_input_batchnorm') (Inputs[2])
vtx = BatchNormalization(momentum=0.6,name='vtx_input_batchnorm') (Inputs[3])
ptreginput = BatchNormalization(momentum=0.6,name='reg_input_batchnorm') (Inputs[4])

cpf,npf,vtx = block_deepFlavourConvolutions(charged=cpf,
neutrals=npf,
vertices=vtx,
dropoutRate=dropoutRate,
active=True,
batchnorm=True)
Expand All @@ -38,17 +42,13 @@ def model_deepFlavourReference(Inputs,nclasses,nregclasses,dropoutRate=0.1):
vtx = Dropout(dropoutRate)(vtx)


x = Concatenate()( [Inputs[0],cpf,npf,vtx ])
x = Concatenate()( [globalvars,cpf,npf,vtx ])

x = block_deepFlavourDense(x,dropoutRate,active=True,batchnorm=True,batchmomentum=0.6)

flavour_pred=Dense(nclasses, activation='softmax',kernel_initializer='lecun_uniform',name='ID_pred')(x)

regInput = Concatenate()( [flavour_pred, Inputs[4] ] ) #ad hoc normalisation
reg = Dense(32,activation='relu',kernel_initializer='lecun_uniform',name='regression_dense_1',trainable=True)(regInput)
reg = Dropout(dropoutRate,name='regression_dropout_0')(reg)
reg = Dense(32,activation='relu',kernel_initializer='lecun_uniform',name='regression_dense_2',trainable=True)(reg)
reg = Dropout(dropoutRate,name='regression_dropout_1')(reg)
reg = Concatenate()( [flavour_pred, ptreginput ] )

reg_pred=Dense(nregclasses, activation='linear',kernel_initializer='ones',name='regression_pred',trainable=True)(reg)

Expand Down
25 changes: 19 additions & 6 deletions modules/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,13 @@ def MeanNormZeroPadBinned(
means=[]
norms=[]
for b in inbranches:
means.append(MeanNormTuple[b][0])
norms.append(MeanNormTuple[b][1])

if MeanNormTuple==None:
means.append(0)
norms.append(1)
else:
means.append(MeanNormTuple[b][0])
norms.append(MeanNormTuple[b][1])

x_branch, x_center, x_bins, x_width = dimension1
y_branch, y_center, y_bins, y_width = dimension2
Expand Down Expand Up @@ -455,8 +460,12 @@ def MeanNormZeroPadParticles(Filename_in,MeanNormTuple,inbranches,nMax,nevents):
means=[]
norms=[]
for b in inbranches:
means.append(MeanNormTuple[b][0])
norms.append(MeanNormTuple[b][1])
if MeanNormTuple is None:
means.append(0)
norms.append(1)
else:
means.append(MeanNormTuple[b][0])
norms.append(MeanNormTuple[b][1])


c_meanNormZeroPad.particlecluster(array,[norms],[means],[inbranches],[nMax],Filename_in)
Expand Down Expand Up @@ -501,8 +510,12 @@ def MeanNormZeroPad(Filename_in,MeanNormTuple,inbranches_listlist,nMaxslist,neve
means=[]
norms=[]
for b in inbranches:
means.append(MeanNormTuple[b][0])
norms.append(MeanNormTuple[b][1])
if MeanNormTuple is None:
means.append(0)
norms.append(1)
else:
means.append(MeanNormTuple[b][0])
norms.append(MeanNormTuple[b][1])
meanslist.append(means)
normslist.append(norms)

Expand Down
11 changes: 11 additions & 0 deletions modules/training_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,17 @@ def compileModel(self,

def saveModel(self,outfile):
self.keras_model.save(self.outputDir+outfile)
import tensorflow as tf
import keras.backend as K
tfsession=K.get_session()
saver = tf.train.Saver()
tfoutpath=self.outputDir+outfile+'_tfsession/tf'
import os
os.system('rm -f '+tfoutpath)
os.system('mkdir -p '+tfoutpath)
saver.save(tfsession, tfoutpath)


#import h5py
#f = h5py.File(self.outputDir+outfile, 'r+')
#del f['optimizer_weights']
Expand Down

0 comments on commit 5ef7c7a

Please sign in to comment.