In [102]:
#import setGPU
import os
import numpy as np
import h5py
import glob
import itertools
import sys
from sklearn.utils import shuffle
import glob

In [103]:
# Load the four jet datasets from every HDF5 file matching ../data/*h5 and
# stack them along the first axis.
dataNames = ["genJetConstituents", "recoJetConstituents", "genJetFeatures", "recoJetFeatures"]
# one list of per-file arrays for each entry of dataNames
chunks = [[] for name in dataNames]
# sorted(): glob returns matches in arbitrary order; a fixed order keeps the
# row order of the stacked arrays reproducible between runs.
for fileIN in sorted(glob.glob("../data/*h5")):
    # the context manager closes the file even if a dataset is missing
    with h5py.File(fileIN, "r") as f:
        for i, name in enumerate(dataNames):
            # f[name] raises KeyError on a missing dataset (f.get would hand
            # back None); [()] reads the whole dataset into a numpy array so
            # nothing refers to the file after it is closed.
            mydata = f[name][()]
            chunks[i].append(mydata)
# Concatenate once per dataset instead of re-copying everything for each file.
# With no input files each entry stays an empty array.
datasets = [np.concatenate(parts, axis=0) if parts else np.array([]) for parts in chunks]
genJetParticles = datasets[0]
recoJetParticles = datasets[1]
genJet = datasets[2]
recoJet = datasets[3]
print(genJetParticles.shape, recoJetParticles.shape, genJet.shape, recoJet.shape)

(109984, 100, 3) (109984, 100, 3) (109984, 4) (109984, 4)


In [184]:
# Dirty trick: the training target is an all-zero vector with one entry
# per row of genJetParticles.
nJets = genJetParticles.shape[0]
y = np.zeros((nJets,))
print(y.shape)

(109984,)


In [104]:
# Add the "noisy" latent-space variables to the gen-jet dataset.
nNoiseVars = 10  # number of N(0, 1) columns appended to the gen-jet features
# Dedicated seeded generator: the noise is identical after a kernel restart
# and numpy's global random state is not consumed.
noiseRng = np.random.RandomState(12345)
# Build from the untouched features in datasets[2] instead of from genJet
# itself, so re-running this cell does not append another set of noise
# columns every time.
rndVars = noiseRng.normal(0., 1., (datasets[2].shape[0], nNoiseVars))
print(rndVars.shape)
genJet = np.concatenate((datasets[2], rndVars), axis=1)
print(genJet.shape)

(109984, 10)
(109984, 14)


In [141]:
# keras imports
from keras.models import Model, Sequential
from keras.layers import Dense, Input, Conv2D, Dropout, Flatten, Concatenate, Reshape, BatchNormalization, Activation, Lambda
from keras.layers import AveragePooling2D, Add
from keras.utils import plot_model
from keras import regularizers
from keras import backend as K
from keras import metrics
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, TerminateOnNaN

In [182]:
# import the EMD model
from keras.models import model_from_json
# Read the architecture description; the context manager closes the file
# even if read() raises.
with open('../models/EMD_Dense_MAE_AsymmetryLarge_1.json') as json_file:
    loaded_model_json = json_file.read()
emdModel = model_from_json(loaded_model_json)
# load weights into new model
emdModel.load_weights("../models/EMD_Dense_MAE_AsymmetryLarge_1.h5")
print("Loaded model from disk")
# mark every layer of the loaded model as non-trainable
for layer in emdModel.layers:
    layer.trainable = False
emdModel.summary()

Loaded model from disk
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 100, 6, 1)         0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 100, 6, 1)         4         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 10, 6, 1)          6010      
_________________________________________________________________
batch_normalization_2 (Batch (None, 10, 6, 1)          4         
_________________________________________________________________
activation_1 (Activation)    (None, 10, 6, 1)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 5, 6, 1)           505       
_________________________________________________________________
batch_normalization_3 (Batch (None, 5, 6, 1)         

In [185]:
# Sizes read off the loaded arrays: second axis of the gen-jet feature
# array and second axis of the gen-jet constituent array.
inputGensize, nParticles = genJet.shape[1], genJetParticles.shape[1]

# Jet Gen model

In [199]:
# Builds two models that share layers:
#   generator: (inputGenJet, inputGenImage) -> outGen, shape (nParticles, 3)
#   model:     (inputRecoImage, inputGenJet, inputGenImage) -> emdModel output
# Every particle-axis size uses nParticles (previously some Reshape layers
# hard-coded 100), so the block stays consistent with the Input shapes.

# the reco jet, with a trailing channel axis added
inputRecoImage = Input(shape=(nParticles,3))
xReco = Reshape((nParticles,3,1))(inputRecoImage)

# convert the input gen info into an array of "structured" noise
inputGenJet = Input(shape=(inputGensize,))
x = BatchNormalization(name='noise1')(inputGenJet)
x = Dense(nParticles, activation="relu",name='noise2')(x)
x = Dense(nParticles*3, activation="relu",name='noise3')(x)
x = Reshape((nParticles,3),name='noise4')(x)
# kept under its own name so that outGen only ever means the generator output
structuredNoise = BatchNormalization(name='noise5')(x)

# concatenate the input gen image to the structured noise
inputGenImage = Input(shape=(nParticles,3))
x = BatchNormalization(name='gen1')(inputGenImage)
x = Concatenate(axis=-1,name='gen3')([x,structuredNoise])
x = Reshape((nParticles,6,1),name='gen4')(x)

# process the image and produce the output
# kernel (3,6) with stride (1,6) and "same" padding collapses the 6-wide axis
# to width 1 with 3 filters, hence the reshape to (nParticles, 3) below
x = Conv2D(3, kernel_size=(3,6), strides=(1, 6), data_format="channels_last", padding="same", activation="relu")(x)
x = Dropout(0.2)(x)
x = BatchNormalization()(x)
x = Reshape((nParticles,3))(x)
# skip connection
x = Add()([x,inputGenImage])
x = Reshape((nParticles,3,1))(x)

# generator output: the same tensor without the channel axis
outGen = Reshape((nParticles,3))(x)

# put generated and reco jets side by side on the feature axis:
# (nParticles,3,1) + (nParticles,3,1) -> (nParticles,6,1), which is what emdModel receives
x = Concatenate(axis=-2)([x,xReco])

out = emdModel(x)

model = Model(inputs=(inputRecoImage,inputGenJet,inputGenImage), outputs=out)
generator = Model(inputs=(inputGenJet,inputGenImage), outputs=outGen)

In [200]:
# Dirty trick: with an all-zero target, the MAE loss gives the average score
# (which is what we want).
model.compile(loss='mae', optimizer='adam')
# print the layer tables: full model first, then the generator
for net in (model, generator):
    net.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_144 (InputLayer)          (None, 14)           0                                            
__________________________________________________________________________________________________
noise1 (BatchNormalization)     (None, 14)           56          input_144[0][0]                  
__________________________________________________________________________________________________
noise2 (Dense)                  (None, 100)          1500        noise1[0][0]                     
__________________________________________________________________________________________________
noise3 (Dense)                  (None, 300)          30300       noise2[0][0]                     
__________________________________________________________________________________________________
input_145 

In [201]:
# fit the model
# callbacks, all keyed on the validation loss except the NaN guard
trainingCallbacks = [
    EarlyStopping(monitor='val_loss', patience=10, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1),
    TerminateOnNaN(),
]
# input order follows the model definition: reco image, gen-jet features, gen image
trainingInputs = [recoJetParticles, genJet, genJetParticles]
history = model.fit(
    trainingInputs, y,
    batch_size=128,
    epochs=500,
    verbose=1,
    validation_split=0.3,
    callbacks=trainingCallbacks,
)

Train on 76988 samples, validate on 32996 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 12/500
Epoch 13/500
Epoch 14/500

Epoch 00014: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500

Epoch 00022: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 23/500
Epoch 24/500

Epoch 00024: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.
Epoch 25/500
Epoch 26/500

Epoch 00026: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-08.
Epoch 27/500
Epoch 28/500

Epoch 00028: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-09.
Epoch 29/500
Epoch 30/500

Epoch 00030: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-10

In [203]:
# Persist the training history plus architecture (JSON) and weights (HDF5)
# of both the generator and the full model under ../models/.
nameModel = 'JetGen_0'
# store history
# context manager: the HDF5 file is closed even if a write fails, matching
# the `with open(...)` style used for the JSON files below
with h5py.File("../models/%s_history.h5" %nameModel, "w") as f:
    f.create_dataset("training_loss", data=np.array(history.history['loss']),compression='gzip')
    f.create_dataset("validation_loss", data=np.array(history.history['val_loss']),compression='gzip')

# store model (generator only)
model_json = generator.to_json()
with open("../models/%s_GENERATOR.json" %nameModel, "w") as json_file:
    json_file.write(model_json)
generator.save_weights("../models/%s_GENERATOR.h5" %nameModel)

# store full model
model_json = model.to_json()
with open("../models/%s.json" %nameModel, "w") as json_file:
    json_file.write(model_json)
model.save_weights("../models/%s.h5" %nameModel)