In [121]:
import numpy as np
import h5py, sys, glob
import matplotlib.pyplot as plt
plt.style.use('sonic.mplstyle')
%matplotlib inline

# Dataset preparation

In [122]:
from sklearn.utils import shuffle
# glob returns paths in arbitrary (filesystem-dependent) order, so sort them first:
# otherwise the seeded shuffle would not reproduce the same split on every machine.
fileIN = shuffle(sorted(glob.glob("../data/training/qcd*SIDEBAND*.h5")), random_state=1111)
# first 50% of the files -> training sample, next 25% -> test sample;
# the last 25% is not assigned here (presumably kept as a hold-out sample -- TODO confirm)
i_train = int(0.5*len(fileIN))
i_test = int(0.75*len(fileIN))
X_train = fileIN[:i_train]
X_test = fileIN[i_train:i_test]
print(len(fileIN), len(X_train), len(X_test))

10 5 2


In [123]:
# minN is the per-file row count used further down (as file_length) to decide how
# many batches are drawn from each file.
# NOTE(review): the value is hard-coded. The commented-out scan below computes the
# smallest first-dimension length of the "EFP" dataset over all files and was
# presumably the origin of 4096 -- confirm whenever the input files change.
#minN = 99999999
#for fname in fileIN:
#    f = h5py.File(fname)
#    d = f.get("EFP")
#    minN = min(minN,d.shape[0])
#print(minN)
minN = 4096

# Model Definition

In [124]:
# keras imports
from keras.models import Model, Sequential
from keras.layers import Dense, Input
from keras.layers import BatchNormalization
from keras.utils import plot_model
from keras import regularizers
from keras import backend as K
from keras import metrics
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, TerminateOnNaN
from keras.regularizers import l1

In [125]:
# number of features per sample: width of the network input and of its reconstructed output
inputShape = 102

In [126]:
def _dense(units, activation, name):
    """Build a Dense layer with the kernel initializer shared by all layers of the network."""
    return Dense(units, activation=activation, kernel_initializer='lecun_uniform', name=name)

# the input goes through a BatchNormalization layer before reaching the encoder
inputArray = Input(shape=(inputShape,))
x = BatchNormalization()(inputArray)

# encoder: 50 -> 25 -> 10, with a linear activation on the 10-unit latent layer
x = _dense(50, "relu", 'enc_0')(x)
x = _dense(25, "relu", 'enc_1')(x)
enc = _dense(10, "linear", 'enc_2')(x)

# decoder: 25 -> 50 -> inputShape, with a linear activation on the output layer
x = _dense(25, "relu", 'dec_0')(enc)
x = _dense(50, "relu", 'dec_1')(x)
output = _dense(inputShape, "linear", 'dec_2')(x)

# full autoencoder, and the encoder part alone (same input tensor, same layers)
model = Model(inputs=inputArray, outputs=output)
encoder = Model(inputs=inputArray, outputs=enc)

In [127]:
# the autoencoder and the stand-alone encoder are compiled with identical settings
for net in (model, encoder):
    net.compile(optimizer='adam', loss='mse')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 102)               0         
_________________________________________________________________
batch_normalization_4 (Batch (None, 102)               408       
_________________________________________________________________
enc_0 (Dense)                (None, 50)                5150      
_________________________________________________________________
enc_1 (Dense)                (None, 25)                1275      
_________________________________________________________________
enc_2 (Dense)                (None, 10)                260       
_________________________________________________________________
dec_0 (Dense)                (None, 25)                275       
_________________________________________________________________
dec_1 (Dense)                (None, 50)                1300      
__________

# Data Generator


In [128]:
import numpy as np
import keras
import h5py
import glob
import random
from sklearn.utils import shuffle

# 'Generates data for Keras'
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves autoencoder batches from a list of HDF5 files.

    Only one file is held in memory at a time. Batch ``index`` is mapped to
    file ``index // batch_per_file`` (in the current file order) and to batch
    ``index % batch_per_file`` inside that file, so the rows returned depend
    only on ``index`` and not on the order in which batches are requested.
    Requesting batches in increasing order loads each file once per epoch;
    any other order still returns correct data but reloads files more often.

    The target of every batch is the batch itself (``y`` is ``X``).

    The file order and the per-file row shuffles are redrawn in
    ``on_epoch_end``. The generator keeps its single-file cache on the
    instance, so concurrent calls from several workers are not supported.

    Parameters
    ----------
    label : str
        Tag prepended to the messages printed when ``verbose`` is set.
    fileList : list of str
        Paths of the HDF5 files to read; each must contain an ``'EFP'``
        dataset that is indexed here as a rank-3 array. The list is copied,
        so the caller's list is never reordered.
    batch_size : int
        Number of rows per batch.
    batch_per_file : int
        Number of batches served from each file per epoch.
    verbose : int, optional
        If non-zero, print progress messages.
    """
    def __init__(self, label, fileList, batch_size, batch_per_file, verbose =0):
        self.verbose = verbose
        self.label = label
        # private copy: the per-epoch shuffle must not reorder the caller's list
        self.fileList = list(fileList)
        self.batch_size = batch_size
        self.batch_per_file = batch_per_file
        # kept for backward compatibility only: files are now closed right after reading
        self.f = None
        self.X = None
        self.y = None
        self.nBatch = 0
        self.iFile = -1
        self._rowSeeds = self._draw_row_seeds()
        # load the first file
        self._load_file(0)

    def _draw_row_seeds(self):
        """Draw one row-shuffling seed per file from numpy's global random state."""
        return np.random.randint(0, 2**31 - 1, size=len(self.fileList))

    def _load_file(self, iFile):
        """Read file number ``iFile`` (in the current file order) into ``self.X`` / ``self.y``."""
        if self.verbose: print("%s new file" %self.label)
        # the dataset is copied into memory, so the handle can be closed immediately
        with h5py.File(self.fileList[iFile], "r") as f:
            efp = np.array(f.get('EFP'))
        # stack the slices at index 0 and 1 of the last axis on top of each other
        # (presumably the two jets of each event -- TODO confirm)
        efp = np.concatenate((efp[:,:,0], efp[:,:,1]))
        # seeded permutation: reloading the same file within one epoch gives the same
        # row order, so the batches of a file never overlap inside an epoch
        order = np.random.RandomState(int(self._rowSeeds[iFile])).permutation(len(efp))
        self.X = efp[order]
        self.y = self.X
        self.iFile = iFile

    def on_epoch_end(self):
        """Reshuffle the file order and redraw the row shuffles for the next epoch."""
        if self.verbose: print("%s new epoch" %self.label)
        random.shuffle(self.fileList)
        self._rowSeeds = self._draw_row_seeds()
        # invalidate the cached file so that the next request reloads from the new order
        self.iFile = -1

    def __len__(self):
        # 'Denotes the number of batches per epoch'
        if self.verbose: print("%s LEN = %i" %(self.label, self.batch_per_file*len(self.fileList)))
        return self.batch_per_file*len(self.fileList)

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(inputs, targets)`` pair.

        Raises
        ------
        IndexError
            If ``index`` is outside ``[0, batch_per_file*len(fileList))``.
        """
        nTotal = self.batch_per_file*len(self.fileList)
        if not 0 <= index < nTotal:
            raise IndexError("%s: batch index %i out of range [0, %i)" %(self.label, index, nTotal))
        if self.verbose: print("%s: %i" %(self.label,index))

        # split the global batch index into (file, batch inside that file)
        iFile, nBatch = divmod(index, self.batch_per_file)
        if iFile != self.iFile:
            self._load_file(iFile)
        self.nBatch = nBatch

        #'Generate one batch of data'
        # if the file is too short for this batch, slide the window back so that the
        # batch keeps its full size whenever the file has at least batch_size rows
        iStop = min(len(self.X), (nBatch+1)*self.batch_size)
        iStart = max(0, iStop-self.batch_size)
        myx = self.X[iStart:iStop,:]
        myy = self.y[iStart:iStop,:]
        return myx, myy

# Training

In [129]:
# number of rows per training/validation batch
batch_size = 128
# number of rows budgeted per input file; divided by batch_size below to get the batches per file
file_length = minN

In [130]:
# whole number of batches that fit into the per-file row budget
my_batch_per_file = int(file_length/batch_size)
# one generator per sample; both use the same batch size and batches per file
myTrainGen = DataGenerator(label="TRAINING", fileList=X_train,
                           batch_size=batch_size, batch_per_file=my_batch_per_file)
myTestGen = DataGenerator(label="TEST", fileList=X_test,
                          batch_size=batch_size, batch_per_file=my_batch_per_file)

In [131]:
# upper bound on the number of epochs; the EarlyStopping callback passed to the fit can end training earlier
n_epochs = 5000
# verbosity level forwarded to fit_generator and to the callbacks
verbosity = 2

In [132]:
# callbacks, all watching the validation loss:
#  - stop after 10 epochs without improvement
#  - multiply the learning rate by 0.1 after 5 epochs without improvement
#  - abort as soon as the loss becomes NaN
fit_callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, verbose=verbosity),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=verbosity),
    TerminateOnNaN(),
]
history = model.fit_generator(
    generator=myTrainGen,
    steps_per_epoch=my_batch_per_file*len(X_train),
    epochs=n_epochs,
    verbose=verbosity,
    callbacks=fit_callbacks,
    validation_data=myTestGen,
    validation_steps=my_batch_per_file*len(X_test)
)

Epoch 1/5000
 - 2s - loss: 0.0064 - val_loss: 2.2198e-04
Epoch 2/5000
 - 1s - loss: 1.6779e-04 - val_loss: 1.6368e-04
Epoch 3/5000
 - 1s - loss: 1.0791e-04 - val_loss: 6.0769e-05
Epoch 4/5000
 - 1s - loss: 9.1495e-05 - val_loss: 4.3674e-05
Epoch 5/5000
 - 1s - loss: 7.0850e-05 - val_loss: 6.9475e-05
Epoch 6/5000
 - 1s - loss: 8.5471e-05 - val_loss: 4.1998e-05
Epoch 7/5000
 - 1s - loss: 8.9415e-05 - val_loss: 5.9314e-05
Epoch 8/5000
 - 1s - loss: 6.9685e-05 - val_loss: 3.1697e-05

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 9/5000
 - 1s - loss: 4.6651e-05 - val_loss: 2.9321e-05
Epoch 10/5000
 - 1s - loss: 4.4920e-05 - val_loss: 2.9716e-05
Epoch 11/5000
 - 1s - loss: 4.6408e-05 - val_loss: 3.9280e-05
Epoch 12/5000
 - 1s - loss: 4.4029e-05 - val_loss: 2.8188e-05
Epoch 13/5000
 - 1s - loss: 4.0652e-05 - val_loss: 2.5476e-05

Epoch 00013: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 14/5000
 - 1s - loss: 4.9824e-05 - va

# Save Model

In [134]:
nameModel = 'AE_EFP'
# store history
# the context manager closes the file even if writing one of the datasets fails
with h5py.File("../models/%s_history.h5" %nameModel, "w") as f:
    f.create_dataset("training_loss", data=np.array(history.history['loss']),compression='gzip')
    f.create_dataset("validation_loss", data=np.array(history.history['val_loss']),compression='gzip')

# store model: architecture as JSON, weights as HDF5
model_json = model.to_json()
with open("../models/%s.json" %nameModel, "w") as json_file:
    json_file.write(model_json)
model.save_weights("../models/%s.h5" %nameModel)

# store the encoder the same way, under its own file names
encoder_json = encoder.to_json()
with open("../models/%s_ENCODER.json" %nameModel, "w") as json_file:
    json_file.write(encoder_json)
encoder.save_weights("../models/%s_ENCODER.h5" %nameModel)