# ECAL Laser correction with Deep Learning

Train a regression model for the CMS ECAL transparency correction, using the information
collected from the previous five readouts.

The input dataset consists of ROOT files with plain TTrees. Each file corresponds to a single crystal.

In [None]:
# download the dataset
! wget https://github.com/pierinim/tutorials/blob/master/RTA_Workshop/data.tar.gz?raw=true -O "data.tar.gz"
! tar -xzf data.tar.gz
! ls

In [1]:
# we first install uproot to read the input data 
# and convert them to a numpy array
! pip install uproot

/bin/sh: pip: command not found


In [None]:
import uproot as ur
import numpy as np

In [None]:
# Crystal indices to load: one input file exists per (iX, iY) pair.
# Bounds are written as inclusive first/last values for readability.
IX_FIRST, IX_LAST = 6, 24
IY_FIRST, IY_LAST = 131, 139
iXrange = range(IX_FIRST, IX_LAST + 1)
iYrange = range(IY_FIRST, IY_LAST + 1)

In [None]:
# Read the 'laser' tree of every crystal file and assemble one 2-D array per
# quantity, with one column per crystal (shape: readouts x crystals).
# Columns are collected in lists and concatenated once at the end, instead of
# re-copying the growing arrays after every file.
branch_names = ("transparency", "time", "time_in_fill", "lumi")
columns = {name: [] for name in branch_names + ("iX", "iY")}
for i in iXrange:
    for j in iYrange:
        file = ur.open("data/BlueLaser_2017_rereco_v2_newformat.root.filter.%i.%i.0.public.root" %(i,j))
        ecalModule = file.get('laser')
        my_size = None
        for name in branch_names:
            values = ecalModule[name].array()
            if my_size is None:
                # the first branch ("transparency") fixes the number of readouts;
                # the reshape below fails if another branch has a different length
                my_size = values.shape[0]
            columns[name].append(np.reshape(values, (my_size, 1)))
        # crystal coordinates, repeated for every readout of this crystal
        columns["iX"].append(i*np.ones((my_size,1)))
        columns["iY"].append(j*np.ones((my_size,1)))

# Joining along the last axis requires every crystal to have the same number of
# readouts. An empty index range leaves empty arrays, as before.
stacked = {name: (np.concatenate(cols, axis=-1) if cols else np.array([]))
           for name, cols in columns.items()}
transparency = stacked["transparency"]
time = stacked["time"]
time_in_fill = stacked["time_in_fill"]
lumi = stacked["lumi"]
iX = stacked["iX"]
iY = stacked["iY"]

In [None]:
# sanity check: (number of readouts, number of crystals)
print(np.shape(lumi))

In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
%matplotlib inline

In [None]:
# Luminosity as a function of time; each column of the 2-D arrays is drawn as
# its own line.
fig, ax = plt.subplots()
ax.plot(time, lumi)
ax.set_xlabel('Time [sec]')
ax.set_ylabel('Luminosity~$*~10^{-34}$')
plt.show()

In [None]:
# Transparency as a function of time; each column of the 2-D arrays is drawn as
# its own line.
fig, ax = plt.subplots()
ax.plot(time, transparency)
ax.set_xlabel('Time [sec]')
ax.set_ylabel('Transparency')
plt.show()

In [None]:
# Transparency as a function of the time elapsed in the fill.
# The axis labels now match the plotted quantities: x is time_in_fill and
# y is transparency (they were previously attached to the wrong axes).
plt.plot(time_in_fill, transparency)
plt.xlabel('Time in Fill [sec]')
plt.ylabel('Transparency')
plt.show()

# A simple DNN application example

We consider one crystal as the training dataset.
The target is the transparency at the next readout, given
- the transparency of the last 5 readouts
- their time
- their time in fill
- their luminosity
- the time of the next readout
- the time in fill of the next readout
- the luminosity of the next readout

In [None]:
# Use a single crystal (the first column of each array) as the dataset.
crystal_column = 0
my_transparency, my_time, my_time_in_fill, my_lumi = (
    quantity[:, crystal_column]
    for quantity in (transparency, time, time_in_fill, lumi)
)

In [None]:
# Build one sample per readout i >= N_HISTORY:
#   features = N_HISTORY previous readouts * (transparency, time, time in fill, luminosity)
#              + (time, time in fill, luminosity) of readout i
#            = 4 * 5 + 3 = 23 features
#   target   = transparency of readout i
N_HISTORY = 5
N_FEATURES = 4 * N_HISTORY + 3
Y = my_transparency[N_HISTORY:]
TimeY = my_time[N_HISTORY:]
rows = []
for i in range(N_HISTORY, my_transparency.shape[0]):
    window = slice(i - N_HISTORY, i)
    rows.append(np.concatenate((
        my_transparency[window],
        my_time[window],
        my_time_in_fill[window],
        my_lumi[window],
        np.array([my_time[i], my_time_in_fill[i], my_lumi[i]]),
    )))
# Stack once instead of re-copying X at every iteration. With too few readouts
# X stays 2-D (0 rows), so the 2-D slicing in later cells still works.
X = np.stack(rows) if rows else np.empty((0, N_FEATURES))
print(X.shape, Y.shape)

In [None]:
# Hold out every sample from index 2600 onwards for testing (504 readouts
# according to the original note); the first 2600 remain for training/validation.
# NOTE: X and Y are overwritten here, so running this cell a second time
# would leave X_test and Y_test empty. TimeY is intentionally left at full length.
N_TRAINVAL = 2600
X_test, X = X[N_TRAINVAL:, :], X[:N_TRAINVAL, :]
Y_test, Y = Y[N_TRAINVAL:], Y[:N_TRAINVAL]
TimeY_test = TimeY[N_TRAINVAL:]

In [None]:
# We now want to split the rest in training and validation in ~2:1
# first, we define a mask that is true at the 3rd, 6th, 9th, ... entry
entries = np.arange(X.shape[0])
one_every_three = (entries+1) % 3 == 0
two_every_three = ~one_every_three
print(one_every_three[:10])
print(two_every_three[:10])

# TimeY may still cover the held-out test readouts as well, so restrict it to
# the rows of X before applying the masks (boolean masks must match in length).
TimeY_trainval = TimeY[:X.shape[0]]

# and then define the dataset
X_val = X[one_every_three]
Y_val = Y[one_every_three]
# the time arrays are taken from TimeY (they were previously sliced from Y,
# which silently duplicated the targets)
TimeY_val = TimeY_trainval[one_every_three]
X_train = X[two_every_three]
Y_train = Y[two_every_three]
TimeY_train = TimeY_trainval[two_every_three]

print(X_train.shape, X_val.shape, X_test.shape)
print(Y_train.shape, Y_val.shape, Y_test.shape)
print(TimeY_train.shape, TimeY_val.shape, TimeY_test.shape)

# Model definition and training

In [None]:
# keras imports
from keras.models import Model
from keras.layers import Dense, Input, Dropout, Concatenate, Reshape, BatchNormalization, Activation
from keras.layers import MaxPooling2D
from keras.utils import plot_model
from keras import regularizers
from keras import backend as K
from keras import metrics
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, TerminateOnNaN

In [None]:
# Fully connected regression network:
#   input -> BatchNorm -> 4 x [Dense -> BatchNorm -> ReLU] -> Dense(1, sigmoid)
# Every hidden layer now gets its own BatchNorm + ReLU. Previously 'dense_relu3'
# had neither, so it composed linearly with 'dense_relu4' despite its name.
HIDDEN_UNITS = (20, 10, 30, 5)

inputLayer = Input(shape=(X_train.shape[1],))
x = BatchNormalization()(inputLayer)
for layer_index, units in enumerate(HIDDEN_UNITS, start=1):
    x = Dense(units, kernel_initializer='lecun_uniform', name='dense_relu%i' % layer_index)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
# The sigmoid bounds the prediction to (0, 1).
# NOTE(review): presumably the transparency is normalised to that range - confirm.
outputLayer = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name = 'output')(x)
model = Model(inputs=inputLayer, outputs=outputLayer)

In [None]:
# Configure training: mean absolute percentage error loss, Adam optimizer,
# then print the layer-by-layer summary.
model.compile(
    loss='mape',
    optimizer='adam',
)
model.summary()

In [None]:
# Training callbacks, all driven by the validation loss:
#  - stop after 10 epochs without improvement
#  - cut the learning rate by 10x after 2 epochs without improvement
#  - abort immediately if the loss becomes NaN
training_callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1),
    TerminateOnNaN(),
]

# Fit on the training set, monitoring the validation set after each epoch.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=500,
    batch_size=128,
    verbose=2,
    callbacks=training_callbacks,
)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.semilogy()
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# Predict on the held-out test set and drop the trailing axis so that Y_hat is
# 1-D, like Y_test.
predictions = model.predict(X_test)
Y_hat = predictions.reshape((predictions.shape[0],))

In [None]:
# 1) true transparency on the test set
fig, ax = plt.subplots()
ax.scatter(TimeY_test, Y_test, label="True")
ax.set_xlabel('Time')
ax.set_ylabel('Transparency')
plt.show()

# 2) true vs predicted transparency, drawn as lines
fig, ax = plt.subplots()
ax.plot(TimeY_test, Y_test, label="True")
ax.plot(TimeY_test, Y_hat, label="Predicted")
ax.set_xlabel('Time')
ax.set_ylabel('Transparency')
ax.legend()
ax.set_ylim((0.5, 1))
plt.show()

# 3) true vs predicted transparency, drawn as semi-transparent points
fig, ax = plt.subplots()
ax.scatter(TimeY_test, Y_test, label="True", alpha=0.5)
ax.scatter(TimeY_test, Y_hat, label="Predicted", alpha=0.5)
ax.set_xlabel('Time')
ax.set_ylabel('Transparency')
ax.legend()
ax.set_ylim((0.5, 1))
plt.show()

# 4) residual (true minus predicted) versus time
fig, ax = plt.subplots()
ax.plot(TimeY_test, Y_test - Y_hat, label="Residual")
ax.set_xlabel('Time')
ax.set_ylabel('Transparency Residual')
plt.show()