In [None]:
import os

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras import backend as K
from keras import regularizers
from keras.layers import Activation
from keras.layers import Dense
from keras.layers import Dropout
from keras.models import Sequential
from keras.utils import plot_model
from keras.utils.np_utils import to_categorical
from sklearn.metrics import roc_curve, auc

In [None]:
# Read the previously pickled DataFrame from disk and list its column labels.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
df = pd.read_pickle('test.pkl')
print(df.keys())

In [None]:
# Event selection: keep rows with |hcal_ieta| >= 24 whose hcal_edepth2 values
# (both the _x and the _y column) are above 1e-5.
keep_rows = (
    (abs(df[b'hcal_ieta']) >= 24)
    & (df["b'hcal_edepth2'_x"] > 0.00001)
    & (df["b'hcal_edepth2'_y"] > 0.00001)
)
# Take an explicit copy: the selection is extended with new columns and scaled
# in place by later cells, and assigning into a frame that pandas still tracks
# as derived from another one raises SettingWithCopyWarning.
df = df.loc[keep_rows].copy()

In [None]:
# Snapshot the unscaled values under "*_un" names, because the scaling cell
# overwrites the source columns in place.
# The columns are appended in depth order 1..7, then ieta; later cells index
# columns by position, so this order must not change.
for depth in range(1, 8):
    df['hcal_edepth%d_un' % depth] = df["b'hcal_edepth%d'_x" % depth]

df['hcal_ieta_un'] = df[b'hcal_ieta']

In [None]:
# Min-max scale the input variables to [0, 1], one column at a time.
# NOTE(review): min/max are computed over ALL rows, including the trailing rows
# that are later held out as the test set, so test-set statistics leak into the
# scaling -- consider deriving min/max from the training rows only.
cols_to_minmax =[b'pt_of_muon',b'eta_of_muon',b'phi_of_muon',b'energy_of_muon',b'hcal_ieta',
                b'hcal_iphi',     b'IsolationR04',     b'IsolationR03',
                b'ecal_3into3',      b'hcal_3into3', 
                b'ecal_3x3',         b'hcal_1x1',
               "b'hcal_edepth1'_x", "b'hcal_edepth2'_x", "b'hcal_edepth3'_x",
               "b'hcal_edepth4'_x", "b'hcal_edepth5'_x", "b'hcal_edepth6'_x",
               "b'hcal_edepth7'_x"]


def _minmax_scale(col):
    """Return `col` rescaled to [0, 1]; a constant column maps to all zeros.

    Without the zero-range guard a constant column would evaluate 0/0 and fill
    the whole feature with NaN, which then propagates into training.
    """
    lo = col.min()
    span = col.max() - lo
    if span == 0:
        # Treat a zero range as 1 so the result is 0 everywhere instead of NaN.
        span = 1
    return (col - lo) / span


df[cols_to_minmax] = df[cols_to_minmax].apply(_minmax_scale)

In [None]:
# Quick visual check: one log-scale, 100-bin histogram per DataFrame column,
# each shown as its own figure.
for column_name in df.keys():
    column_label = str(column_name)
    plt.hist(df[column_name], bins=100, alpha=0.4, label=column_label)
    plt.yscale("log")
    plt.legend()
    plt.show()

In [None]:
# Split by position: the last `ntest` rows are held out for testing, all rows
# before them are used for training.
data = df.values
ntest = 20000
testindx = data.shape[0] - ntest

# Columns are selected by position, so this depends on the DataFrame's column
# order. Positions 0-11 and 13 are the model inputs; position 20 is the target.
feature_cols = np.r_[0:12, 13]
target_col = 20

train_rows = data[:testindx]
X_train = train_rows[:, feature_cols]
Y_train = train_rows[:, target_col]
# The test block keeps every column; later cells pick inputs/targets from it.
X_test = data[testindx:, :]

print(X_test.shape)
print("shape of X_train:", X_train.shape)
print("shape of Y_train:", Y_train.shape)

In [None]:
# Per-event sample weights: each training event is weighted by the inverse of
# the number of events in its target-value bin, so sparsely populated target
# ranges contribute more to the loss.
histoAR = Y_train.copy()
nbins = 4 ### works best[4 with no sqrt / 500 with sqrt normed] / 1000 with norm*100 / 4000 ok no sqrt
# The range is padded by 0.001 on both sides so every target value lies
# strictly inside the outermost edges and digitize never returns 0 or nbins+1.
true_hist = np.histogram(histoAR,bins = nbins,range=(np.min(Y_train)-0.001,np.max(Y_train)+0.001))
bin_counts, bin_edges = true_hist
# np.digitize yields 1-based bin numbers; subtract 1 to index the counts.
binweight = 1/bin_counts[np.digitize(histoAR, bin_edges) - 1]
# Rescale so that the distinct weight values add up to 100.
binweight = 100*binweight/np.sum(np.unique(binweight)) ## 5000/5 ok
print("sum weight:",np.sum(np.unique(binweight)))
print("true_hist",true_hist[0])
print("binweight",binweight)
print("weight_vals:",np.unique(binweight))
# Plot with the SAME edges that define the weights, so the bars match the
# printed counts and coincide with the red edge markers (letting plt.hist
# pick its own bins would ignore the padded range used above).
plt.hist(histoAR, bins=bin_edges)
for edge in bin_edges:
    plt.axvline(edge, color='r')
plt.yscale("log")

In [None]:
from keras.layers import LeakyReLU
from keras import optimizers

# Fully connected regression network: four hidden Dense layers, each followed
# by LeakyReLU(0.3) and 20% dropout, ending in a single linear output unit.
print ("creating model=========>")
model = Sequential()

hidden_widths = (50, 600, 600, 10)
for layer_no, width in enumerate(hidden_widths):
    if layer_no == 0:
        # Only the first layer declares the input shape (number of features).
        model.add(Dense(width, input_shape=(X_train.shape[1],)))
    else:
        model.add(Dense(width))
    model.add(LeakyReLU(alpha=0.3))
    model.add(Dropout(0.2))

model.add(Dense(1))

model.compile(loss='mse', optimizer='adam')
model.summary()

# Train with the per-event weights; the last 20% of the training rows are
# used by Keras as the validation set.
print ("fitting now=========>")
history = model.fit(
    X_train,
    Y_train,
    batch_size=5000,
    epochs=2000,
    validation_split=0.2,
    verbose=1,
    sample_weight=binweight,
)


In [None]:
print(history.history.keys())
# Training and validation loss per epoch, drawn on the same axes.
for curve_key in ('loss', 'val_loss'):
    plt.plot(history.history[curve_key])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Evaluate on the held-out rows, using the same input columns (positions 0-11
# and 13) that the model was trained on.
preds = model.predict(X_test[:, np.r_[0:12, 13]])

# Column 20 is the training target. Column 27 is kept as `uncorrected`.
# NOTE(review): confirm which DataFrame column sits at position 27.
# Both are reshaped to column vectors of shape (n_rows, 1).
targets = X_test[:, 20].reshape(-1, 1)
uncorrected = X_test[:, 27].reshape(-1, 1)

In [None]:
%matplotlib inline
# Overlay the target and predicted distributions for output column `i`.
i=0
hist_style = dict(bins=100, range=(0,100), histtype='step', linewidth=1.5)
for series, series_label in ((targets, 'truth no PU'), (preds, 'predicted')):
    plt.hist(series[:,i], label=series_label, **hist_style)
plt.legend(loc='upper right')
plt.show()

In [None]:
# Save the trained model to models/model.h5.
# `os` must be imported in the notebook's import cell: the original imports
# did not include it, so this cell raised NameError on a fresh kernel.
# exist_ok=True creates the directory if needed and is a no-op otherwise,
# replacing the separate exists() check.
os.makedirs('models', exist_ok=True)
model.save('models/model.h5')