In [None]:
import ROOT
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow.keras.backend as K
import tensorflow as tf

In [None]:
# Example momentum (the sampled range is 0.3-10 GeV) and a reference mass.
# Both names are rebound by later cells.
p, mass = 0.3, 0.139

In [None]:
# Half of the sample is drawn flat in p, the other half flat in 1/p.
# Integer division keeps the sample size an int under Python 3 as well;
# np.random.uniform rejects a float size.
n_particles = 10000 // 2
p = np.random.uniform(0.3, 10., n_particles)
mp = np.random.uniform(1 / 10., 1 / 0.3, n_particles)
# Original author's open question: "many values greater than 10???".
# By construction 1/mp stays within the 0.3-10 momentum range.
p_mp = 1. / mp

In [None]:
# Average of the momenta that were sampled flat in 1/p.
p_mp.mean()

In [None]:
# Distribution of the momenta sampled flat in 1/p.
plt.hist(p_mp, 50, range=(0., 10.))
plt.xlabel("p_mp")
plt.ylabel("entries")
# plt.plot() without data draws nothing and only echoes an empty list;
# plt.show() renders the figure explicitly.
plt.show()

In [None]:
# Combined momentum sample ("ges" = total): flat-in-p part followed by flat-in-1/p part.
p_ges = np.concatenate((p, p_mp))

In [None]:
# Distribution of the combined momentum sample.
plt.hist(p_ges, 50, range=(0, 10))
plt.xlabel("p_ges")
plt.ylabel("entries")
# plt.plot() without data draws nothing and only echoes an empty list;
# plt.show() renders the figure explicitly.
plt.show()

In [None]:
# Particle masses: pion, muon, electron, proton, kaon.
# Presumably in GeV, matching the momentum range used above - TODO confirm.
mass_pi, mass_mu, mass_e, mass_p, mass_K = 0.139, 0.105, 0.000511, 0.938, 0.494
# The list position of a mass is later used as the integer particle label.
masses = [mass_pi, mass_mu, mass_e, mass_p, mass_K]
masses

In [None]:
# One dict of simulated detector signals is appended here per particle species.
signals = list()

In [None]:
# Simulate the detector response of every species for every momentum in p_ges.
# For each mass one dict {detector name: list of smeared signals} is appended
# to `signals`, in the same order as `masses`.
for mass in masses:
    ITS_tmp = []
    TPCROC0_tmp = []
    TPCROC1_tmp = []
    TPCROC2_tmp = []
    TRD_tmp = []
    TOF_tmp = []
    for p in p_ges:
        bg = p / mass  # beta*gamma
        beta = bg / math.sqrt(1. + bg * bg)
        BBS = ROOT.AliExternalTrackParam.BetheBlochSolid(bg)
        BBA = ROOT.AliExternalTrackParam.BetheBlochAleph(bg)
        # Every signal is its expected value smeared with a Gaussian of 10% relative width.
        ITS_tmp.append(np.random.normal(BBS, 0.1 * BBS))      # ITS dEdx
        TPCROC0_tmp.append(np.random.normal(BBA, 0.1 * BBA))  # TPC dEdx, 1st layer
        TPCROC1_tmp.append(np.random.normal(BBA, 0.1 * BBA))  # TPC dEdx, 2nd layer
        TPCROC2_tmp.append(np.random.normal(BBA, 0.1 * BBA))  # TPC dEdx, 3rd layer
        TRD_tmp.append(np.random.normal(BBA, 0.1 * BBA))      # TRD dEdx
        TOF_tmp.append(np.random.normal(beta, 0.1 * beta))    # TOF: smeared beta
    # The 'TPCROC1' key used to be listed twice; the duplicate was redundant.
    signals.append({'ITS': ITS_tmp, 'TPCROC0': TPCROC0_tmp, 'TPCROC1': TPCROC1_tmp,
                    'TPCROC2': TPCROC2_tmp, 'TRD': TRD_tmp, 'TOF': TOF_tmp})

In [None]:
# ITS signal versus momentum for species index 2 (mass_e in `masses`).
plt.hist2d(p_ges, signals[2]["ITS"], bins=(100, 100), cmap=plt.cm.jet, range=[[0.2, 2], [0, 0.004]])
plt.xlabel("p")
plt.ylabel("ITS")
# plt.plot() without data draws nothing and only echoes an empty list;
# plt.show() renders the figure explicitly.
plt.show()

In [None]:
# Build one DataFrame per species: detector signals plus the momentum and an
# integer species label (the position of the species in `masses`).
df_list = []
for i in range(len(masses)):
    df = pd.DataFrame(signals[i])
    df['p'] = p_ges
    df['particle'] = i
    df_list.append(df)


In [None]:
# First rows of the frame with species label 3 (mass_p in `masses`).
df_list[3].head(n=5)

In [None]:
# Stack the species frames into one sample; species index 1 (mass_mu) is left out.
df_all = pd.concat([df_list[k] for k in (0, 2, 3, 4)], ignore_index=True)

In [None]:
# Number of rows in the combined sample.
df_all.shape[0]

In [None]:
# TPCROC1 signal versus momentum for the combined sample.
plt.hist2d(df_all["p"], df_all["TPCROC1"], bins=(100, 100), cmap=plt.cm.jet, range=[[0.2, 2], [0.5, 3]])
plt.xlabel("p")
plt.ylabel("TPCROC1")
# plt.plot() without data draws nothing and only echoes an empty list;
# plt.show() renders the figure explicitly.
plt.show()

In [None]:
# Random 50/50 split of the combined sample (no fixed seed, so the split
# differs from run to run).
train, test = train_test_split(df_all, test_size=0.5)
test.head(n=5)

In [None]:
# Standardise the six detector columns using statistics of the training half only.
scaler = StandardScaler()
scaler.fit(train.loc[:, ["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"]])
#out = scaler.transform(train)

In [None]:
# Autoencoder: 6 inputs -> 64 -> 64 -> 64 -> 2 (bottleneck) -> 64 -> 64 -> 64 -> 6 outputs.
# The 2-unit bottleneck is the layer at index 3.
model = Sequential([
    Dense(units=64, activation='selu', input_dim=6),
    Dense(units=64, activation='selu'),
    Dense(units=64, activation='selu'),
    Dense(units=2, activation='selu'),
    Dense(units=64, activation='selu'),
    Dense(units=64, activation='selu'),
    Dense(units=64, activation='selu'),
    Dense(units=6, activation='linear'),
])
# Reconstruction is trained and monitored with the mean squared error.
model.compile(loss='mse', optimizer='adam', metrics=['mse'])
model.summary()

In [None]:
# Train the autoencoder to reproduce its own (standardised) input.
train_data = train[["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"]]
test_data = test[["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"]]
# Transform once and reuse: input and target are the same array.
train_scaled = scaler.transform(train_data)
test_scaled = scaler.transform(test_data)
# Keras documents validation_data as a tuple (x_val, y_val); a list is not
# unpacked the same way by every version.
model.fit(train_scaled, train_scaled, epochs=5, batch_size=32,
          validation_data=(test_scaled, test_scaled))

In [None]:
# Reconstruct the test half and map the result back to detector units.
reconstructed_scaled = model.predict(scaler.transform(test_data))
out = scaler.inverse_transform(reconstructed_scaled)

In [None]:
# Put the reconstructed signals ("*_ae") next to the original test rows.
AE_predict = pd.DataFrame(
    out,
    columns=["ITS_ae", "TOF_ae", "TPCROC0_ae", "TPCROC1_ae", "TPCROC2_ae", "TRD_ae"],
)
# reset_index gives the test rows a 0..n-1 index (old labels go to an "index"
# column) so they line up row by row with AE_predict.
test_ri = test.reset_index()
df_test = pd.concat([test_ri, AE_predict], axis=1)
df_test.head()

In [None]:
# TPCROC1 versus momentum: simulated sample first, autoencoder output second.
for frame, ycol in ((df_all, "TPCROC1"), (df_test, "TPCROC1_ae")):
    plt.hist2d(frame["p"], frame[ycol], bins=(100, 100), cmap=plt.cm.jet, range=[[0.2, 2], [0.5, 3]])
    plt.show()

In [None]:
# TPCROC1 versus momentum for a single species: simulated sample first,
# autoencoder output second.
particle_id = 2
for frame, ycol in ((df_all, "TPCROC1"), (df_test, "TPCROC1_ae")):
    # Filter once per frame instead of running the same query for x and for y.
    selected = frame.query("particle == {}".format(particle_id))
    plt.hist2d(selected["p"], selected[ycol], bins=(100, 100), cmap=plt.cm.jet, range=[[0.2, 2], [0.5, 3]])
    plt.show()

In [None]:
# TOF versus momentum: simulated sample first, autoencoder output second.
for frame, ycol in ((df_all, "TOF"), (df_test, "TOF_ae")):
    plt.hist2d(frame["p"], frame[ycol], bins=(100, 100), cmap=plt.cm.jet, range=[[0.2, 5], [0.4, 1.2]])
    plt.show()

In [None]:
# ITS versus momentum: simulated sample first, autoencoder output second.
for frame, ycol in ((df_all, "ITS"), (df_test, "ITS_ae")):
    plt.hist2d(frame["p"], frame[ycol], bins=(100, 100), cmap=plt.cm.jet, range=[[0.2, 2], [0, 0.006]])
    plt.show()

# Include zero entries

In [None]:
def flip(p):
    """Return 1 with probability ``p`` and 0 otherwise (one uniform draw)."""
    return 1 if np.random.random() < p else 0


# Keep/drop mask: columns A-F are 1 with probability 0.95 and 0 otherwise;
# G and H are always 1, so the two columns they multiply stay unchanged.
# NOTE(review): N presumably has to equal the row count of the frame the
# mask is applied to - confirm.
N = 40000
flips = []
# range() works on Python 2 and 3 (xrange is Python 2 only); the two loops
# use distinct variable names so the inner one no longer shadows the outer.
for _column in range(6):
    flips.append([flip(0.95) for _row in range(N)])
ones = np.ones(N)
flip_dict = {"A": flips[0], "B": flips[1], "C": flips[2], "D": flips[3],
             "E": flips[4], "F": flips[5], "G": ones, "H": ones}
flip_df = pd.DataFrame.from_dict(flip_dict)
np.array(flip_df)

In [None]:
# Zero out random detector entries by multiplying with the 0/1 mask.
# The mask columns (A-H) do not share labels with df_all, so the product must
# be positional: recent pandas aligns DataFrame operands of a ufunc by label,
# which would give an all-NaN result. Multiplying by the raw array keeps the
# element-by-element behaviour and df_all's own labels.
assert df_all.shape == flip_df.shape, "mask and data must have the same shape"
df_all_zeros = df_all * flip_df.values
# Split data and mask together so rows stay paired.
train_0, test_0, flipdftrain, flipdftest = train_test_split(df_all_zeros, flip_df, test_size=0.5)
train_data_0 = train_0[["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"]]
test_data_0 = test_0[["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"]]

In [None]:
# Scaler for the zero-entry study.
# NOTE(review): fitted on `train` (no zeroed entries), not on `train_data_0` -
# confirm this is intended.
scaler_0 = StandardScaler()
scaler_0.fit(train.loc[:, ["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"]])

In [None]:
# Continue training the same model on the sample with zeroed entries
# (input and target are both the zeroed data).
train_0_scaled = scaler_0.transform(train_data_0)
test_0_scaled = scaler_0.transform(test_data_0)
# Keras documents validation_data as a tuple (x_val, y_val); a list is not
# unpacked the same way by every version.
model.fit(train_0_scaled, train_0_scaled, epochs=5, batch_size=32,
          validation_data=(test_0_scaled, test_0_scaled))

In [None]:
# Reconstruct the zeroed test half and attach the result as "*_ae" columns.
reconstructed_0 = model.predict(scaler_0.transform(test_data_0))
out_0 = scaler_0.inverse_transform(reconstructed_0)
AE_predict_0 = pd.DataFrame(
    out_0,
    columns=["ITS_ae", "TOF_ae", "TPCROC0_ae", "TPCROC1_ae", "TPCROC2_ae", "TRD_ae"],
)
# test_0 is rebound to its reset-index version (old labels go to an "index" column).
test_0 = test_0.reset_index()
df_test_0 = pd.concat([test_0, AE_predict_0], axis=1)
df_test_0.head()

In [None]:
# TPCROC1 versus momentum for the zero-entry study, in this order:
# zeroed input, reconstruction, reconstruction where the input entry was
# zeroed, reconstruction where the input entry was present.
# Each selection is evaluated once rather than once per axis.
for frame, ycol in (
    (df_all_zeros, "TPCROC1"),
    (df_test_0, "TPCROC1_ae"),
    (df_test_0.query("TPCROC1 == 0"), "TPCROC1_ae"),
    (df_test_0.query("TPCROC1 > 0"), "TPCROC1_ae"),
):
    plt.hist2d(frame["p"], frame[ycol], bins=(100, 100), cmap=plt.cm.jet, range=[[0.2, 2], [0.5, 3]])
    plt.show()

In [None]:
# Same comparison as for the full sample, restricted to one species.
particle_id = 4
species = "particle == {}".format(particle_id)
# The combined selections used to be built as "particle ==4and TPCROC1==0":
# the number ran straight into "and", which newer Python tokenizers warn
# about or reject. The strings are now properly spaced and each selection is
# evaluated once.
for frame, ycol in (
    (df_all_zeros.query(species), "TPCROC1"),
    (df_test_0.query(species), "TPCROC1_ae"),
    (df_test_0.query(species + " and TPCROC1 == 0"), "TPCROC1_ae"),
    (df_test_0.query(species + " and TPCROC1 > 0"), "TPCROC1_ae"),
):
    plt.hist2d(frame["p"], frame[ycol], bins=(100, 100), cmap=plt.cm.jet, range=[[0.2, 2], [0.5, 3]])
    plt.show()

# Labeled Zero Entries
Input has missing (zeroed) entries; the network is trained against targets that contain all entries.

In [None]:
# Zero out random entries of the existing train/test halves with randomly
# drawn mask rows. The mask is applied positionally (via .values): the sampled
# mask rows carry neither the row labels nor the column labels of the data,
# and recent pandas would align a DataFrame-by-DataFrame ufunc by label and
# return NaN.
# The sample size follows the data instead of a hard-coded 20000.
train_l0 = train * flip_df.sample(n=len(train)).values
test_l0 = test * flip_df.sample(n=len(test)).values
train_data_l0 = train_l0[["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"]]
test_data_l0 = test_l0[["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"]]

In [None]:
# Separate scalers for the network input (zeroed data) and target (complete data).
scaler_l0in, scaler_l0out = StandardScaler(), StandardScaler()
scaler_l0in.fit(train_data_l0)
scaler_l0out.fit(train_data)

In [None]:

# Train with zeroed data as input and the complete data as target.
# Keras documents validation_data as a tuple (x_val, y_val); a list is not
# unpacked the same way by every version.
model.fit(scaler_l0in.transform(train_data_l0), scaler_l0out.transform(train_data),
          epochs=5, batch_size=32,
          validation_data=(scaler_l0in.transform(test_data_l0), scaler_l0out.transform(test_data)))

In [None]:
# Reconstruct the zeroed test half with the network trained on
# (zeroed input -> complete target).
predicted_l0 = model.predict(scaler_l0in.transform(test_data_l0))
# The network output lives in the *target* scaling (scaler_l0out), so that is
# the scaler that maps it back to detector units; the input scaler was used
# here before.
out_l0 = scaler_l0out.inverse_transform(predicted_l0)
AE_predict_l0 = pd.DataFrame(out_l0)
AE_predict_l0.columns = ["ITS_ae", "TOF_ae", "TPCROC0_ae", "TPCROC1_ae", "TPCROC2_ae", "TRD_ae"]
# test_l0 is rebound to its reset-index version so rows line up with the predictions.
test_l0 = test_l0.reset_index()
df_test_l0 = pd.concat([test_l0, AE_predict_l0], axis=1)
df_test_l0.head()

In [None]:
# TPCROC1 versus momentum, in this order: zeroed sample, labeled-zero
# reconstruction, earlier zero-entry reconstruction, labeled-zero
# reconstruction where the input entry was zeroed / was present.
# Each selection is evaluated once rather than once per axis.
for frame, ycol in (
    (df_all_zeros, "TPCROC1"),
    (df_test_l0, "TPCROC1_ae"),
    (df_test_0, "TPCROC1_ae"),
    (df_test_l0.query("TPCROC1 == 0"), "TPCROC1_ae"),
    (df_test_l0.query("TPCROC1 > 0"), "TPCROC1_ae"),
):
    plt.hist2d(frame["p"], frame[ycol], bins=(100, 100), cmap=plt.cm.jet, range=[[0.2, 2], [0.5, 3]])
    plt.show()

# Loss disregarding zero entries

In [None]:
# Per-column minimum of the scaled, zeroed training data.
# Presumably this is the scaled value of a zeroed entry - TODO confirm.
zerofinder = pd.DataFrame(
    scaler_0.transform(train_data_0),
    columns=["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"],
)
mins = []
for el in zerofinder.columns:
    minimum = zerofinder[el].values.min()
    mins.append(minimum)
    print(minimum)

In [None]:
def loss_nozeros(y_true, y_pred):
    """Mean squared error that ignores entries equal to the per-column minimum.

    An entry of ``y_true`` contributes only if it is strictly greater than
    the matching value in the module-level ``mins`` list.
    NOTE(review): this assumes zeroed entries scale exactly to the column
    minimum, i.e. all genuine signals are positive - confirm.

    Parameters
    ----------
    y_true : tensor of shape (batch, n_features), scaled targets.
    y_pred : tensor of the same shape, network output.

    Returns
    -------
    Tensor of shape (batch,): per-sample mean of the squared error over the
    kept entries (0 for a sample with no kept entry).
    """
    y_true = tf.cast(y_true, y_pred.dtype)
    # Shape (n_features,); broadcasts against (batch, n_features). The previous
    # version multiplied a (batch,) vector by this tensor, which does not
    # broadcast, and called tf.where with a missing argument.
    mins_tensor = tf.cast(tf.convert_to_tensor(mins), y_pred.dtype)
    keep = tf.cast(tf.greater(y_true, mins_tensor), y_pred.dtype)
    squared_error = K.square(y_true - y_pred) * keep
    # Guard against division by zero when every entry of a sample is masked.
    n_kept = tf.maximum(tf.reduce_sum(keep, axis=-1), 1.0)
    return tf.reduce_sum(squared_error, axis=-1) / n_kept

In [None]:
#model.compile(loss=loss_nozeros,
#              optimizer='adam')
#model.fit(scaler_0.transform(train_data_0), scaler_0.transform(train_data_0), epochs=5, batch_size=32, 
#          validation_data=[scaler_0.transform(test_data_0),scaler_0.transform(test_data_0)])

# Input transformation (two autoencoder iterations)

In [None]:
# First iteration: reconstruct the zeroed *training* half.
# NOTE(review): `scaler` is used here although the model was last trained
# with other scalers - confirm.
out_tr0 = scaler.inverse_transform(model.predict(scaler.transform(train_data_0)))
AE_predict_tr0 = pd.DataFrame(out_tr0)
AE_predict_tr0.columns = ["ITS_ae", "TOF_ae", "TPCROC0_ae", "TPCROC1_ae", "TPCROC2_ae", "TRD_ae"]
# train_0 is rebound to its reset-index version so rows line up with the predictions.
train_0 = train_0.reset_index()
# Attach the training-set predictions computed above. The test-set frame
# AE_predict_0 was concatenated here before, pairing training rows with
# unrelated reconstructions.
df_train_0 = pd.concat([train_0, AE_predict_tr0], axis=1)
df_train_0.head()

In [None]:
# Where a detector entry is zero take the autoencoder reconstruction,
# otherwise keep the original value.
ITS_1it = np.where(df_train_0["ITS"].eq(0), df_train_0["ITS_ae"], df_train_0["ITS"])
TOF_1it = np.where(df_train_0["TOF"].eq(0), df_train_0["TOF_ae"], df_train_0["TOF"])
TPCROC0_1it = np.where(df_train_0["TPCROC0"].eq(0), df_train_0["TPCROC0_ae"], df_train_0["TPCROC0"])
TPCROC1_1it = np.where(df_train_0["TPCROC1"].eq(0), df_train_0["TPCROC1_ae"], df_train_0["TPCROC1"])
TPCROC2_1it = np.where(df_train_0["TPCROC2"].eq(0), df_train_0["TPCROC2_ae"], df_train_0["TPCROC2"])
TRD_1it = np.where(df_train_0["TRD"].eq(0), df_train_0["TRD_ae"], df_train_0["TRD"])

In [None]:
# Training frame for the second iteration: one column per detector, zeros filled in.
train_1it = pd.DataFrame(
    np.column_stack([ITS_1it, TOF_1it, TPCROC0_1it, TPCROC1_1it, TPCROC2_1it, TRD_1it]),
    columns=["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"],
)

In [None]:
# First rows of the zero-filled training frame.
train_1it.head(n=5)

In [None]:
# First iteration on the zeroed *test* half: reconstruct, attach the
# reconstruction, then fill zeroed entries with it.
out_tt0 = scaler.inverse_transform(model.predict(scaler.transform(test_data_0)))
AE_predict_tt0 = pd.DataFrame(out_tt0)
AE_predict_tt0.columns = ["ITS_ae", "TOF_ae", "TPCROC0_ae", "TPCROC1_ae", "TPCROC2_ae", "TRD_ae"]
test_0 = test_0.reset_index()
# Use the predictions computed just above; the stale AE_predict_0 from an
# earlier cell was concatenated here before, leaving AE_predict_tt0 unused.
df_test_0 = pd.concat([test_0, AE_predict_tt0], axis=1)
# Remove the bookkeeping columns left behind by reset_index; tolerate their
# absence so the cell does not depend on how often test_0 was reset.
df_test_0 = df_test_0.drop(["index", "level_0"], axis=1, errors="ignore")
# Where a detector entry is zero take the reconstruction, otherwise keep the value.
ITS_1it_tt = np.where(df_test_0.ITS == 0, df_test_0.ITS_ae, df_test_0.ITS)
TOF_1it_tt = np.where(df_test_0.TOF == 0, df_test_0.TOF_ae, df_test_0.TOF)
TPCROC0_1it_tt = np.where(df_test_0.TPCROC0 == 0, df_test_0.TPCROC0_ae, df_test_0.TPCROC0)
TPCROC1_1it_tt = np.where(df_test_0.TPCROC1 == 0, df_test_0.TPCROC1_ae, df_test_0.TPCROC1)
TPCROC2_1it_tt = np.where(df_test_0.TPCROC2 == 0, df_test_0.TPCROC2_ae, df_test_0.TPCROC2)
TRD_1it_tt = np.where(df_test_0.TRD == 0, df_test_0.TRD_ae, df_test_0.TRD)

In [None]:
# Test frame for the second iteration: one column per detector, zeros filled in.
test_1it = pd.DataFrame(
    np.column_stack([ITS_1it_tt, TOF_1it_tt, TPCROC0_1it_tt, TPCROC1_1it_tt, TPCROC2_1it_tt, TRD_1it_tt]),
    columns=["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"],
)

In [None]:
# First rows of the test frame with reconstruction columns attached.
df_test_0.head(n=5)

In [None]:
# Second iteration: train the autoencoder on the zero-filled data.
scaler_1it = StandardScaler()
columns_1it = ["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"]
scaler_1it.fit(train_1it[columns_1it])
# Transform once and reuse: input and target are the same array.
train_1it_scaled = scaler_1it.transform(train_1it[columns_1it])
test_1it_scaled = scaler_1it.transform(test_1it[columns_1it])
# Keras documents validation_data as a tuple (x_val, y_val); a list is not
# unpacked the same way by every version.
model.fit(train_1it_scaled, train_1it_scaled, epochs=5, batch_size=32,
          validation_data=(test_1it_scaled, test_1it_scaled))

In [None]:
# Reconstruct the zero-filled test frame and attach the result as "*_ae" columns.
out_1it = scaler_1it.inverse_transform(model.predict(scaler_1it.transform(test_1it[["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"]])))
AE_predict_1it = pd.DataFrame(out_1it)
AE_predict_1it.columns = ["ITS_ae", "TOF_ae", "TPCROC0_ae", "TPCROC1_ae", "TPCROC2_ae", "TRD_ae"]
df_test_1it = pd.concat([test_1it, AE_predict_1it], axis=1)
# test_1it was built row by row from df_test_0, so momentum and species label
# are copied over positionally. They used to be re-indexed onto the index of
# a leftover, shorter frame (`df`), which left the remaining rows NaN.
df_test_1it['p'] = df_test_0["p"].values
df_test_1it['particle'] = df_test_0["particle"].values
df_test_1it.head()

In [None]:
# Remove a leftover "index" column if there is one. The frame is built without
# reset_index, so the column is normally absent; errors="ignore" avoids the
# KeyError a plain drop raises for a missing label.
df_test_1it = df_test_1it.drop("index", axis=1, errors="ignore")

In [None]:
# TPCROC1 versus momentum, in this order: zeroed sample, second-iteration
# reconstruction, first zero-entry reconstruction, second-iteration
# reconstruction restricted to rows with TPCROC1 > 0.
# The selection is evaluated once rather than once per axis.
for frame, ycol in (
    (df_all_zeros, "TPCROC1"),
    (df_test_1it, "TPCROC1_ae"),
    (df_test_0, "TPCROC1_ae"),
    (df_test_1it.query("TPCROC1 > 0"), "TPCROC1_ae"),
):
    plt.hist2d(frame["p"], frame[ycol], bins=(100, 100), cmap=plt.cm.jet, range=[[0.2, 2], [0.5, 3]])
    plt.show()

In [None]:
# TPCROC1 versus momentum for a single species: simulated sample first,
# second-iteration reconstruction second.
particle_id = 4
for frame, ycol in ((df_all, "TPCROC1"), (df_test_1it, "TPCROC1_ae")):
    # Filter once per frame instead of running the same query for x and for y.
    selected = frame.query("particle == {}".format(particle_id))
    plt.hist2d(selected["p"], selected[ycol], bins=(100, 100), cmap=plt.cm.jet, range=[[0.2, 2], [0.5, 3]])
    plt.show()

# Plot latent space

In [None]:
# Backend function mapping the network input (plus the learning-phase flag)
# to the output of the layer at index 3, the 2-unit bottleneck.
latent_inputs = [model.layers[0].input, K.learning_phase()]
latent_outputs = [model.layers[3].output]
f = K.function(latent_inputs, latent_outputs)

In [None]:
# Evaluate the bottleneck for the zero-filled test frame. The network was
# trained on inputs standardised with scaler_1it, so the same transform is
# applied here; raw detector values were fed in before.
# NOTE(review): fixed histogram ranges chosen for the old latent values may
# need re-tuning.
# The trailing 1 is the learning-phase flag passed alongside the data.
lat_space = f([scaler_1it.transform(test_1it[["ITS", "TOF", "TPCROC0", "TPCROC1", "TPCROC2", "TRD"]]), 1])

In [None]:
# Shape of the first latent coordinate across all evaluated rows.
np.array(lat_space)[-1][:, 0].shape

In [None]:
# 2D histogram of the two bottleneck coordinates.
latent = np.array(lat_space)[-1]
plt.hist2d(latent[:, 0], latent[:, 1], range=[[-0.1, 0.75], [1, 2.1]], bins=100)
plt.show()