# Importing Required Libraries

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sb
sb.set_style('darkgrid')
from tensorflow import keras
from keras.layers import Dense,Dropout,Input
from keras.models import Model
from keras.utils import plot_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import os

# Loading Data

In [None]:
# Walk the Kaggle input directory and print the full path of every file in it.
input_root = '/kaggle/input'
for folder, _, names in os.walk(input_root):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)


In [None]:
# Read the dielectron CSV from the Kaggle input directory into a DataFrame.
data = pd.read_csv('/kaggle/input/cern-electron-collision-data/dielectron.csv')

In [None]:
# Preview the first rows to check that the file was parsed as expected.
data.head()

**Data Information**
* Run: The run number of the event.
* Event: The event number.
* E1, E2: The total energy of the electron (GeV) for electrons 1 and 2.
* px1,py1,pz1,px2,py2,pz2: The components of the momentum of the electron 1 and 2 (GeV).
* pt1, pt2: The transverse momentum of the electron 1 and 2 (GeV).
* eta1, eta2: The pseudorapidity of the electron 1 and 2.
* phi1, phi2: The phi angle of the electron 1 and 2 (rad).
* Q1, Q2: The charge of the electron 1 and 2.
* M: The invariant mass of two electrons (GeV).

# Checking for Null Values

In [None]:
# Count the missing values in each column.
data.isnull().sum()

In [None]:
# Replace missing invariant-mass values with the median of the column.
mass_median = data['M'].median()
data['M'] = data['M'].fillna(mass_median)

# EDA

In [None]:
# Summary statistics for every column.
data.describe()

In [None]:
# Annotated heatmap of the pairwise correlations between all columns.
plt.figure(figsize=(25, 15))
corr_matrix = data.corr()
sb.heatmap(corr_matrix, annot=True)


**Run and Invariant M**

In [None]:
# Line plot of the Run column against the DataFrame index.
plt.figure(figsize=(25, 5))
run_values = data['Run']
plt.plot(run_values)
plt.title('Run distribution')
plt.ylabel('Run')
plt.xlabel('Count')
plt.show()

In [None]:
# Scatter of invariant mass M against Run with a fitted regression line.
# x and y are passed by keyword: recent seaborn releases make them keyword-only
# and raise a TypeError when they are given positionally.
plt.figure(figsize = (25,5))
sb.regplot(x = data['Run'],y = data['M'])
plt.title('M vs Run')

* The regression plot shows that events with lower invariant mass tend to occur at lower run numbers, while events with higher invariant mass tend to occur at higher run numbers.

**Event and Invariant Mass**

In [None]:
# Kernel density estimate of the Event column.
plt.figure(figsize=(25, 5))
event_values = data['Event']
sb.kdeplot(event_values, color='red');
plt.title('Event Distribution');

In [None]:
# Scatter of invariant mass M against Event with a fitted regression line.
# x and y are passed by keyword: recent seaborn releases make them keyword-only.
plt.figure(figsize = (25,10))
sb.regplot(x = data['Event'],y = data['M'],color = 'red')
# Title added so the figure is self-describing, matching the 'M vs Run' plot.
plt.title('M vs Event')

* The regression line is nearly flat, so the invariant mass does not depend much on the event number.

**E1 and Invariant Mass**

In [None]:
# Line plot of both electron energies against the DataFrame index.
plt.figure(figsize = (25,5))
plt.plot(data['E1'],color = 'green',label = 'E1')
plt.plot(data['E2'],color = 'greenyellow',label = 'E2')
plt.legend()
plt.title('E1,E2 Distribution')
# plt.plot on a Series puts the index on the x-axis and the values on the
# y-axis, so the energy label belongs on y (it was previously set on x).
plt.xlabel('Row index')
plt.ylabel('E1,E2 (GeV)')
plt.show()


In [None]:
# Kernel density estimates of the two electron energies on one set of axes.
plt.figure(figsize = (25,5))
sb.kdeplot(data['E1'],color = 'green',label = 'E1')
sb.kdeplot(data['E2'],color = 'lawngreen',label = 'E2')
# plt.legend must be called; the bare attribute reference drew no legend.
plt.legend()
plt.title('Kernel Density Estimate for E1,E2');

In [None]:
# Scatter of invariant mass M against E1 with a fitted regression line.
# x and y are passed by keyword: recent seaborn releases make them keyword-only.
plt.figure(figsize = (25,10))
sb.regplot(x = data['E1'],y = data['M'],color = 'violet')
plt.title('E1 vs M')

* As E1 increases invariant mass tends to increase

In [None]:
# Scatter of invariant mass M against E2 with a fitted regression line.
# x and y are passed by keyword: recent seaborn releases make them keyword-only.
plt.figure(figsize = (25,10))
sb.regplot(x = data['E2'],y = data['M'],color = 'orange')
plt.title('E2 vs M')

* As E2 increases, invariant mass tends to increase

In [None]:
# Scatter of E2 against E1 with a fitted regression line.
# x and y are passed by keyword: recent seaborn releases make them keyword-only.
plt.figure(figsize = (25,10))
sb.regplot(x = data['E1'],y = data['E2'],color = 'pink')
plt.title('E1 vs E2')

**px1,py1,pz1 and Invariant Mass**

In [None]:
# 2x3 grid of line plots: one row per electron, one column per momentum
# component, each plotted against the DataFrame index.
fig,ax = plt.subplots(2,3,figsize = (25,10))
# NOTE: the 'px1 ' column name carries a trailing space in this DataFrame.
momentum_columns = [['px1 ','py1','pz1'],
                    ['px2','py2','pz2']]
component_colors = ['deeppink','deepskyblue','yellow']
# A loop replaces six copy-pasted plot/title pairs so the panels cannot drift apart.
for row,row_columns in enumerate(momentum_columns):
    for col,(column,color) in enumerate(zip(row_columns,component_colors)):
        ax[row][col].plot(data[column],color = color)
        # strip() drops the trailing space so the title reads 'px1'.
        ax[row][col].set_title(column.strip())
plt.show()

* The above distribution plot shows the presence of outliers

In [None]:
# Overlaid kernel density estimates of px1 and px2.
plt.figure(figsize=(25, 10))
# (column name, line colour, legend label); 'px1 ' has a trailing space in the data.
px_series = (('px1 ', 'blueviolet', 'px1'), ('px2', 'red', 'px2'))
for column, shade, legend_name in px_series:
    sb.kdeplot(data[column], color=shade, label=legend_name)
plt.legend();
plt.title('px1 and px2 kernel density estimates');

In [None]:
# Overlaid kernel density estimates of py1 and py2.
plt.figure(figsize = (25,10))
sb.kdeplot(data['py1'],color = 'dodgerblue',label = 'py1');
# Legend label corrected from 'py' to 'py2' to match the plotted column.
sb.kdeplot(data['py2'],color = 'deeppink',label = 'py2');
plt.legend();
plt.title('py1 and py2 kernel density estimates');

In [None]:
# Overlaid kernel density estimates of pz1 and pz2.
plt.figure(figsize=(25, 10))
pz_colors = {'pz1': 'aqua', 'pz2': 'violet'}
for column, shade in pz_colors.items():
    sb.kdeplot(data[column], color=shade, label=column)
plt.legend();
plt.title('pz1 and pz2 kernel density estimates');

In [None]:
# Regression plots of invariant mass M against px1 and px2 on shared axes.
# x and y are passed by keyword: recent seaborn releases make them keyword-only.
plt.figure(figsize = (25,10))
# NOTE: the 'px1 ' column name carries a trailing space in this DataFrame.
sb.regplot(x = data['px1 '],y = data['M'],color = 'red',label = 'px1')
sb.regplot(x = data['px2'],y = data['M'],color = 'darkorchid',label = 'px2')
plt.legend()
plt.title('Regression Plot of px1 and px2 with Invariant Mass')
plt.show()


* As px1 increases Invariant Mass tends to increase slightly
* As px2 increases Invariant Mass tends to decrease slightly

In [None]:
# Regression plots of invariant mass M against py1 and py2 on shared axes.
# x and y are passed by keyword: recent seaborn releases make them keyword-only.
plt.figure(figsize = (25,10))
sb.regplot(x = data['py1'],y = data['M'],color = 'crimson',label = 'py1')
sb.regplot(x = data['py2'],y = data['M'],color = 'orchid',label = 'py2')
plt.legend()
plt.title('Regression Plot of py1 and py2 with Invariant Mass')
plt.show()

* as py1 increases invariant mass increases
* as py2 increases invariant mass decreases

In [None]:
# Regression plots of invariant mass M against pz1 and pz2 on shared axes.
# x and y are passed by keyword: recent seaborn releases make them keyword-only.
plt.figure(figsize = (25,10))
sb.regplot(x = data['pz1'],y = data['M'],color = 'limegreen',label = 'pz1')
sb.regplot(x = data['pz2'],y = data['M'],color = 'darkorange',label = 'pz2')
plt.legend()
plt.title('Regression Plot of pz1 and pz2 with Invariant Mass')
plt.show()

* as both pz1 and pz2 increase Invariant Mass tends to increase

**eta1,eta2 (pseudorapidity) and Invariant Mass**

In [None]:
# Line plots of eta1 and eta2 against the DataFrame index.
plt.figure(figsize = (25,10))
# Labels and a legend are added so the two series can be told apart.
plt.plot(data['eta1'],label = 'eta1')
plt.plot(data['eta2'],label = 'eta2')
plt.legend()
plt.title('eta1 and eta2 distribution')
plt.show()


Few outliers present in eta2

In [None]:
# Overlaid kernel density estimates of eta1 and eta2.
plt.figure(figsize=(25, 10))
eta_colors = {'eta1': 'aqua', 'eta2': 'violet'}
for column, shade in eta_colors.items():
    sb.kdeplot(data[column], color=shade, label=column)
plt.legend();
plt.title('eta1 and eta2 kernel density estimates');

In [None]:
# Regression plot of invariant mass M against eta1.
# x and y are passed by keyword: recent seaborn releases make them keyword-only.
plt.figure(figsize = (25,10))
sb.regplot(x = data['eta1'],y = data['M'],color = 'springgreen',label = 'eta1')
plt.legend()
plt.title('Regression Plot of eta1 with Invariant Mass')
plt.show()

In [None]:
# Regression plot of invariant mass M against eta2.
# x and y are passed by keyword: recent seaborn releases make them keyword-only.
plt.figure(figsize = (25,10))
sb.regplot(x = data['eta2'],y = data['M'],color = 'royalblue',label = 'eta2')
# Legend, title and show added so the `label` is displayed and the figure
# matches the eta1 regression plot.
plt.legend()
plt.title('Regression Plot of eta2 with Invariant Mass')
plt.show()

* Invariant Mass tends to increase slightly with increase in pseudorapidity

**phi1,phi2 and Invariant Mass**

In [None]:
# Line plots of phi1 and phi2 against the DataFrame index.
plt.figure(figsize = (25,10))
# Labels and a legend are added so the two series can be told apart.
plt.plot(data['phi1'],label = 'phi1')
plt.plot(data['phi2'],label = 'phi2')
plt.legend()
plt.title('Phi Distribution')
plt.show()


No visible outliers

In [None]:
# Overlaid kernel density estimates of phi1 and phi2.
plt.figure(figsize=(25, 10))
phi_colors = {'phi1': 'aqua', 'phi2': 'violet'}
for column, shade in phi_colors.items():
    sb.kdeplot(data[column], color=shade, label=column)
plt.legend();
plt.title('phi1 and phi2 kernel density estimates');

In [None]:
# Regression plot of invariant mass M against phi1.
# x and y are passed by keyword: recent seaborn releases make them keyword-only.
plt.figure(figsize = (25,10))
sb.regplot(x = data['phi1'],y = data['M'],color = 'blue',label = 'phi1')
# Legend added so the `label` is shown, matching the phi2 plot.
plt.legend()
# Title typo fixed: 'phi1with' -> 'phi1 with'.
plt.title('Regression Plot of phi1 with Invariant Mass')
plt.show()

In [None]:
# Regression plot of invariant mass M against phi2.
# x and y are passed by keyword: recent seaborn releases make them keyword-only.
plt.figure(figsize = (25,10))
sb.regplot(x = data['phi2'],y = data['M'],color = 'crimson',label = 'phi2')
plt.legend()
plt.title('Regression Plot of phi2 with Invariant Mass')
plt.show()

The regression lines for phi1 and phi2 are nearly flat, so the invariant mass does not depend much on the phi angles.

**Charge of electron and Invariant Mass**

In [None]:
# Overlaid kernel density estimates of the two charge columns.
plt.figure(figsize = (25,10))
# Labels, legend and title are added so the two curves can be told apart,
# consistent with the other KDE figures in this notebook.
sb.kdeplot(data['Q1'],color = 'gold',label = 'Q1')
sb.kdeplot(data['Q2'],color = 'crimson',label = 'Q2')
plt.legend()
plt.title('Q1 and Q2 kernel density estimates');

No Outliers detected

In [None]:
# Regression plots of invariant mass M against Q1 and Q2 on shared axes.
# x and y are passed by keyword: recent seaborn releases make them keyword-only.
plt.figure(figsize = (25,10))
sb.regplot(x = data['Q1'],y = data['M'],color = 'lime',label = 'Q1')
sb.regplot(x = data['Q2'],y = data['M'],color = 'slateblue',label = 'Q2')
plt.legend()
plt.title('Regression Plot of Q1 and Q2 with Invariant Mass')
plt.show()

The nearly flat regression lines mean the invariant mass does not depend much on Q1 and Q2 directly.

# Model for prediction of Invariant Mass of electron

In [None]:
# Display the DataFrame as it stands before Run and Event are scaled.
data

In [None]:
# Standardise the Run and Event columns in place.
# Both columns are fitted in a single call so that `sc` keeps the statistics of
# each column. Previously the second fit_transform re-fitted the same scaler and
# discarded the Run statistics, so `sc` could not transform new rows consistently.
# StandardScaler standardises each column independently, so the scaled values
# are the same as with the two separate calls.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test rows influence the scaling statistics.
scaled_columns = ['Run','Event']
sc = StandardScaler()
data[scaled_columns] = sc.fit_transform(data[scaled_columns])

In [None]:
# Display the DataFrame again to inspect the scaled Run and Event columns.
data

In [None]:
# (rows, columns) of the DataFrame; the feature slice that follows takes the first 18 columns.
data.shape

In [None]:
# Features are the first 18 columns; the target is the invariant mass column 'M'.
feature_count = 18
X = data.iloc[:, :feature_count]
y = data.loc[:, 'M']

In [None]:
# Display the feature matrix.
X

In [None]:
# Display the target series.
y

In [None]:
# Hold out 25% of the rows for testing.
# random_state is fixed so the split (and every metric computed from it) is
# reproducible across kernel restarts.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.25,random_state = 42)

In [None]:
# Fully connected regression network: 18 input features, three ReLU hidden
# layers (256 -> 512 -> 256 units) and a single linear output unit.
inputs = Input(shape=(18,))
x = inputs
for units in (256, 512, 256):
    x = Dense(units, activation='relu')(x)
outputs = Dense(1)(x)
model = Model(inputs, outputs)
model.summary()

In [None]:
# Configure training: RMSprop optimizer with mean squared error as the loss.
model.compile(optimizer = 'rmsprop',loss = 'mse')

In [None]:
# Train on the training split only. Fitting on the full X, y (as before) also
# showed the model the test rows, which made the later evaluation on
# X_test/y_test meaningless.
model.fit(X_train,y_train,batch_size = 25,epochs = 40)

In [None]:
# Evaluate the model on the test split and print the returned loss.
loss = model.evaluate(X_test,y_test,batch_size = 50)
print('Loss is :',loss)

In [None]:
# Predict the invariant mass for every row of the test split.
Invariant_mass = model.predict(X_test)

In [None]:
# Display the true test-set values for comparison with the predictions.
y_test

In [None]:
# Ensure the predictions are held as a NumPy array.
# NOTE(review): model.predict presumably already returns an ndarray, in which
# case this conversion is redundant; confirm and drop if so.
Invariant_mass = np.array(Invariant_mass)

In [None]:
# Print the predicted invariant masses for the test split.
print(Invariant_mass)

In [None]:
# Predict the invariant mass for one hand-written sample of 18 feature values.
# The sample is wrapped in a 2-D NumPy array of shape (1, 18): Keras can treat
# a nested Python list as a collection of separate inputs or reject it outright,
# whereas an ndarray is always read as a batch of rows.
# NOTE(review): the first two values look like already-standardised Run and
# Event; confirm they were scaled with the same statistics as the training data.
electron = np.array([[-0.891368,-0.319691,58.71410,-7.311320,10.531000,-57.29740,12.82020,-2.202670,2.177660,1,11.28360,-1.032340,-1.88066,-11.077800,2.14537,-2.344030,-2.072810,-1]],dtype = 'float32')
print("Invariant Mass is:",model.predict(electron))