# Using Deep Neural Network for Regression Analysis
 

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Dataset

A first look at the dataset:

In [None]:
# Read the dielectron collision events into a DataFrame and preview the first rows.
data = pd.read_csv(
    "/kaggle/input/cern-electron-collision-data/dielectron.csv"
)
data.head(n=5)

# Preprocessing

We can disregard variables used for identification. The input is then normalized, while the output is rescaled to a logarithmic scale:

In [None]:
# Drop the identification columns ("Run", "Event") and any rows with missing values.
s_data = data.drop(columns=["Run", "Event"]).dropna()

# Feature matrix: every remaining column except the target "M".
Xn_data = s_data.drop(columns=["M"]).to_numpy()

# Standardize each feature column independently (zero mean, unit variance).
# Reducing over the whole matrix (no `axis`) would apply one global scalar
# mean/std to every column and leave features on very different scales.
feature_mean = Xn_data.mean(axis=0)
feature_std = Xn_data.std(axis=0)
# A constant column has zero std; divide by 1 instead so it maps to 0, not NaN.
feature_std = np.where(feature_std == 0, 1.0, feature_std)
X_data = (Xn_data - feature_mean) / feature_std

# The regression target is the natural log of "M".
y_data = np.log(s_data["M"].to_numpy())

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

Splitting the data into training, validation and test sets:

In [None]:
# First split: 80% for training, 20% held out.
train_X, dev_test_X, train_y, dev_test_y = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    random_state=20,
)
# Second split: halve the held-out part into validation (dev) and test sets.
dev_X, test_X, dev_y, test_y = train_test_split(
    dev_test_X,
    dev_test_y,
    test_size=0.5,
    random_state=21,
)

Custom metric for $R^{2}$

source: https://stackoverflow.com/questions/45250100/kerasregressor-coefficient-of-determination-r2-score

In [None]:
def r2_score(y_true, y_pred):
    """Coefficient of determination (R^2) written with Keras backend ops.

    Computes ``1 - SS_res / (SS_tot + epsilon)`` where ``SS_res`` is the sum of
    squared residuals and ``SS_tot`` is the sum of squared deviations of
    ``y_true`` from its mean. The backend epsilon keeps the division finite
    when ``y_true`` is constant.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, same shape as ``y_true``.

    Returns:
        A scalar R^2 value over the values passed in.

    NOTE(review): the score is computed only over the values given in one
    call; if the caller passes mini-batches, the result is a per-batch R^2,
    not the R^2 of the whole dataset.
    """
    # Use the backend bundled with tensorflow.keras so the metric comes from
    # the same Keras implementation as the model it is attached to.
    from tensorflow.keras import backend as K

    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - SS_res / (SS_tot + K.epsilon())

# Framework for Deep Neural Network

In [None]:
from tensorflow.keras import regularizers
# Neural Network Design
# Fully connected regressor: four ReLU hidden layers of decreasing width
# (128 -> 64 -> 32 -> 16) followed by a single linear output unit.
# The input width is taken from the training matrix rather than hard-coded,
# so the model stays in sync if the feature set changes.
n_features = train_X.shape[1]

model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=[n_features]),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1)
])

# Mean squared error loss, with the custom R^2 reported as a metric.
model.compile(optimizer='Adam', loss='mse', metrics=[r2_score])


# Callback

A callback to store the model parameters with the best performance:

In [None]:
# Checkpoint the weights (not the full model) once per epoch, overwriting the
# file only when the monitored validation loss is the best seen so far.
model_callback = keras.callbacks.ModelCheckpoint(
    "/tmp/checkpoint",
    save_weights_only=True,
    save_best_only=True,
    monitor="val_loss",
    mode="auto",
    save_freq="epoch",
    verbose=0,
    options=None,
)

In [None]:
# Train on the training split, scoring the dev split after every epoch so the
# checkpoint callback can track validation loss.
history = model.fit(
    train_X,
    train_y,
    validation_data=(dev_X, dev_y),
    epochs=300,
    batch_size=512,
    callbacks=[model_callback],
)

In [None]:
# Plot the training and validation loss recorded for each epoch.
from matplotlib import pyplot as plt

fig, ax = plt.subplots()
ax.plot(history.history['loss'], label="Training loss")
ax.plot(history.history['val_loss'], label="Validation loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss (MSE)")
ax.set_title("Training vs. validation loss")
ax.legend()
plt.show()

In [None]:
# Restore the weights written by the ModelCheckpoint callback, i.e. those from
# the epoch with the best monitored validation loss, before evaluating.
model.load_weights("/tmp/checkpoint")

# Performance on the test set:

Estimates for (MSE, $R^{2}$ score)

In [None]:
# Score the held-out test split, which was used neither for training nor for
# checkpoint selection. The whole split is passed as a single batch so the
# R^2 metric is computed over all test samples at once.
model.test_on_batch(test_X, test_y)