<a href="https://colab.research.google.com/github/mdfardinxyz/Deep-Learning-Higgs-Boson-Dataset-/blob/main/Deep_Learning_withM_Sazzat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
import pandas as pd

# Source archive: gzip-compressed, header-less CSV hosted by the UCI repository.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz"

# The file ships without a header row, so the column names are supplied here:
# the class label first, then the per-object measurements, then the derived
# mass features.
columns = [
    'label',
    'lepton_pT', 'lepton_eta', 'lepton_phi',
    'missing_energy_magnitude', 'missing_energy_phi',
    'jet1_pt', 'jet1_eta', 'jet1_phi', 'jet1_b-tag',
    'jet2_pt', 'jet2_eta', 'jet2_phi', 'jet2_b-tag',
    'jet3_pt', 'jet3_eta', 'jet3_phi', 'jet3_b-tag',
    'jet4_pt', 'jet4_eta', 'jet4_phi', 'jet4_b-tag',
    'm_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb',
]

# Only the first 5000 rows are read, which keeps the download-and-parse step
# short while experimenting.
df = pd.read_csv(
    url,
    names=columns,
    compression='gzip',
    nrows=5000,
)
df.head()

Unnamed: 0,label,lepton_pT,lepton_eta,lepton_phi,missing_energy_magnitude,missing_energy_phi,jet1_pt,jet1_eta,jet1_phi,jet1_b-tag,...,jet4_eta,jet4_phi,jet4_b-tag,m_jj,m_jjj,m_lv,m_jlv,m_bb,m_wbb,m_wwbb
0,1.0,0.869293,-0.635082,0.22569,0.32747,-0.689993,0.754202,-0.248573,-1.092064,0.0,...,-0.010455,-0.045767,3.101961,1.35376,0.979563,0.978076,0.920005,0.721657,0.988751,0.876678
1,1.0,0.907542,0.329147,0.359412,1.49797,-0.31301,1.095531,-0.557525,-1.58823,2.173076,...,-1.13893,-0.000819,0.0,0.30222,0.833048,0.9857,0.978098,0.779732,0.992356,0.798343
2,1.0,0.798835,1.470639,-1.635975,0.453773,0.425629,1.104875,1.282322,1.381664,0.0,...,1.128848,0.900461,0.0,0.909753,1.10833,0.985692,0.951331,0.803252,0.865924,0.780118
3,0.0,1.344385,-0.876626,0.935913,1.99205,0.882454,1.786066,-1.646778,-0.942383,0.0,...,-0.678379,-1.360356,0.0,0.946652,1.028704,0.998656,0.728281,0.8692,1.026736,0.957904
4,1.0,1.105009,0.321356,1.522401,0.882808,-1.205349,0.681466,-1.070464,-0.921871,0.0,...,-0.373566,0.113041,0.0,0.755856,1.361057,0.98661,0.838085,1.133295,0.872245,0.808487


In [None]:
# Column 0 of the frame is the class label; every other column is a feature.
y = df.iloc[:, 0].to_numpy()
X = df.iloc[:, 1:].to_numpy()

# Hold out 20% of the rows for testing. The split is random rather than
# "first 80% / last 20%", uses a fixed seed so it is repeatable, and is
# stratified on the label so both parts keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
class NeuralNetwork:
    """Fully connected network with one sigmoid hidden layer and a sigmoid output.

    The network is trained with mini-batch gradient descent on the squared
    error between the targets and the output activations.

    Attributes:
        W1, b1: weights ``(input_size, hidden_size)`` and biases
            ``(hidden_size,)`` of the hidden layer.
        W2, b2: weights ``(hidden_size, output_size)`` and biases
            ``(output_size,)`` of the output layer.
        Z1, A1, Z2: pre-activations / activations cached by the most recent
            ``forward`` call; ``backward`` reads ``A1``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameters using NumPy's global random state.

        Weights are drawn from N(0, 1) and scaled by ``1 / sqrt(fan_in)`` and
        biases start at zero, so the initial pre-activations stay near the
        steep part of the sigmoid. Unscaled N(0, 1) weights push the sigmoids
        into saturation, where the gradients are almost zero.
        """
        self.W1 = np.random.randn(input_size, hidden_size) / np.sqrt(input_size)
        self.b1 = np.zeros(hidden_size)
        self.W2 = np.random.randn(hidden_size, output_size) / np.sqrt(hidden_size)
        self.b2 = np.zeros(output_size)

    def sigmoid(self, z):
        """Return the element-wise logistic function ``1 / (1 + exp(-z))``."""
        # Clipping keeps np.exp from overflowing (and emitting RuntimeWarning)
        # for very negative inputs; the result is unchanged to double precision.
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def sigmoid_derivative(self, z):
        """Return the sigmoid's derivative given its *output*.

        Despite the parameter name, ``z`` must already be an activation
        ``a = sigmoid(x)``; the derivative with respect to ``x`` is then
        ``a * (1 - a)``.
        """
        return z * (1 - z)

    def _targets_like(self, y, y_hat):
        """Return ``y`` as a float array with exactly the shape of ``y_hat``.

        A 1-D label vector ``(N,)`` subtracted from predictions ``(N, 1)``
        would silently broadcast to an ``(N, N)`` matrix; reshaping first makes
        the residual element-wise. Raises ``ValueError`` when the element
        counts differ.
        """
        return np.asarray(y, dtype=float).reshape(np.shape(y_hat))

    def forward(self, X):
        """Run a forward pass and return the output activations.

        Args:
            X: array of shape ``(n_samples, input_size)``.

        Returns:
            Array of shape ``(n_samples, output_size)`` with values in (0, 1).
            ``Z1``, ``A1`` and ``Z2`` are cached on the instance.
        """
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.sigmoid(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        A2 = self.sigmoid(self.Z2)

        return A2

    def backward(self, X, y, y_hat, learning_rate):
        """Apply one gradient-descent step on the batch's mean squared error.

        Must be called right after ``forward(X)`` because it reads the cached
        hidden activations ``A1``.

        Args:
            X: the batch passed to ``forward``, shape ``(n_samples, input_size)``.
            y: targets, shape ``(n_samples,)`` or ``(n_samples, output_size)``.
            y_hat: the value returned by ``forward(X)``.
            learning_rate: step size.
        """
        y = self._targets_like(y, y_hat)
        # Average over the batch so the step size does not grow with the
        # batch size and matches the per-sample loss that ``train`` reports.
        n_samples = max(len(y_hat), 1)

        # Chain rule through the output sigmoid: element-wise product, not a
        # matrix product.
        dZ2 = -2 * (y - y_hat) * self.sigmoid_derivative(y_hat) / n_samples
        dW2 = np.dot(self.A1.T, dZ2)
        db2 = np.sum(dZ2, axis=0)
        # Uses W2 *before* it is updated below.
        dZ1 = np.dot(dZ2, self.W2.T) * self.sigmoid_derivative(self.A1)
        dW1 = np.dot(X.T, dZ1)
        db1 = np.sum(dZ1, axis=0)

        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

    def train(self, X, y, epochs, learning_rate, batch_size=256):
        """Train with sequential mini-batches and print a running loss.

        Args:
            X: features, shape ``(n_samples, input_size)``.
            y: targets, shape ``(n_samples,)`` or ``(n_samples, output_size)``.
            epochs: number of passes over the data.
            learning_rate: step size forwarded to ``backward``.
            batch_size: rows per mini-batch (default 256).

        Raises:
            ValueError: if ``X`` is empty or ``batch_size`` is not positive.
        """
        n_samples = len(X)
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        for epoch in range(epochs):
            loss = 0
            # batch train
            for i in range(0, n_samples, batch_size):
                X_batch = X[i : i + batch_size]
                y_batch = y[i : i + batch_size]

                y_hat = self.forward(X_batch)
                loss += self.loss(y_batch, y_hat)
                self.backward(X_batch, y_batch, y_hat, learning_rate)
                # Samples actually seen so far; the final batch may be short.
                count = min(i + batch_size, n_samples)
                print(f"Iteration: {i+1}/{len(X)} | Loss: {loss / count}", end="\r")
            print("=" * 20)
            print(f"Epoch: {epoch + 1}/{epochs} | Loss: {loss / len(X)}")
            print("=" * 20)

    def loss(self, y, y_hat):
        """Return the summed squared error between ``y`` and ``y_hat``.

        ``y`` is reshaped to the shape of ``y_hat`` first, so a 1-D label
        vector contributes one term per sample.
        """
        return np.sum((self._targets_like(y, y_hat) - y_hat) ** 2)

    def predict(self, X):
        """Return hard 0/1 predictions (threshold 0.5), shape ``(n_samples, output_size)``."""
        pred = self.forward(X)
        return (pred > 0.5).astype(int)

In [None]:
# Seed NumPy's global random state before building the model: the weights are
# drawn with np.random.randn inside NeuralNetwork.__init__, so without a seed
# the reported loss and accuracy change on every Restart & Run All.
np.random.seed(42)

# One hidden layer of 64 units; a single sigmoid output for the binary label.
model = NeuralNetwork(input_size=X_train.shape[1], hidden_size=64, output_size=1)
model.train(X_train, y_train, epochs=5, learning_rate=0.1)

# Hard 0/1 predictions on the held-out split.
y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)

print(f"Accuracy: {acc}")

Epoch: 1/5 | Loss: 135.65968941255008
Epoch: 2/5 | Loss: 135.65957818181818
Epoch: 3/5 | Loss: 135.65957818181818
Epoch: 4/5 | Loss: 135.65957818181818
Epoch: 5/5 | Loss: 135.65957818181818
Accuracy: 0.47007954545454544
