## Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Importing the dataset

In [None]:
dataset = pd.read_csv('diabetes.csv')
# dataset = pd.read_csv('Credit_Card_Score.csv')
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, -1].values.reshape(-1,1)

## Normalization

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

## Split Train Test

In [30]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


## Activation Functions

In [31]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def d_sigmoid(x):
    s = sigmoid(x)
    return s * (1 - s)

def relu(x):
    return np.maximum(0,x)

def d_relu(x):
    return (x > 0).astype(float)

def softmax(x):
    exp = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp / np.sum(exp, axis=1, keepdims=True)

## Loss Function

In [32]:
# ----- Loss functions -----

def mse_loss(y_true, y_pred):
    return np.mean((y_true - y_pred)**2)

def d_mse(y_true, y_pred):
    return (y_pred - y_true) / y_true.shape[0]

def cross_entropy_loss(y_true, y_pred):
    # avoid log(0)
    eps = 1e-9
    return -np.mean(np.sum(y_true * np.log(y_pred + eps), axis=1))


def binary_cross_entropy(y_true, y_pred, eps=1e-12):
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return -np.mean(y_true * np.log(y_pred) + (1-y_true) * np.log(1 - y_pred))

def binary_cross_entropy_grad(y_true, y_pred, eps=1e-12):
    y_pred = np.clip(y_pred, eps, 1-eps)
    return (-(y_true/y_pred) + (1-y_true) / (1-y_pred)) / y_true.shape[0]

## MLP Class

In [33]:
class MLP:
    def __init__(self, layer_sizes, task='classification', lr=0.001):
        """
        layer_sizes: list, e.g., [n_inputs_dime, hidden1, hidden2, ..., n_outputs_dim]
        task: 'classification' | 'multiclass' | 'regression'
        """
        
        self.lr = lr
        self.layer_sizes = layer_sizes
        self.task = task

        self.W = []
        self.B = []

        for i in range(len(layer_sizes)-1):
            print(layer_sizes[i])
            self.W.append(np.random.randn(layer_sizes[i], layer_sizes[i+1]) * 0.01)
            self.B.append(np.zeros((1, layer_sizes[i+1])))

    # ------------Forward Pass-------------
    def forward(self, X):
        self.A = [X]
        self.Z = []

        for i in range(len(self.W)):
            z = self.A[-1] @ self.W[i] + self.B[i]
            self.Z.append(z)

            #Use ReLu for hidden layers
            if i < len(self.W) - 1:
                self.A.append(relu(z))
            else:
                if self.task == 'classification':
                    self.A.append(sigmoid(z))
                elif self.task == 'multiclass':
                    self.A.append(softmax(z))
                else:
                    self.A.append(z) # Regression
        return self.A[-1] 


    def backward(self, y_true):

        m = y_true.shape[0]
        grads_W = [None] * len(self.W)
        grads_B = [None] * len(self.B)

        # Output layer gradient
        if self.task == 'classification':
            dA = (self.A[-1] - y_true) # binary cross entropy simplified
        elif self.task == 'multiclass':
            dA = (self.A[-1] - y_true) # softmax + CE gradient
        else:
            dA = d_mse(y_true, self.A[-1])

        # Backpropagation
        for i in reversed(range(len(self.W))):
            if i == len(self.W) - 1: # output layer
                dZ = dA
            else:
                dZ = dA * d_relu(self.Z[i])

            grads_W[i] = self.A[i].T @ dZ
            grads_B[i] = np.sum(dZ, axis=0, keepdims=True)

            dA = dZ @ self.W[i].T
        # Gradient descent update
        for i in range(len(self.W)):
            self.W[i] -= self.lr * grads_W[i]
            self.B[i] -= self.lr * grads_B[i]
        

     # ---------- Training ----------
    def fit(self, X, y, epochs=500, verbose=True):
        losses = []
        
        for epoch in range(1, epochs+1):
            y_pred = self.forward(X)
            
            if self.task in ["classification", "multiclass"]:
                loss = cross_entropy_loss(y, y_pred)
            else:
                loss = mse_loss(y, y_pred)
                
            self.backward(y)
            losses.append(loss)
            
            if verbose and epoch % 100 == 0:
                print(f"Epoch {epoch} - Loss: {loss:.4f}")
        return losses

    # ---------- Prediction ----------
    def predict(self, X):
        out = self.forward(X)
        
        if self.task == "classification":
            return (out > 0.5).astype(int)
        
        if self.task == "multiclass":
            return np.argmax(out, axis=1)
        
        return out  # regression outputs

## Binary Classification

In [34]:
# Generate Dummy Data
np.random.seed(0)
X = np.random.randn(1000,2)
y = (X[:,0] + X[:,1] > 1).astype(int).reshape(-1,1)

mlp_bin = MLP([2,8,4,1], task='classification', lr = 0.05)
mlp_bin.fit(X,y, epochs=1000)

pred = mlp_bin.predict(X)
print("Accuracy: ", np.mean(pred==y))

2
8
4
Epoch 100 - Loss: 4.6613
Epoch 200 - Loss: 0.0986
Epoch 300 - Loss: 4.6627
Epoch 400 - Loss: 4.6600
Epoch 500 - Loss: 1.1522
Epoch 600 - Loss: 4.6537
Epoch 700 - Loss: 4.6625
Epoch 800 - Loss: 3.8958
Epoch 900 - Loss: 0.0009
Epoch 1000 - Loss: 0.0276
Accuracy:  0.775


## Multiclassification

In [35]:
# Generate dataset
np.random.seed(0)
N = 300
X = np.random.randn(N,2)
y_raw = np.random.randint(0,3,N)

# One-hot
y = np.zeros((N,3))
y[np.arange(N), y_raw] = 1

mlp_multi = MLP([2,16,8,3], task='multiclass', lr = 0.1)
mlp_multi.fit(X,y, epochs=1000)

pred = mlp_multi.predict(X)
print("Accuracy: ", np.mean(pred==y_raw))

2
16
8
Epoch 100 - Loss: 6.9693
Epoch 200 - Loss: 11.1755
Epoch 300 - Loss: 10.8533
Epoch 400 - Loss: 7.6196
Epoch 500 - Loss: 10.8182
Epoch 600 - Loss: 5.2246
Epoch 700 - Loss: 12.3492
Epoch 800 - Loss: 4.3333
Epoch 900 - Loss: 7.0651
Epoch 1000 - Loss: 10.4512
Accuracy:  0.37333333333333335


## Regression / Prediction

In [36]:
# y = 3x_1- 2x_2 + noise

np.random.seed(0)
X = np.random.randn(500,2)
y = (3 * X[:,0] - 2 * X[:,1] + np.random.randn(500) * 0.2).reshape(-1,1)

mlp_reg = MLP([2,8,4,1], task='regression', lr=0.01)
mlp_reg.fit(X,y,epochs=500)

pred = mlp_reg.predict(X)
print("MSE: ", mse_loss(y,pred))

2
8
4
Epoch 100 - Loss: 12.7576
Epoch 200 - Loss: 12.7553
Epoch 300 - Loss: 12.7550
Epoch 400 - Loss: 12.7549
Epoch 500 - Loss: 12.7549
MSE:  12.75491392379049


## Train and Predict Model on the test set (Binary Classification)

In [37]:
mlp = MLP([X_train.shape[1],16,8,1], task='classification', lr=0.01)
# mlp = MLP([X[0].size,16,8,1], task='multiclass', lr=0.01)
mlp.fit(X_train,y_train, epochs=800)

y_pred = mlp.predict(X_test)
acc = np.mean(y_pred == y_test)
print("Accuracy: ", acc)


8
16
8
Epoch 100 - Loss: 0.1833
Epoch 200 - Loss: 0.1659
Epoch 300 - Loss: 0.1549
Epoch 400 - Loss: 0.1456
Epoch 500 - Loss: 0.0976
Epoch 600 - Loss: 0.1283
Epoch 700 - Loss: 0.1215
Epoch 800 - Loss: 0.1834
Accuracy:  0.7532467532467533


## Train and Predict Model on the Test set (Multi Classification)

## Visualization of loss History

In [38]:
plt.plot(loss_history)
plt.title("Training Loss Curve")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.show()

NameError: name 'loss_history' is not defined

## Evaluation

## Sample Prediction

## Confusion Matrix

In [None]:
np.zeros((1,8))