<a href="https://colab.research.google.com/github/sahiiljadhav/nn-optimizer-comparison/blob/main/Untitled15.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Activation functions
def relu(x):
    """Rectified linear unit, applied element-wise.

    Every entry of ``x`` that is below zero is replaced by zero; all other
    entries pass through unchanged.
    """
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Element-wise derivative of ReLU evaluated at ``x``.

    Returns a float array holding 1.0 where ``x`` is strictly positive and
    0.0 everywhere else (including at exactly zero).
    """
    is_positive = x > 0
    return is_positive.astype(float)

def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating, so the largest
    exponent in a row is exp(0) = 1 and large scores cannot overflow. The
    exponentials are then normalised so every row sums to one.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    shifted_exp = np.exp(x - row_max)
    row_total = np.sum(shifted_exp, axis=1, keepdims=True)
    return shifted_exp / row_total

In [None]:
# Loss function
def categorical_crossentropy(y_true, y_pred):
    """Mean categorical cross-entropy over the rows of a batch.

    ``y_true`` and ``y_pred`` are multiplied element-wise, so they must have
    matching shapes, with classes along axis 1. A small constant is added to
    ``y_pred`` before the logarithm so a predicted probability of exactly
    zero does not produce ``-inf``.
    """
    log_pred = np.log(y_pred + 1e-15)
    per_sample = np.sum(y_true * log_pred, axis=1)
    return -np.mean(per_sample)

def categorical_crossentropy_derivative(y_true, y_pred):
    """Difference between predicted probabilities and the targets.

    The result has the same shape as the inputs. The backward pass in this
    notebook uses it directly as the gradient at the output layer's
    pre-activation (``dZ3``).
    """
    error = y_pred - y_true
    return error

In [None]:
# Generate Synthetic Dataset
# 1000 samples with 4 features (all informative, none redundant) and 3 classes;
# the fixed random_state makes the generated data repeatable.
X, y = make_classification(
    n_samples=1000,
    n_features=4,
    n_informative=4,
    n_redundant=0,
    n_classes=3,
    random_state=42,
)

# One-hot encode the class labels. The labels are reshaped into a single
# column before encoding, and .toarray() materialises the encoded result as a
# dense array.
one_hot = OneHotEncoder()
y_encoded = one_hot.fit_transform(y.reshape(-1, 1)).toarray()

In [None]:
# Train-test split: hold out 20% of the samples for testing, with a fixed
# random_state so the same rows land in each partition on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Neural network architecture
# Seed NumPy's global RNG so the random weight initialisation is repeatable.
np.random.seed(42)

# Layer widths: 4 inputs (one per feature) -> 3 hidden -> 4 hidden -> 3 outputs
# (one per class).
input_neurons, output_neurons = 4, 3
hidden_neurons1, hidden_neurons2 = 3, 4

In [None]:
# Initialize weights and biases
# He-normal initialisation: each weight matrix is drawn from a standard normal
# and scaled by sqrt(2 / fan_in), where fan_in is the number of units feeding
# the layer. A fixed tiny scale (e.g. 0.01) shrinks activations and gradients
# multiplicatively through the stacked ReLU layers, which leaves training
# stuck at the uniform-prediction loss (ln 3 ~= 1.0986 for three classes).
weights = {
    "W1": np.random.randn(input_neurons, hidden_neurons1) * np.sqrt(2.0 / input_neurons),
    "W2": np.random.randn(hidden_neurons1, hidden_neurons2) * np.sqrt(2.0 / hidden_neurons1),
    "W3": np.random.randn(hidden_neurons2, output_neurons) * np.sqrt(2.0 / hidden_neurons2),
}

# Biases start at zero. Each is a (1, units) row so it broadcasts across the
# rows of the batch when added to the layer's pre-activation.
biases = {
    "b1": np.zeros((1, hidden_neurons1)),
    "b2": np.zeros((1, hidden_neurons2)),
    "b3": np.zeros((1, output_neurons)),
}


In [None]:
# Forward propagation
def forward_propagation(X, weights, biases):
    """Run a batch through the three-layer network.

    The two hidden layers apply ReLU and the output layer applies softmax.

    Parameters:
        X: batch of inputs, one sample per row.
        weights: dict holding the matrices under "W1", "W2" and "W3".
        biases: dict holding the bias rows under "b1", "b2" and "b3".

    Returns:
        The tuple (Z1, A1, Z2, A2, Z3, A3), where each Z is a layer's
        pre-activation and each A is the corresponding activation; A3 holds
        the softmax outputs.
    """
    # First hidden layer.
    pre1 = np.dot(X, weights["W1"]) + biases["b1"]
    act1 = relu(pre1)

    # Second hidden layer.
    pre2 = np.dot(act1, weights["W2"]) + biases["b2"]
    act2 = relu(pre2)

    # Output layer.
    pre3 = np.dot(act2, weights["W3"]) + biases["b3"]
    probs = softmax(pre3)

    return pre1, act1, pre2, act2, pre3, probs

In [None]:
# Backward propagation
def backward_propagation(X, y, Z1, A1, Z2, A2, Z3, A3, weights):
    """Compute batch-averaged gradients for every weight matrix and bias row.

    Parameters:
        X: batch of inputs, one sample per row.
        y: targets with the same shape as ``A3``.
        Z1, A1, Z2, A2, Z3, A3: values returned by ``forward_propagation``
            for this batch. ``Z3`` is accepted to mirror that tuple but is
            not read here.
        weights: dict of weight matrices; only "W2" and "W3" are read.

    Returns:
        Dict with keys "dW1", "db1", "dW2", "db2", "dW3", "db3".
    """
    batch_size = X.shape[0]

    def layer_grads(layer_input, delta):
        # Average the per-sample contributions over the batch.
        grad_w = np.dot(layer_input.T, delta) / batch_size
        grad_b = np.sum(delta, axis=0, keepdims=True) / batch_size
        return grad_w, grad_b

    # Output layer: the loss derivative is used directly as the error at Z3.
    delta3 = categorical_crossentropy_derivative(y, A3)
    dW3, db3 = layer_grads(A2, delta3)

    # Second hidden layer: push the error back through W3, then gate by ReLU.
    upstream2 = np.dot(delta3, weights["W3"].T)
    delta2 = upstream2 * relu_derivative(Z2)
    dW2, db2 = layer_grads(A1, delta2)

    # First hidden layer: same pattern through W2.
    upstream1 = np.dot(delta2, weights["W2"].T)
    delta1 = upstream1 * relu_derivative(Z1)
    dW1, db1 = layer_grads(X, delta1)

    return {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2, "dW3": dW3, "db3": db3}

In [None]:
# Optimizer functions
def gradient_descent(weights, biases, gradients, lr):
    """Apply one plain gradient-descent step to ``weights`` and ``biases``.

    For every key in ``weights`` (e.g. "W1") the matching bias key is derived
    by replacing "W" with "b", and the gradients are looked up under the same
    names prefixed with "d". Both dicts are updated through ``-=`` on their
    entries, so the caller's parameters change; nothing is returned.
    """
    for w_name in weights:
        weights[w_name] -= lr * gradients["d" + w_name]
        b_name = w_name.replace("W", "b")
        biases[b_name] -= lr * gradients["d" + b_name]

def train_nn(optimizer, epochs=100, lr=0.01):
    """Train the network on the notebook-level training set, full batch.

    Parameters:
        optimizer: callable invoked once per epoch as
            ``optimizer(weights, biases, gradients, lr)``.
        epochs: number of passes over ``X_train``.
        lr: learning rate forwarded to ``optimizer``.

    Returns:
        A list with one ``(train_loss, train_accuracy)`` tuple per epoch.

    Note:
        The notebook-level ``weights`` and ``biases`` dicts are handed to the
        optimizer directly, so any update it applies to them persists after
        this function returns (``gradient_descent`` updates them in place).
        A second call therefore continues from the parameters left by the
        first one.
    """
    global weights, biases
    history = []
    for _ in range(epochs):
        forward_values = forward_propagation(X_train, weights, biases)
        probs = forward_values[-1]
        gradients = backward_propagation(X_train, y_train, *forward_values, weights)

        optimizer(weights, biases, gradients, lr)

        # Calculate loss and accuracy. ``probs`` comes from the forward pass
        # made before the optimizer step, so each entry describes the
        # parameters as they were at the start of the epoch.
        predicted_class = np.argmax(probs, axis=1)
        true_class = np.argmax(y_train, axis=1)
        epoch_loss = categorical_crossentropy(y_train, probs)
        epoch_accuracy = np.mean(predicted_class == true_class)
        history.append((epoch_loss, epoch_accuracy))

    return history

In [None]:
# Example usage: train with plain gradient descent using train_nn's default
# epochs and learning rate. The cell displays the returned per-epoch
# (train_loss, train_accuracy) history.
train_nn(optimizer=gradient_descent)

[(1.0986124233240318, 0.26375),
 (1.0986099111000127, 0.34625),
 (1.0986074158088202, 0.34625),
 (1.098604939197796, 0.34625),
 (1.0986024792890037, 0.34625),
 (1.0986000361847084, 0.34625),
 (1.0985976092594552, 0.34625),
 (1.0985951983267148, 0.34625),
 (1.0985928033018288, 0.34625),
 (1.0985904240697542, 0.34625),
 (1.0985880606793332, 0.34625),
 (1.0985857129155454, 0.34625),
 (1.09858338073437, 0.34625),
 (1.0985810643583054, 0.34625),
 (1.098578763681313, 0.34625),
 (1.0985764783544536, 0.34625),
 (1.0985742083068937, 0.34625),
 (1.098571953464515, 0.34625),
 (1.0985697136701598, 0.34625),
 (1.0985674890435717, 0.34625),
 (1.098565279401888, 0.34625),
 (1.0985630846233376, 0.34625),
 (1.0985609046250266, 0.34625),
 (1.0985587392158918, 0.34625),
 (1.0985565882227506, 0.34625),
 (1.0985544515167052, 0.34625),
 (1.0985523291860124, 0.34625),
 (1.0985502210712241, 0.34625),
 (1.0985481270574597, 0.34625),
 (1.0985460472231485, 0.34625),
 (1.0985439812809945, 0.34625),
 (1.0985419289