A8. Create a Neural network architecture from scratch in Python and use it to do multi-class classification on any data.
Parameters to be considered while creating the neural network from scratch are specified as:
(1) No of hidden layers : 1 or more
(2) No. of neurons in hidden layer: 100
(3) Non-linearity in the layer : Relu
(4) Use more than 1 neuron in the output layer. Use a suitable threshold value
Use appropriate Optimisation algorithm

In [None]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load and preprocess data
iris = datasets.load_iris()
X = iris.data
y = iris.target.reshape(-1, 1)

encoder = OneHotEncoder(sparse_output=False)
y = encoder.fit_transform(y)

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize parameters
def initialize_parameters(input_size, hidden_size, output_size):
    W1 = np.random.randn(input_size, hidden_size)
    b1 = np.zeros((1, hidden_size))
    W2 = np.random.randn(hidden_size, output_size)
    b2 = np.zeros((1, output_size))
    return W1, b1, W2, b2

# Activation functions
def relu(Z):
    return np.maximum(0, Z)

def relu_derivative(Z):
    return (Z > 0).astype(float)

def softmax(Z):
    exp_Z = np.exp(Z - np.max(Z, axis=1, keepdims=True))
    return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)

# Forward pass
def forward_pass(X, W1, b1, W2, b2):
    Z1 = np.dot(X, W1) + b1
    A1 = relu(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2

# Compute loss
def compute_loss(y_true, A2):
    return -np.mean(y_true * np.log(A2 + 1e-8))

# Backward pass
def backward_pass(X, y_true, W2, Z1, A1, A2):
    m = X.shape[0]
    dZ2 = A2 - y_true
    dW2 = np.dot(A1.T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m

    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * relu_derivative(Z1)
    dW1 = np.dot(X.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m

    return dW1, db1, dW2, db2

# Update parameters
def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    return W1, b1, W2, b2

# Predict
def predict(X, W1, b1, W2, b2):
    _, _, _, A2 = forward_pass(X, W1, b1, W2, b2)
    return np.argmax(A2, axis=1)

# Training loop
def train_network(X_train, y_train, input_size, hidden_size, output_size, learning_rate, epochs):
    W1, b1, W2, b2 = initialize_parameters(input_size, hidden_size, output_size)

    for epoch in range(epochs):
        Z1, A1, Z2, A2 = forward_pass(X_train, W1, b1, W2, b2)
        loss = compute_loss(y_train, A2)

        dW1, db1, dW2, db2 = backward_pass(X_train, y_train, W2, Z1, A1, A2)
        W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)

        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}")

    return W1, b1, W2, b2

# Train the model
input_size = 4
hidden_size = 100
output_size = 3
learning_rate = 0.01
epochs = 1000

W1, b1, W2, b2 = train_network(X_train, y_train, input_size, hidden_size, output_size, learning_rate, epochs)

# Evaluate the model
predictions = predict(X_test, W1, b1, W2, b2)
y_test_labels = np.argmax(y_test, axis=1)
accuracy = np.mean(predictions == y_test_labels)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Epoch 0, Loss: 2.6277
Epoch 100, Loss: 0.0973
Epoch 200, Loss: 0.0667
Epoch 300, Loss: 0.0532
Epoch 400, Loss: 0.0450
Epoch 500, Loss: 0.0394
Epoch 600, Loss: 0.0349
Epoch 700, Loss: 0.0313
Epoch 800, Loss: 0.0286
Epoch 900, Loss: 0.0262
Test Accuracy: 93.33%
