**Implement a neural network to solve a multi-class classification problem and evaluate different
weight initialization techniques.**

Step 1: Import Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder


Step 2: Load and Prepare Iris Dataset

In [2]:
# Load Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Convert labels to one-hot encoding for multi-class classification
encoder = OneHotEncoder()
y = encoder.fit_transform(y.reshape(-1, 1)).toarray()

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Step 3: Implement Neural Network

In [3]:
class NeuralNetwork:
    def __init__(self, input_dim, hidden_dim, output_dim):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # Weight initialization techniques
        self.weight_init_techniques = {
            'random': self.random_init,
            'xavier': self.xavier_init,
            'he': self.he_init
        }

        # Initialize weights and biases
        self.weights1, self.bias1, self.weights2, self.bias2 = self.weight_init_techniques['random']()

    def random_init(self):
        weights1 = np.random.rand(self.input_dim, self.hidden_dim)
        bias1 = np.zeros((1, self.hidden_dim))
        weights2 = np.random.rand(self.hidden_dim, self.output_dim)
        bias2 = np.zeros((1, self.output_dim))
        return weights1, bias1, weights2, bias2

    def xavier_init(self):
        weights1 = np.random.randn(self.input_dim, self.hidden_dim) * np.sqrt(1 / self.input_dim)
        bias1 = np.zeros((1, self.hidden_dim))
        weights2 = np.random.randn(self.hidden_dim, self.output_dim) * np.sqrt(1 / self.hidden_dim)
        bias2 = np.zeros((1, self.output_dim))
        return weights1, bias1, weights2, bias2

    def he_init(self):
        weights1 = np.random.randn(self.input_dim, self.hidden_dim) * np.sqrt(2 / self.input_dim)
        bias1 = np.zeros((1, self.hidden_dim))
        weights2 = np.random.randn(self.hidden_dim, self.output_dim) * np.sqrt(2 / self.hidden_dim)
        bias2 = np.zeros((1, self.output_dim))
        return weights1, bias1, weights2, bias2

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def relu(self, x):
        return np.maximum(x, 0)

    def softmax(self, x):
        exp_x = np.exp(x)
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, X):
        self.hidden_layer = np.dot(X, self.weights1) + self.bias1
        self.hidden_layer = self.relu(self.hidden_layer)

        self.output_layer = np.dot(self.hidden_layer, self.weights2) + self.bias2
        self.output_layer = self.softmax(self.output_layer)

        return self.output_layer

    def cross_entropy_loss(self, predictions, labels):
        return -np.mean(np.sum(labels * np.log(predictions), axis=1))

    def train(self, X, y, learning_rate=0.01, epochs=1000):
        for epoch in range(epochs):
            predictions = self.forward(X)
            loss = self.cross_entropy_loss(predictions, y)

            # Backpropagation
            d_output = predictions - y
            d_weights2 = np.dot(self.hidden_layer.T, d_output)
            d_bias2 = np.sum(d_output, axis=0, keepdims=True)

            d_hidden_layer = d_output.dot(self.weights2.T) * (self.hidden_layer > 0).astype(int)
            d_weights1 = np.dot(X.T, d_hidden_layer)
            d_bias1 = np.sum(d_hidden_layer, axis=0, keepdims=True)

            # Update weights and biases
            self.weights1 -= learning_rate * d_weights1
            self.bias1 -= learning_rate * d_bias1
            self.weights2 -= learning_rate * d_weights2
            self.bias2 -= learning_rate * d_bias2

            if epoch % 100 == 0:
                print(f'Epoch {epoch+1}, Loss: {loss}')

    def evaluate(self, X, y):
        predictions = self.forward(X)
        predicted_classes = np.argmax(predictions, axis=1)
        actual_classes = np.argmax(y, axis=1)
        accuracy = np.mean(predicted_classes == actual_classes)
        return accuracy


Step 4: Evaluate Different Weight Initialization Techniques

In [4]:
# Initialize neural networks with different weight initialization techniques
nn_random = NeuralNetwork(4, 10, 3)
nn_xavier = NeuralNetwork(4, 10, 3)
nn_he = NeuralNetwork(4, 10, 3)

# Set different initialization techniques
nn_random.weights1, nn_random.bias1, nn_random.weights2, nn_random.bias2 = nn_random.weight_init_techniques['random']()
nn_xavier.weights1, nn_xavier.bias1, nn_xavier.weights2, nn_xavier.bias2 = nn_xavier.weight_init_techniques['xavier']()
nn_he.weights1, nn_he.bias1, nn_he.weights2, nn_he.bias2 = nn_he.weight_init_techniques['he']()

# Train neural networks
nn_random.train(X_train, y_train)
nn_xavier.train(X_train, y_train)
nn_he.train(X_train, y_train)

# Evaluate neural networks
accuracy_random = nn_random.evaluate(X_test, y_test)
accuracy_xavier = nn_xavier.evaluate(X_test, y_test)
accuracy_he = nn_he.evaluate(X_test, y_test)

print(f"Random Initialization Accuracy: {accuracy_random}")
print(f"Xavier Initialization Accuracy: {accuracy_xavier}")
print(f"He Initialization Accuracy: {accuracy_he}")


Epoch 1, Loss: 15.667858768002098
Epoch 101, Loss: 1.0984039336279603
Epoch 201, Loss: 1.0984039336279603
Epoch 301, Loss: 1.0984039336279603
Epoch 401, Loss: 1.0984039336279603
Epoch 501, Loss: 1.0984039336279603
Epoch 601, Loss: 1.0984039336279603
Epoch 701, Loss: 1.0984039336279603
Epoch 801, Loss: 1.0984039336279603
Epoch 901, Loss: 1.0984039336279603
Epoch 1, Loss: 5.0731876128667865
Epoch 101, Loss: 1.0984039336279603
Epoch 201, Loss: 1.0984039336279603
Epoch 301, Loss: 1.0984039336279603
Epoch 401, Loss: 1.0984039336279603
Epoch 501, Loss: 1.0984039336279603
Epoch 601, Loss: 1.0984039336279603
Epoch 701, Loss: 1.0984039336279603
Epoch 801, Loss: 1.0984039336279603
Epoch 901, Loss: 1.0984039336279603
Epoch 1, Loss: 2.2797051965483432
Epoch 101, Loss: 1.0984039336279603
Epoch 201, Loss: 1.0984039336279603
Epoch 301, Loss: 1.0984039336279603
Epoch 401, Loss: 1.0984039336279603
Epoch 501, Loss: 1.0984039336279603
Epoch 601, Loss: 1.0984039336279603
Epoch 701, Loss: 1.098403933627960