<a href="https://colab.research.google.com/github/mehmetgul/artificial_intelligence/blob/main/CS7375_01_prog_assign_3_Spring_2025.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import numpy as np
import time
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder

# -----------------------------
# Neural Network Implementation
# -----------------------------
class NeuralNetwork:
    """Fully connected feed-forward classifier trained by full-batch gradient descent.

    Every hidden layer uses the sigmoid activation and the output layer uses
    softmax. The backward pass uses the ``prediction - target`` output error,
    which is the gradient of softmax combined with cross-entropy loss, so
    targets are expected to be one-hot (or probability) rows.
    """

    def __init__(self, layer_sizes, learning_rate=0.1, epochs=1000, seed=None):
        """Build the network and randomly initialise its parameters.

        Args:
            layer_sizes: Sequence of layer widths, input layer first and
                output layer last, e.g. ``[4, 5, 7, 3]``.
            learning_rate: Step size used for every gradient-descent update.
            epochs: Number of full-batch updates performed by ``train``.
            seed: Optional seed for the weight initialisation. ``None`` (the
                default) draws from NumPy's global random state, exactly as
                before; an integer gives reproducible weights without
                touching the global state.

        Raises:
            ValueError: If fewer than two layers are given or any layer
                width is not positive.
        """
        if len(layer_sizes) < 2:
            raise ValueError(
                "layer_sizes needs at least an input and an output layer"
            )
        if any(size < 1 for size in layer_sizes):
            raise ValueError("every entry of layer_sizes must be positive")

        self.layer_sizes = layer_sizes
        self.learning_rate = learning_rate
        self.epochs = epochs
        # weights[i] has shape (layer_sizes[i], layer_sizes[i + 1]);
        # biases[i] has shape (1, layer_sizes[i + 1]).
        self.weights = []
        self.biases = []

        # A private RandomState keeps seeded runs independent of the global RNG.
        rng = np.random if seed is None else np.random.RandomState(seed)
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            # He-style scaling: standard deviation sqrt(2 / fan_in).
            self.weights.append(rng.randn(fan_in, fan_out) * np.sqrt(2 / fan_in))
            self.biases.append(np.zeros((1, fan_out)))

    def sigmoid(self, z):
        """Return the element-wise logistic function of ``z``.

        Computed as ``exp(-log(1 + exp(-z)))`` via ``np.logaddexp`` so that
        large negative inputs do not overflow inside ``exp``.
        """
        return np.exp(-np.logaddexp(0, -z))

    def sigmoid_derivative(self, a):
        """Return the sigmoid derivative given the sigmoid *output* ``a``."""
        return a * (1 - a)

    def softmax(self, z):
        """Return the row-wise softmax of the 2-D array ``z``."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # the exponentials from overflowing.
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forward(self, X):
        """Propagate ``X`` through the network.

        Args:
            X: 2-D array with one sample per row.

        Returns:
            A tuple ``(activations, zs)``: ``activations`` starts with ``X``
            and holds the output of every layer; ``zs`` holds the
            pre-activation values of every layer.
        """
        activations = [X]
        zs = []
        # Hidden layers (sigmoid).
        for W, b in zip(self.weights[:-1], self.biases[:-1]):
            z = np.dot(activations[-1], W) + b
            zs.append(z)
            activations.append(self.sigmoid(z))
        # Output layer (softmax).
        z = np.dot(activations[-1], self.weights[-1]) + self.biases[-1]
        zs.append(z)
        activations.append(self.softmax(z))
        return activations, zs

    def backward(self, activations, zs, Y):
        """Compute batch-averaged gradients for every weight matrix and bias.

        Args:
            activations: Layer outputs as returned by ``forward``.
            zs: Pre-activations as returned by ``forward``. Kept for
                interface compatibility; the sigmoid derivative is computed
                from the activations, so the values are not read.
            Y: Target rows aligned with the rows fed to ``forward``.

        Returns:
            A tuple ``(grads_w, grads_b)`` of lists aligned with
            ``self.weights`` and ``self.biases``.
        """
        n_samples = Y.shape[0]
        grads_w = [None] * len(self.weights)
        grads_b = [None] * len(self.biases)

        # Softmax with cross-entropy loss simplifies the output error to (a - y).
        delta = activations[-1] - Y
        grads_w[-1] = np.dot(activations[-2].T, delta) / n_samples
        grads_b[-1] = np.sum(delta, axis=0, keepdims=True) / n_samples

        # Walk backwards through the hidden layers; l counts from the end.
        for l in range(2, len(self.layer_sizes)):
            sp = self.sigmoid_derivative(activations[-l])
            delta = np.dot(delta, self.weights[-l + 1].T) * sp
            grads_w[-l] = np.dot(activations[-l - 1].T, delta) / n_samples
            grads_b[-l] = np.sum(delta, axis=0, keepdims=True) / n_samples
        return grads_w, grads_b

    def train(self, X, Y):
        """Run ``self.epochs`` full-batch gradient-descent updates on ``(X, Y)``."""
        for _ in range(self.epochs):
            activations, zs = self.forward(X)
            grads_w, grads_b = self.backward(activations, zs, Y)
            # In-place updates keep the arrays stored in the lists.
            for W, b, dW, db in zip(self.weights, self.biases, grads_w, grads_b):
                W -= self.learning_rate * dW
                b -= self.learning_rate * db

    def predict(self, X):
        """Return the index of the highest-probability class for each row of ``X``."""
        activations, _ = self.forward(X)
        return np.argmax(activations[-1], axis=1)

    def accuracy(self, X, Y_true):
        """Return the fraction of rows of ``X`` whose prediction matches ``Y_true``.

        ``Y_true`` is one-hot encoded; its arg-max per row is the true label.
        """
        predictions = self.predict(X)
        Y_true_labels = np.argmax(Y_true, axis=1)
        return np.mean(predictions == Y_true_labels)

# -----------------------------
# Main Function
# -----------------------------
def main():
    """Train three network architectures on Iris and print a comparison table.

    The first 25 samples of each class form the training set and the
    remaining 25 the test set. For each architecture the table reports
    training accuracy, testing accuracy and training time in seconds.
    """
    # Load the Iris dataset.
    iris = datasets.load_iris()
    data = iris.data
    target = iris.target.reshape(-1, 1)

    # One-hot encode the target labels.
    encoder = OneHotEncoder(sparse_output=False)
    target_onehot = encoder.fit_transform(target)

    # Iris is ordered by class (samples 0-49, 50-99, 100-149), so the first
    # half of each class goes to training and the second half to testing.
    class_starts = (0, 50, 100)
    train_idx = [i for start in class_starts for i in range(start, start + 25)]
    test_idx = [i for start in class_starts for i in range(start + 25, start + 50)]
    X_train, Y_train = data[train_idx], target_onehot[train_idx]
    X_test, Y_test = data[test_idx], target_onehot[test_idx]

    # ANN-1: 4-3-3, ANN-2: 4-6-3, ANN-3: 4-5-7-3.
    architectures = ([4, 3, 3], [4, 6, 3], [4, 5, 7, 3])

    # Build every network before training any, so the random draws happen
    # in the same order regardless of how long training takes.
    networks = [
        NeuralNetwork(arch, learning_rate=0.1, epochs=1000)
        for arch in architectures
    ]

    # perf_counter is monotonic and high-resolution, which matters for the
    # sub-second intervals measured here; time.time() can jump with the
    # system clock.
    train_times = []
    for net in networks:
        start = time.perf_counter()
        net.train(X_train, Y_train)
        train_times.append(time.perf_counter() - start)

    train_accs = [net.accuracy(X_train, Y_train) for net in networks]
    test_accs = [net.accuracy(X_test, Y_test) for net in networks]

    # Print the statistics table.
    row_format = "{:<20} {:<15.4f} {:<15.4f} {:<15.4f}"
    print("Statistics Table:")
    print("{:<20} {:<15} {:<15} {:<15}".format(" ", "ANN-1", "ANN-2", "ANN-3"))
    print(row_format.format("Training Accuracy", *train_accs))
    print(row_format.format("Testing Accuracy", *test_accs))
    print(row_format.format("Training Time (s)", *train_times))

# Run the experiment only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

Statistics Table:
                     ANN-1           ANN-2           ANN-3          
Training Accuracy    0.0000          1.0000          0.9867         
Testing Accuracy     0.0000          0.9467          0.9867         
Training Time (s)    0.0801          0.0853          0.1354         
