In [47]:
import numpy as np

class NeuralNetwork:
    def __init__(self, num_layers, neurons_per_layer, learning_rate=0.01):
        self.num_layers = num_layers
        self.neurons_per_layer = neurons_per_layer
        self.weights = []
        self.biases = []
        self.learning_rate = learning_rate  # Initialize learning rate
        self.init_weights_and_biases()
        
    def init_weights_and_biases(self):
        for i in range(1, len(self.neurons_per_layer)):
            weight = np.random.randn(self.neurons_per_layer[i], self.neurons_per_layer[i-1]) * 0.01
            bias = np.zeros((self.neurons_per_layer[i], 1))
            self.weights.append(weight)
            self.biases.append(bias)
    
    def relu(self, Z):
        return np.maximum(0, Z)
    
    def relu_derivative(self, Z):
        return Z > 0
    
    def compute_loss(self, y_true, scores):
        num_samples = scores.shape[1]
        correct_class_scores = scores[y_true, np.arange(num_samples)]
        margins = np.maximum(0, scores - correct_class_scores + 1)  # Delta = 1
        margins[y_true, np.arange(num_samples)] = 0
        loss = np.sum(margins) / num_samples
        return loss
    
    def forward_propagation(self, X):
        cache = {"A0": X}
        A = X
        for i, (w, b) in enumerate(zip(self.weights, self.biases), 1):
            Z = np.dot(w, A) + b
            A = self.relu(Z)
            cache["Z" + str(i)] = Z
            cache["A" + str(i)] = A
        return A, cache
    
    def backward_propagation(self, y_true, cache):
        num_samples = cache["A0"].shape[1]
        scores = cache["A" + str(self.num_layers - 1)]
        grad_scores = np.zeros_like(scores)

        # Correct class scores calculation fix
        for i in range(num_samples):
            correct_class_score = scores[y_true[i], i]
            for j in range(scores.shape[0]):
                if j == y_true[i]:
                    continue
                margin = scores[j, i] - correct_class_score + 1  # Delta = 1
                if margin > 0:
                    grad_scores[j, i] += 1
                    grad_scores[y_true[i], i] -= 1

        # Backpropagate the gradient to the weights and biases
        grad_weights = [np.zeros_like(w) for w in self.weights]
        grad_biases = [np.zeros_like(b) for b in self.biases]
        grad_A = grad_scores
        for l in reversed(range(self.num_layers - 1)):
            grad_Z = grad_A * self.relu_derivative(cache["Z" + str(l + 1)])
            grad_A = np.dot(self.weights[l].T, grad_Z)
            grad_weights[l] = np.dot(grad_Z, cache["A" + str(l)].T) / num_samples
            grad_biases[l] = np.sum(grad_Z, axis=1, keepdims=True) / num_samples

            # Update weights and biases based on gradient
            self.weights[l] -= self.learning_rate * grad_weights[l]
            self.biases[l] -= self.learning_rate * grad_biases[l]

    def predict(self, X):
        scores, _ = self.forward_propagation(X)
        predictions = np.argmax(scores, axis=0)
        return predictions
    
    def fit(self, X, y, epochs):
        for epoch in range(epochs):
            scores, cache = self.forward_propagation(X)
            loss = self.compute_loss(y, scores)
            self.backward_propagation(y, cache)
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss}")


In [48]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Step 3: Load the Iris dataset
X, y = load_iris(return_X_y=True)

# Step 4: Preprocess the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Assuming you have the NeuralNetwork class defined and ready
# Step 5: Initialize the neural network
# Note: Adjust the number of neurons in the layers according to your needs
nn = NeuralNetwork(num_layers=3, neurons_per_layer=[4, 10, 3], learning_rate = 0.1)

# Step 6: Train the neural network
nn.fit(X_train_scaled.T, y_train, epochs=2000)


# Step 7: Evaluate the performance
predictions = nn.predict(X_test_scaled.T)
accuracy = accuracy_score(y_test, predictions)
print(f"Test set accuracy: {accuracy}")


Epoch 0, Loss: 1.999759284784261
Epoch 100, Loss: 0.9527250348308439
Epoch 200, Loss: 0.7189462420704354
Epoch 300, Loss: 0.7052370547438914
Epoch 400, Loss: 0.7014653376488726
Epoch 500, Loss: 0.7000808716581571
Epoch 600, Loss: 0.6985573354839751
Epoch 700, Loss: 0.6968792731300595
Epoch 800, Loss: 0.6950774178858458
Epoch 900, Loss: 0.6930381790604491
Epoch 1000, Loss: 0.6912879359349624
Epoch 1100, Loss: 0.6901691471110138
Epoch 1200, Loss: 0.6889013225228344
Epoch 1300, Loss: 0.6872990696392987
Epoch 1400, Loss: 0.6859247639732898
Epoch 1500, Loss: 0.6848874867634323
Epoch 1600, Loss: 0.6837709108714443
Epoch 1700, Loss: 0.6831448318288851
Epoch 1800, Loss: 0.682108772895567
Epoch 1900, Loss: 0.6818750358176879
Test set accuracy: 0.6333333333333333
