# Lab 3

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import matplotlib.pyplot as plt

# Load the Iris dataset
data = load_iris()
X = data.data  # Feature matrix, one row per sample
y = data.target  # Integer class labels

# One-hot encode labels.
# scikit-learn 1.2 renamed OneHotEncoder's `sparse` keyword to
# `sparse_output`, and 1.4 removed the old name. Try the current keyword
# first and fall back so the cell runs on both old and new installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
y = encoder.fit_transform(y.reshape(-1, 1))

# Split into train and test sets (20% held out, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features.
# The scaler is fitted on the training split only and then reused on the
# test split, so no test-set statistics leak into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)




In [2]:
# Activation Functions
def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    floor = 0
    rectified = np.maximum(floor, Z)
    return rectified

def relu_derivative(Z):
    """Return the boolean mask of entries where ``Z`` is strictly positive.

    Multiplying an upstream gradient by this mask keeps it where the ReLU
    was active and zeroes it elsewhere.
    """
    active = 0 < Z
    return active

def softmax(Z, axis=1):
    """Compute a numerically stable softmax along ``axis``.

    Args:
        Z: Array of scores (logits).
        axis: Axis along which the outputs are normalized to sum to 1.
            Defaults to 1, i.e. each row of a 2-D ``(samples, classes)``
            array is normalized. Pass ``axis=-1`` for 1-D input.

    Returns:
        Array with the same shape as ``Z`` whose entries along ``axis``
        are non-negative and sum to 1.
    """
    # Subtracting the per-slice maximum leaves the result unchanged
    # mathematically but keeps np.exp from overflowing on large scores.
    shifted = Z - np.max(Z, axis=axis, keepdims=True)
    exp_Z = np.exp(shifted)
    return exp_Z / np.sum(exp_Z, axis=axis, keepdims=True)


In [3]:
# Cross-entropy loss function
def cross_entropy_loss(y_true, y_pred, eps=1e-12):
    """Mean categorical cross-entropy of predictions against one-hot labels.

    Args:
        y_true: 2-D label array. The true class of each row is taken to be
            the index of that row's maximum (argmax over axis 1).
        y_pred: 2-D array of predicted probabilities, indexed as
            ``[sample, class]``.
        eps: Lower bound applied to the selected probabilities before the
            logarithm is taken.

    Returns:
        The average of ``-log(p)`` over all samples, where ``p`` is the
        probability predicted for each sample's true class.
    """
    n_samples = y_true.shape[0]
    true_class = np.argmax(y_true, axis=1)
    picked = y_pred[np.arange(n_samples), true_class]
    # A probability of exactly 0 (e.g. after softmax underflow) would make
    # log() return -inf and turn the whole loss infinite; floor it at eps.
    picked = np.maximum(picked, eps)
    logp = -np.log(picked)
    loss = np.sum(logp) / n_samples
    return loss

# Accuracy
def accuracy(y_true, y_pred):
    """Fraction of rows whose argmax in ``y_pred`` equals the argmax in ``y_true``."""
    expected = np.argmax(y_true, axis=1)
    predicted = np.argmax(y_pred, axis=1)
    hits = expected == predicted
    return np.mean(hits)


In [4]:
class NeuralNetwork:
    """Fully connected classifier: two ReLU hidden layers and a softmax output.

    ``forward`` caches each layer's pre-activation (``Z1``..``Z3``) and
    activation (``A1``..``A3``) on the instance. ``backward`` reads those
    caches and stores the gradients as ``dW1``..``dW3`` and ``db1``..``db3``.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        """Create small random weights and zero biases for the three layers."""
        layer_dims = (
            (input_size, hidden_size1),
            (hidden_size1, hidden_size2),
            (hidden_size2, output_size),
        )
        # Layers are created in order 1, 2, 3 so that a seeded global
        # NumPy RNG always yields the same weights.
        for idx, (fan_in, fan_out) in enumerate(layer_dims, start=1):
            setattr(self, f'W{idx}', np.random.randn(fan_in, fan_out) * 0.01)
            setattr(self, f'b{idx}', np.zeros((1, fan_out)))

    def forward(self, X):
        """Run the network on ``X`` and return the softmax output ``A3``."""
        # Hidden layer 1
        pre1 = np.dot(X, self.W1) + self.b1
        self.Z1 = pre1
        self.A1 = relu(pre1)

        # Hidden layer 2
        pre2 = np.dot(self.A1, self.W2) + self.b2
        self.Z2 = pre2
        self.A2 = relu(pre2)

        # Output layer
        pre3 = np.dot(self.A2, self.W3) + self.b3
        self.Z3 = pre3
        self.A3 = softmax(pre3)
        return self.A3

    def backward(self, X, y, y_pred):
        """Compute gradients for all weights and biases from the cached pass.

        Uses the ``A1``, ``A2``, ``Z1`` and ``Z2`` values stored by the most
        recent ``forward`` call, so that call must have been made on ``X``.
        Results are stored on the instance; nothing is returned.
        """
        batch = y.shape[0]

        def param_grads(layer_input, delta):
            # Batch-averaged gradients for one layer's weight and bias.
            grad_w = np.dot(layer_input.T, delta) / batch
            grad_b = np.sum(delta, axis=0, keepdims=True) / batch
            return grad_w, grad_b

        # Output layer: error signal is prediction minus target
        delta3 = y_pred - y
        self.dW3, self.db3 = param_grads(self.A2, delta3)

        # Second hidden layer: push the error back through W3 and the ReLU
        delta2 = np.dot(delta3, self.W3.T) * relu_derivative(self.Z2)
        self.dW2, self.db2 = param_grads(self.A1, delta2)

        # First hidden layer: push the error back through W2 and the ReLU
        delta1 = np.dot(delta2, self.W2.T) * relu_derivative(self.Z1)
        self.dW1, self.db1 = param_grads(X, delta1)


In [6]:
class Optimizer:
    """Apply SGD or Adam updates to a network's weights and biases.

    The network object must expose parameters ``W1``, ``b1``, ``W2``, ``b2``,
    ``W3``, ``b3`` and, by the time ``update`` is called, matching gradients
    ``dW1``, ``db1``, ... Parameters are modified in place.

    Args:
        nn: The network whose parameters are updated.
        learning_rate: Step size used by both update rules.
        optimizer: ``'sgd'`` or ``'adam'``.

    Raises:
        ValueError: If ``optimizer`` is not a supported name.
    """

    _PARAM_NAMES = ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
    _SUPPORTED = ('sgd', 'adam')

    def __init__(self, nn, learning_rate=0.01, optimizer='sgd'):
        # Fail fast: an unrecognized name would otherwise make update() a
        # silent no-op and training would appear to run without learning.
        if optimizer not in self._SUPPORTED:
            raise ValueError(
                f"Unsupported optimizer {optimizer!r}; expected one of {self._SUPPORTED}"
            )

        self.nn = nn
        self.lr = learning_rate
        self.optimizer = optimizer

        # Adam optimizer variables: first (m) and second (v) moment
        # estimates per parameter, stored as mW1, vW1, mb1, vb1, ...
        if optimizer == 'adam':
            for name in self._PARAM_NAMES:
                param = getattr(nn, name)
                setattr(self, 'm' + name, np.zeros_like(param))
                setattr(self, 'v' + name, np.zeros_like(param))
            self.beta1 = 0.9
            self.beta2 = 0.999
            self.eps = 1e-8
            self.t = 0  # number of Adam steps taken so far

    def update(self):
        """Apply one update step using the gradients stored on the network.

        Raises:
            ValueError: If ``self.optimizer`` was changed to an unsupported
                name after construction.
        """
        if self.optimizer == 'sgd':
            # SGD updates: param <- param - lr * grad
            for name in self._PARAM_NAMES:
                param = getattr(self.nn, name)
                param -= self.lr * getattr(self.nn, 'd' + name)
                setattr(self.nn, name, param)

        elif self.optimizer == 'adam':
            # Adam updates; t feeds the bias correction, so it is
            # incremented once per step before any parameter is touched.
            self.t += 1
            self._adam_update('W1', 'b1')
            self._adam_update('W2', 'b2')
            self._adam_update('W3', 'b3')

        else:
            raise ValueError(
                f"Unsupported optimizer {self.optimizer!r}; expected one of {self._SUPPORTED}"
            )

    def _adam_update(self, weight, bias):
        """Apply one Adam step to the named weight and bias of one layer."""
        for name in (weight, bias):
            self._adam_step(name)

    def _adam_step(self, name):
        """Apply one Adam step to the single parameter called ``name``."""
        param = getattr(self.nn, name)
        grad = getattr(self.nn, 'd' + name)

        # Update biased moments
        m = self.beta1 * getattr(self, 'm' + name) + (1 - self.beta1) * grad
        v = self.beta2 * getattr(self, 'v' + name) + (1 - self.beta2) * (grad ** 2)

        # Bias correction: the moments start at zero, so early estimates
        # are scaled up by 1 / (1 - beta ** t).
        m_hat = m / (1 - self.beta1 ** self.t)
        v_hat = v / (1 - self.beta2 ** self.t)

        # Update the parameter in place so the network sees the change
        param -= self.lr * m_hat / (np.sqrt(v_hat) + self.eps)

        # Save updated moments
        setattr(self, 'm' + name, m)
        setattr(self, 'v' + name, v)


In [7]:
def train_model(X_train, y_train, X_test, y_test, optimizer='sgd', epochs=100, lr=0.01,
                hidden_size1=64, hidden_size2=32, log_every=10):
    """Train a fresh NeuralNetwork with full-batch updates and track losses.

    Args:
        X_train: Training features; the number of columns sets the input size.
        y_train: Training labels; the number of columns sets the output size.
        X_test: Held-out features, evaluated after every update.
        y_test: Held-out labels.
        optimizer: Optimizer name passed through to ``Optimizer``.
        epochs: Number of full-batch update steps.
        lr: Learning rate passed through to ``Optimizer``.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        log_every: Print progress every this many epochs; 0 or None
            disables printing.

    Returns:
        ``(train_loss, test_loss)``: two lists with one entry per epoch.
        Within an epoch the training loss is measured before the parameter
        update and the test loss after it.
    """
    # Initialize the neural network; layer sizes at the edges come from the data
    input_size = X_train.shape[1]
    output_size = y_train.shape[1]
    nn = NeuralNetwork(input_size, hidden_size1, hidden_size2, output_size)

    # Initialize the optimizer
    opt = Optimizer(nn, learning_rate=lr, optimizer=optimizer)

    train_loss = []
    test_loss = []
    for epoch in range(epochs):
        # Forward pass on the whole training set
        y_pred = nn.forward(X_train)

        # Compute loss
        loss = cross_entropy_loss(y_train, y_pred)
        train_loss.append(loss)

        # Backward pass; must run before the test forward pass below,
        # because forward() overwrites the activations backward() reads.
        nn.backward(X_train, y_train, y_pred)

        # Update weights
        opt.update()

        # Test loss
        y_test_pred = nn.forward(X_test)
        loss_test = cross_entropy_loss(y_test, y_test_pred)
        test_loss.append(loss_test)
        if log_every and epoch % log_every == 0:
            acc = accuracy(y_test, y_test_pred)
            print(f'Epoch {epoch}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}, Test Accuracy: {acc:.4f}')

    return train_loss, test_loss

