In [None]:
def backward(self, x, y_true, y_pred):
    """Backpropagate through a two-layer network and return its gradients.

    Reads ``self.a1`` (described in the original as the first layer's
    activation; presumably cached by the forward pass -- confirm) and
    ``self.weights2``. Nothing on ``self`` is modified.

    Args:
        x: Input batch; its transpose is multiplied with the hidden delta.
        y_true: Targets; ``len(y_true)`` is used as the averaging divisor.
        y_pred: Network output for ``x``.

    Returns:
        Tuple ``(dW1, dB1, dW2, dB2)``, each averaged over ``len(y_true)``.
    """
    # Output layer: the raw prediction error is used directly as the
    # pre-activation gradient (a simplifying assumption about loss/activation).
    output_delta = y_pred - y_true
    batch_size = len(y_true)

    grad_w2 = np.dot(self.a1.T, output_delta) / batch_size
    grad_b2 = np.sum(output_delta, axis=0, keepdims=True) / batch_size

    # Hidden layer: push the delta back through weights2, then scale by
    # a1 * (1 - a1), the sigmoid derivative expressed via its own output.
    hidden_upstream = np.dot(output_delta, self.weights2.T)
    hidden_delta = hidden_upstream * self.a1 * (1 - self.a1)

    grad_w1 = np.dot(x.T, hidden_delta) / batch_size
    grad_b1 = np.sum(hidden_delta, axis=0, keepdims=True) / batch_size

    return grad_w1, grad_b1, grad_w2, grad_b2


def fit(self, x_train, y_train, x_val, y_val, epochs=100, lr=0.01):
    """Run ``epochs`` full-batch gradient-descent steps on the training data.

    Each epoch calls ``self.forward``, ``self.calculate_cross_entropy_loss``
    and ``self.backward``, then subtracts ``lr`` times each gradient from
    ``self.weights1``, ``self.bias1``, ``self.weights2`` and ``self.bias2``.

    Args:
        x_train: Training inputs passed to ``forward`` and ``backward``.
        y_train: Training targets.
        x_val: Validation inputs (accepted but not used yet).
        y_val: Validation targets (accepted but not used yet).
        epochs: Number of update steps.
        lr: Learning rate.
    """
    param_names = ("weights1", "bias1", "weights2", "bias2")

    for _ in range(epochs):
        predictions = self.forward(x_train)
        # The loss is evaluated every epoch but not consumed here; reporting
        # and validation are left as a placeholder.
        train_loss = self.calculate_cross_entropy_loss(predictions, y_train)

        dw1, db1, dw2, db2 = self.backward(x_train, y_train, predictions)

        # Apply the descent step to each parameter in turn.
        for name, gradient in zip(param_names, (dw1, db1, dw2, db2)):
            value = getattr(self, name)
            value -= lr * gradient
            setattr(self, name, value)

        # Validation steps would follow here, adjusted as needed



In [None]:
import numpy as np

class MultiLayerNetwork:
    """Fully connected classifier with sigmoid hidden layers and softmax output.

    Weights and biases are stored in ``self.parameters`` under the keys
    ``'W1'..'WL'`` and ``'b1'..'bL'``. Inputs are expected as 2-D arrays with
    one sample per row, so layer ``l`` computes ``A @ W{l} + b{l}``.
    """

    def __init__(self, layer_dims):
        """Create the parameters for every layer.

        Args:
            layer_dims: Sequence of layer sizes, input size first and output
                size last.

        Raises:
            ValueError: If fewer than two sizes are given (no layer to build).
        """
        if len(layer_dims) < 2:
            raise ValueError(
                'layer_dims must contain at least an input and an output size'
            )

        self.parameters = {}
        self.L = len(layer_dims) - 1  # Number of layers excluding input layer
        # Per-epoch losses recorded by the most recent call to fit()
        self.history = {'train_loss': [], 'val_loss': []}

        # Weights: standard normal scaled by sqrt(2 / fan_in); biases: zeros
        for l in range(1, self.L + 1):
            fan_in, fan_out = layer_dims[l - 1], layer_dims[l]
            self.parameters[f'W{l}'] = np.random.randn(fan_in, fan_out) * np.sqrt(2 / fan_in)
            self.parameters[f'b{l}'] = np.zeros((1, fan_out))

    def activation_sigmoid(self, z):
        """Return the element-wise logistic sigmoid of ``z``."""
        return 1 / (1 + np.exp(-z))

    def activation_softmax(self, z):
        """Return the row-wise softmax of a 2-D array ``z``.

        The maximum is subtracted per row. Subtracting a single global
        maximum can underflow every entry of a row that lies far below it,
        which produces 0/0 = NaN for that row.
        """
        e_x = np.exp(z - np.max(z, axis=1, keepdims=True))
        return e_x / np.sum(e_x, axis=1, keepdims=True)

    def _hidden_activations(self, x):
        """Return ``[x, A1, ..., A_{L-1}]``, i.e. the input to each layer."""
        activations = [x]
        for l in range(1, self.L):
            Z = np.dot(activations[-1], self.parameters[f'W{l}']) + self.parameters[f'b{l}']
            activations.append(self.activation_sigmoid(Z))
        return activations

    def _output_layer(self, A):
        """Apply the last (softmax) layer to the final hidden activation."""
        ZL = np.dot(A, self.parameters[f'W{self.L}']) + self.parameters[f'b{self.L}']
        return self.activation_softmax(ZL)

    def _gradients(self, layer_inputs, y_true, y_pred):
        """Backpropagate the mean cross-entropy loss through every layer.

        Uses the softmax + cross-entropy shortcut ``dZ_L = (y_pred - y_true) / m``,
        which is exact when each row of ``y_true`` sums to 1 (e.g. one-hot).

        Args:
            layer_inputs: Output of ``_hidden_activations``; entry ``l - 1``
                is the input of layer ``l``.
            y_true: Target distribution per sample.
            y_pred: Softmax output for the same samples.

        Returns:
            Dict with the same keys and shapes as ``self.parameters``.
        """
        m = len(y_true)
        grads = {}
        dZ = (y_pred - y_true) / m
        for l in range(self.L, 0, -1):
            A_prev = layer_inputs[l - 1]
            grads[f'W{l}'] = np.dot(A_prev.T, dZ)
            grads[f'b{l}'] = np.sum(dZ, axis=0, keepdims=True)
            if l > 1:
                # A_prev is a sigmoid output here, so its derivative is
                # A_prev * (1 - A_prev).
                dZ = np.dot(dZ, self.parameters[f'W{l}'].T) * A_prev * (1 - A_prev)
        return grads

    def forward(self, x):
        """Return the softmax class probabilities for the batch ``x``."""
        return self._output_layer(self._hidden_activations(x)[-1])

    def backward(self, x, y_true, y_pred):
        """Return the loss gradients for the first layer.

        The ``(dW1, db1)`` return shape is kept for existing callers; the
        values are now real gradients instead of placeholder ones.
        Gradients for every layer are available from ``_gradients``.

        Args:
            x: Input batch that produced ``y_pred``.
            y_true: Target distribution per sample.
            y_pred: Output of ``forward(x)``.
        """
        grads = self._gradients(self._hidden_activations(x), y_true, y_pred)
        return grads['W1'], grads['b1']

    def fit(self, x_train, y_train, x_val, y_val, epochs=100, lr=0.01):
        """Train with full-batch gradient descent.

        Every layer is updated with its own gradient; applying one layer's
        gradient to all layers fails as soon as layer shapes differ. Losses
        are appended to ``self.history`` once per epoch, measured before that
        epoch's update.

        Args:
            x_train: Training inputs, one sample per row.
            y_train: Training targets (rows summing to 1, e.g. one-hot).
            x_val: Validation inputs, or ``None`` to skip validation.
            y_val: Validation targets, or ``None`` to skip validation.
            epochs: Number of update steps.
            lr: Learning rate.
        """
        self.history = {'train_loss': [], 'val_loss': []}
        has_validation = x_val is not None and y_val is not None

        for epoch in range(epochs):
            layer_inputs = self._hidden_activations(x_train)
            y_pred_train = self._output_layer(layer_inputs[-1])
            self.history['train_loss'].append(
                self.calculate_cross_entropy_loss(y_pred_train, y_train)
            )
            if has_validation:
                self.history['val_loss'].append(
                    self.calculate_cross_entropy_loss(self.forward(x_val), y_val)
                )

            # All gradients are computed before any parameter changes.
            grads = self._gradients(layer_inputs, y_train, y_pred_train)
            for key, grad in grads.items():
                self.parameters[key] -= lr * grad

    def calculate_cross_entropy_loss(self, y_pred, y_true):
        """Return the mean cross-entropy of ``y_pred`` against ``y_true``.

        Predictions are clipped away from 0 and 1 so ``log`` stays finite.
        """
        y_pred = np.clip(y_pred, 1e-10, 1-1e-10)
        loss = -np.sum(y_true * np.log(y_pred)) / len(y_true)
        return loss
