In [7]:
import numpy as np
import matplotlib.pyplot as plt


In [8]:
# Layer base class
class Layer:
    """Minimal interface shared by the layers in this notebook.

    The base class keeps no state and performs no computation: both
    ``forward`` and ``backward`` simply return ``None``. The concrete
    layers defined in this notebook override both methods.
    """

    def __init__(self):
        """Create the layer; the base class has nothing to initialise."""

    def forward(self, input):
        """Placeholder forward pass: ignores ``input`` and returns ``None``."""

    def backward(self, output_gradient):
        """Placeholder backward pass: ignores ``output_gradient`` and returns ``None``."""

In [9]:
# Dense layer
class Dense(Layer):
    """Fully connected layer computing ``np.dot(input, weights) + biases``.

    Weights are drawn from ``np.random.randn`` and scaled by 0.01; biases
    start at zero with shape ``(1, output_size)``. ``backward`` both returns
    the gradient with respect to the layer input and applies a gradient
    descent step to the parameters using ``learning_rate``.
    """

    def __init__(self, input_size, output_size, learning_rate=0.01):
        super().__init__()
        self.learning_rate = learning_rate
        self.biases = np.zeros((1, output_size))
        self.weights = 0.01 * np.random.randn(input_size, output_size)

    def forward(self, input):
        """Return the affine transform of ``input`` and cache ``input``."""
        # backward() needs the input to build the weight gradient.
        self.input = input
        weighted_sum = np.dot(input, self.weights)
        return weighted_sum + self.biases

    def backward(self, output_gradient):
        """Update the parameters in place and return the input gradient.

        Relies on ``self.input`` set by the most recent ``forward`` call.
        """
        # The gradient flowing back must be computed from the weights as they
        # were during the forward pass, i.e. before the update below.
        grad_wrt_input = np.dot(output_gradient, self.weights.T)
        grad_wrt_weights = np.dot(self.input.T, output_gradient)
        # Each bias is shared by every row of the batch, so sum over rows.
        grad_wrt_biases = np.sum(output_gradient, axis=0, keepdims=True)

        self.weights -= self.learning_rate * grad_wrt_weights
        self.biases -= self.learning_rate * grad_wrt_biases
        return grad_wrt_input

# Activation layer using ReLU
class ReLu(Layer):
    """Element-wise activation layer that applies ReLU unless told otherwise.

    Args:
        activation: Callable applied to the input in ``forward``. When both
            arguments are omitted, ``ReLu.relu`` is used.
        activation_derivative: Callable giving the element-wise derivative of
            ``activation``, evaluated at the cached input in ``backward``.
            When both arguments are omitted, ``ReLu.relu_derivative`` is used.

    Raises:
        ValueError: If only one of ``activation`` / ``activation_derivative``
            is supplied; pairing a custom function with the ReLU default of
            the other would silently produce wrong gradients.
    """

    def __init__(self, activation=None, activation_derivative=None):
        super().__init__()
        if (activation is None) != (activation_derivative is None):
            raise ValueError(
                "activation and activation_derivative must be given together"
            )
        if activation is None:
            activation = ReLu.relu
            activation_derivative = ReLu.relu_derivative
        self.activation = activation
        self.activation_derivative = activation_derivative

    def forward(self, input):
        """Apply the activation to ``input`` and cache ``input`` for backward."""
        self.input = input
        return self.activation(input)

    def backward(self, output_gradient):
        """Scale ``output_gradient`` element-wise by the activation derivative.

        The derivative is evaluated at the input cached by the most recent
        ``forward`` call.
        """
        return np.multiply(output_gradient, self.activation_derivative(self.input))

    # The helpers take no ``self``. Declaring them static keeps
    # ``ReLu.relu(x)`` working and also makes ``instance.relu(x)`` valid
    # instead of binding the instance as ``x``.
    @staticmethod
    def relu(x):
        """Return ``max(0, x)`` element-wise."""
        return np.maximum(0, x)

    @staticmethod
    def relu_derivative(x):
        """Return 1 where ``x > 0`` and 0 elsewhere (including at ``x == 0``)."""
        return np.where(x > 0, 1, 0)

In [10]:
class NeuralNetwork:
    """Sequential container that chains layers for forward and backward passes.

    Each layer must expose ``forward(input)`` and ``backward(output_gradient)``.
    """

    def __init__(self, layers=None):
        """Start from ``layers`` (used as-is, not copied) or an empty list."""
        self.layers = layers if layers is not None else []

    def add(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def forward(self, input):
        """Feed ``input`` through every layer in order and return the result."""
        output = input
        for layer in self.layers:
            output = layer.forward(output)
        return output

    def backward(self, output_gradient):
        """Propagate ``output_gradient`` through the layers in reverse order.

        Returns whatever the first layer's ``backward`` returns (for the
        layers in this notebook, the gradient with respect to the network
        input).
        """
        for layer in reversed(self.layers):
            output_gradient = layer.backward(output_gradient)
        return output_gradient

    def train(self, x_train, y_train, epochs, batch_size=None, verbose=True,
              print_every=1):
        """Train with mini-batch gradient descent.

        The gradient fed to ``backward`` is ``predictions - y_batch`` (not
        averaged over the batch). Parameter updates are performed by the
        layers themselves inside their ``backward`` methods.

        Args:
            x_train: Training inputs, indexed by sample along axis 0.
            y_train: Training targets with the same number of samples.
            epochs: Number of passes over the training set.
            batch_size: Samples per update; ``None`` means the full set.
            verbose: When true, print the mean squared error on the whole
                training set after reporting epochs.
            print_every: Report every ``print_every``-th epoch (epoch 0
                included). The default of 1 reports every epoch.

        Raises:
            ValueError: If ``x_train`` and ``y_train`` disagree on the number
                of samples, or if ``batch_size`` or ``print_every`` is not
                positive (an empty training set with ``batch_size=None``
                falls in this case because the batch size resolves to 0).
        """
        # Accept plain Python sequences as well as arrays.
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)
        n_samples = x_train.shape[0]

        if y_train.shape[0] != n_samples:
            raise ValueError(
                f"x_train has {n_samples} samples but y_train has {y_train.shape[0]}"
            )

        # Use full batch if batch_size not specified
        if batch_size is None:
            batch_size = n_samples
        if batch_size < 1:
            # A negative step would make range() empty and skip training
            # silently; zero would fail with an unrelated range() error.
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
        if print_every < 1:
            raise ValueError(f"print_every must be a positive integer, got {print_every}")

        for epoch in range(epochs):
            # Shuffle data at each epoch
            indices = np.random.permutation(n_samples)
            x_shuffled, y_shuffled = x_train[indices], y_train[indices]

            # Process mini-batches; the last one may be smaller.
            for start in range(0, n_samples, batch_size):
                x_batch = x_shuffled[start:start + batch_size]
                y_batch = y_shuffled[start:start + batch_size]

                # Forward pass and compute error
                predictions = self.forward(x_batch)
                error = predictions - y_batch

                # Backward pass
                self.backward(error)

            # Print progress
            if verbose and epoch % print_every == 0:
                loss = np.mean(np.square(self.forward(x_train) - y_train))
                print(f"Epoch {epoch}: loss={loss:.6f}")

In [11]:
# Tests
def test_initialization():
    """A network created without arguments must start with no layers."""
    network = NeuralNetwork()
    expected_layers = []
    assert network.layers == expected_layers, "Initialization failed: layers should be an empty list"

# Softmax activation layer
class Softmax(Layer):
    """Row-wise softmax activation over axis 1 of the input."""

    def forward(self, input):
        """Return the softmax of each row of ``input`` and cache it.

        The row maximum is subtracted before exponentiating; this leaves the
        result unchanged mathematically but keeps ``np.exp`` from overflowing.
        """
        shifted = input - np.max(input, axis=1, keepdims=True)
        exp_values = np.exp(shifted)
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        return self.output

    def backward(self, output_gradient):
        """Return the gradient with respect to the softmax input.

        For one row with softmax output ``s`` and upstream gradient ``g``,
        the Jacobian is ``diag(s) - s s^T``, so the product with ``g`` is
        ``s * g - s * sum(s * g)``. Evaluating that closed form for all rows
        at once avoids building a Jacobian per sample. The result takes its
        dtype from the arithmetic with the floating-point ``self.output``,
        so an integer ``output_gradient`` no longer truncates the gradient
        the way writing into ``np.zeros_like(output_gradient)`` did.

        Relies on ``self.output`` set by the most recent ``forward`` call.
        """
        weighted = self.output * output_gradient
        row_dot = np.sum(weighted, axis=1, keepdims=True)
        return weighted - self.output * row_dot
    
def test_add_layer():
    """``add`` must place the given layer object into ``layers``."""
    network = NeuralNetwork()
    dense = Dense(3, 2)
    network.add(dense)

    expected_layers = [dense]
    assert network.layers == expected_layers, "Adding layer failed: layer not added correctly"
    
def test_forward_pass():
    """One 3-feature sample through Dense(3, 2) must give a (1, 2) output."""
    network = NeuralNetwork()
    dense = Dense(3, 2)
    network.add(dense)

    sample = np.array([[1, 2, 3]])
    result = network.forward(sample)

    expected_shape = (1, 2)
    assert result.shape == expected_shape, "Forward pass failed: output shape not as expected"
    
def test_backward_pass():
    """The gradient returned by ``backward`` must have the input's shape."""
    network = NeuralNetwork()
    dense = Dense(3, 2)
    network.add(dense)

    # A forward pass is required first so the layer has a cached input.
    sample = np.array([[1, 2, 3]])
    network.forward(sample)

    upstream_gradient = np.array([[1, 2]])
    returned_gradient = network.backward(upstream_gradient)

    expected_shape = (1, 3)
    assert returned_gradient.shape == expected_shape, "Backward pass failed: gradient shape not as expected"
    
def test_training():
    """Train a tiny Dense + ReLU network and check that the loss decreases.

    The earlier check (``loss < 1``) could not fail: with near-zero initial
    weights the untrained network already scores about 0.5 on these 0/1
    targets. The loss after training is now compared with the loss before.
    """
    # Weight initialisation and batch shuffling both draw from NumPy's global
    # RNG; seeding it makes the test repeatable. Note this reseeds the
    # global generator for any code that runs afterwards.
    np.random.seed(0)

    nn = NeuralNetwork()
    layer = Dense(3, 2)
    activation = ReLu(ReLu.relu, ReLu.relu_derivative)
    nn.add(layer)
    nn.add(activation)
    x_train = np.array([[1, 2, 3], [4, 5, 6]])
    y_train = np.array([[0, 1], [1, 0]])

    # Baseline: mean squared error of the untrained network.
    initial_loss = np.mean(np.square(nn.forward(x_train) - y_train))

    nn.train(x_train, y_train, epochs=10, batch_size=1, verbose=True)
    loss = np.mean(np.square(nn.forward(x_train) - y_train))

    assert loss < initial_loss, "Training failed: loss not reduced as expected"
    
if __name__ == "__main__":
    # Run every check in its original order. A failing assert stops the run
    # before the success message is printed.
    for run_test in (
        test_initialization,
        test_add_layer,
        test_forward_pass,
        test_backward_pass,
        test_training,
    ):
        run_test()
    print("All tests passed!")

Epoch 0: loss=0.266219
Epoch 1: loss=0.237855
Epoch 2: loss=0.248618
Epoch 3: loss=0.250086
Epoch 4: loss=0.246440
Epoch 5: loss=0.241795
Epoch 6: loss=0.215387
Epoch 7: loss=0.211570
Epoch 8: loss=0.220052
Epoch 9: loss=0.222087
All tests passed!
