# Lab 3: Neural Networks from Scratch

**Day 2 - Deep Learning**

| Duration | Difficulty | Prerequisites |
|----------|------------|---------------|
| 90 min | Intermediate | Labs 1-2 |

## Learning Objectives

- Understand neuron and layer structure
- Implement activation functions
- Build forward propagation
- Implement common loss functions
- Understand gradient descent basics

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global random generator so the np.random calls below
# (e.g. the random test batches) give the same values on every run.
np.random.seed(42)

---

## Exercise 1: Activation Functions

Activation functions introduce non-linearity, allowing neural networks to learn complex patterns.

**Your Task:** Implement common activation functions.

In [None]:
def sigmoid(x):
    """
    Sigmoid activation: f(x) = 1 / (1 + e^(-x))
    Output range: (0, 1)

    Args:
        x: Input value(s); the plotting cell below passes a NumPy array.

    Returns:
        The element-wise sigmoid of x once implemented. The placeholder
        body currently returns None.
    """
    # TODO: Implement sigmoid
    pass

In [None]:
def tanh(x):
    """
    Tanh activation: f(x) = (e^x - e^(-x)) / (e^x + e^(-x))
    Output range: (-1, 1)

    Args:
        x: Input value(s); the plotting cell below passes a NumPy array.

    Returns:
        The element-wise hyperbolic tangent of x once implemented. The
        placeholder body currently returns None.
    """
    # TODO: Implement tanh (hint: use np.tanh)
    pass

In [None]:
def relu(x):
    """
    ReLU activation: f(x) = max(0, x)
    Output range: [0, inf)

    Args:
        x: Input value(s); the plotting cell below passes a NumPy array.

    Returns:
        x with negative entries replaced by 0 once implemented. The
        placeholder body currently returns None.
    """
    # TODO: Implement ReLU
    pass

In [None]:
def leaky_relu(x, alpha=0.01):
    """
    Leaky ReLU: f(x) = x if x > 0, else alpha * x

    Args:
        x: Input value(s); the plotting cell below passes a NumPy array.
        alpha: Slope applied to inputs that are not positive (default 0.01).

    Returns:
        The element-wise Leaky ReLU of x once implemented. The placeholder
        body currently returns None.
    """
    # TODO: Implement Leaky ReLU
    pass

In [None]:
# Visualize activation functions
x = np.linspace(-5, 5, 100)

plt.figure(figsize=(12, 3))
activations = [('Sigmoid', sigmoid), ('Tanh', tanh), ('ReLU', relu), ('Leaky ReLU', leaky_relu)]

for i, (name, func) in enumerate(activations):
    plt.subplot(1, 4, i + 1)
    # Evaluate each activation once and reuse the result for both the
    # "is it implemented yet?" check and the plot.
    activation_values = func(x)
    if activation_values is not None:
        plt.plot(x, activation_values, linewidth=2)
        plt.axhline(y=0, color='k', linewidth=0.5)
        plt.axvline(x=0, color='k', linewidth=0.5)
    # Title, labels and grid are drawn even for unimplemented functions, so
    # an empty panel shows which activation is still missing.
    plt.title(name)
    plt.xlabel('x')
    plt.ylabel('f(x)')
    plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

---

## Exercise 2: Single Neuron

A neuron computes: output = activation(sum(inputs * weights) + bias)

**Your Task:** Implement a single neuron.

In [None]:
class Neuron:
    """A single artificial neuron: weighted sum of inputs plus bias, then an activation."""

    def __init__(self, n_inputs, activation='sigmoid'):
        """
        Initialize a neuron with random weights and bias.

        Args:
            n_inputs: Number of input features
            activation: 'sigmoid', 'relu', or 'tanh'

        Raises:
            KeyError: If activation is not one of the names listed above.
        """
        # TODO: Initialize weights randomly (small values)
        # Hint: np.random.randn(n_inputs) * 0.1
        self.weights = None

        # TODO: Initialize bias to 0
        self.bias = None

        # Store activation function
        self.activation_name = activation
        # Look up the activation callable by name; an unsupported name raises KeyError.
        self.activation = {'sigmoid': sigmoid, 'relu': relu, 'tanh': tanh}[activation]

    def forward(self, inputs):
        """
        Compute the output of the neuron.

        1. Calculate weighted sum: z = sum(inputs * weights) + bias
        2. Apply activation: output = activation(z)

        Args:
            inputs: Input features; the test cell passes a 1-D array with
                n_inputs entries.

        Returns:
            The activated output once implemented. The placeholder body
            currently returns None.
        """
        # TODO: Calculate weighted sum (z)
        # Hint: np.dot(inputs, self.weights) + self.bias
        z = None

        # TODO: Apply activation function
        # Hint: self.activation(z)
        output = None

        return output

In [None]:
# Test Exercise 2
inputs = np.array([1.0, 2.0, 3.0])
neuron = Neuron(n_inputs=3, activation='sigmoid')

# Weights left at the None placeholder mean the class is not implemented yet.
if neuron.weights is None:
    print("Implement the Neuron class")
else:
    print(f"Weights: {neuron.weights}")
    print(f"Bias: {neuron.bias}")
    output = neuron.forward(inputs)
    print(f"Output: {output}")

---

## Exercise 3: Dense Layer

A dense (fully connected) layer contains multiple neurons.

**Your Task:** Implement a dense layer.

In [None]:
class DenseLayer:
    """A fully connected layer: a weight matrix, a bias vector and one activation."""

    def __init__(self, n_inputs, n_neurons, activation='relu'):
        """
        Initialize a dense layer.

        Args:
            n_inputs: Number of input features
            n_neurons: Number of neurons in this layer
            activation: Activation function name: 'sigmoid', 'relu', 'tanh',
                or 'none' (identity, no activation)

        Raises:
            KeyError: If activation is not one of the names listed above.
        """
        # TODO: Initialize weight matrix (n_inputs x n_neurons)
        # Use He initialization (the usual choice for ReLU layers):
        #   np.random.randn(n_inputs, n_neurons) * np.sqrt(2.0 / n_inputs)
        # (Xavier/Glorot initialization would scale by
        #  np.sqrt(2.0 / (n_inputs + n_neurons)) instead.)
        self.weights = None

        # TODO: Initialize bias vector (n_neurons,)
        # Hint: np.zeros(n_neurons)
        self.biases = None

        self.activation_name = activation
        # Look up the activation callable by name; 'none' maps to the identity
        # function. An unsupported name raises KeyError.
        self.activation = {'sigmoid': sigmoid, 'relu': relu, 'tanh': tanh, 'none': lambda x: x}[activation]

    def forward(self, inputs):
        """
        Forward pass through the layer.

        inputs shape: (batch_size, n_inputs) or (n_inputs,)
        output shape: (batch_size, n_neurons) or (n_neurons,)

        Returns:
            The activated layer output once implemented. The placeholder
            body currently returns None.
        """
        # TODO: Calculate z = inputs @ weights + biases
        z = None

        # TODO: Apply activation
        # Hint: self.activation(z)
        output = None

        return output

In [None]:
# Test Exercise 3
layer = DenseLayer(n_inputs=4, n_neurons=3, activation='relu')

# Both parameters must be initialized; checking only the weights would crash
# on `layer.biases.shape` when the bias TODO is still open.
if layer.weights is not None and layer.biases is not None:
    print(f"Weights shape: {layer.weights.shape}")
    print(f"Biases shape: {layer.biases.shape}")

    # Single sample
    single_input = np.array([1.0, 2.0, 3.0, 4.0])
    output = layer.forward(single_input)

    # Batch of samples
    batch_input = np.random.randn(5, 4)
    batch_output = layer.forward(batch_input)

    # forward() still returns None until it is implemented; print a hint
    # instead of failing with AttributeError on `.shape`.
    if output is None or batch_output is None:
        print("Implement DenseLayer.forward()")
    else:
        print(f"Single input output: {output}")
        print(f"Batch output shape: {batch_output.shape}")
else:
    print("Implement the DenseLayer class")

---

## Exercise 4: Simple Neural Network

**Your Task:** Stack layers to create a neural network.

In [None]:
class SimpleNeuralNetwork:
    """Feed-forward network made of dense layers applied one after another."""

    def __init__(self, layer_sizes, activations):
        """
        Set up the network for the requested architecture.

        Args:
            layer_sizes: Sizes of all layers, input first:
                [input, hidden1, hidden2, ..., output]
            activations: One activation name per layer after the input layer

        Example:
            layer_sizes = [4, 8, 4, 1]  # 4 inputs, 2 hidden layers, 1 output
            activations = ['relu', 'relu', 'sigmoid']
        """
        # Layers are stored in the order the data flows through them.
        self.layers = []

        # TODO: Create layers
        # For i in range(len(layer_sizes) - 1):
        #   Create DenseLayer(layer_sizes[i], layer_sizes[i+1], activations[i])
        #   Append to self.layers
        pass

    def forward(self, x):
        """
        Run x through every layer in order and return the final output.

        The placeholder body currently returns None.
        """
        # TODO: Pass input through each layer sequentially
        pass

    def summary(self):
        """Print one entry per layer (shape, activation, parameter count) and the total."""
        separator = "=" * 50
        print("Neural Network Summary")
        print(separator)

        per_layer_counts = []
        for number, layer in enumerate(self.layers, start=1):
            # Trainable parameters of a layer: every weight plus every bias.
            count = layer.weights.size + layer.biases.size
            per_layer_counts.append(count)

            fan_in = layer.weights.shape[0]
            fan_out = layer.weights.shape[1]
            print(f"Layer {number}: {fan_in} -> {fan_out} ({layer.activation_name})")
            print(f"         Parameters: {count}")

        print(separator)
        print(f"Total parameters: {sum(per_layer_counts)}")

In [None]:
# Test Exercise 4
nn = SimpleNeuralNetwork(
    layer_sizes=[4, 8, 4, 1],
    activations=['relu', 'relu', 'sigmoid']
)

if len(nn.layers) > 0:
    nn.summary()

    # Test forward pass
    test_input = np.random.randn(10, 4)
    output = nn.forward(test_input)

    # The layers can exist while forward() is still the placeholder that
    # returns None; print a hint instead of failing on `output.shape`.
    if output is None:
        print("\nImplement SimpleNeuralNetwork.forward()")
    else:
        print(f"\nInput shape: {test_input.shape}")
        print(f"Output shape: {output.shape}")
        print(f"Output range: [{output.min():.4f}, {output.max():.4f}]")
else:
    print("Implement SimpleNeuralNetwork")

---

## Exercise 5: Loss Functions

Loss functions measure how wrong our predictions are.

**Your Task:** Implement common loss functions.

In [None]:
def mse_loss(y_true, y_pred):
    """
    Mean Squared Error for regression.

    MSE = mean((y_true - y_pred)^2)

    Args:
        y_true: Target values; the test cell passes a 1-D NumPy array.
        y_pred: Predicted values, same length as y_true in the test cell.

    Returns:
        The mean squared error once implemented. The placeholder body
        currently returns None.
    """
    # TODO: Implement MSE loss
    pass

In [None]:
def binary_cross_entropy(y_true, y_pred, epsilon=1e-15):
    """
    Binary Cross-Entropy for classification.

    BCE = -mean(y_true * log(y_pred) + (1 - y_true) * log(1 - y_pred))

    Note: Clip y_pred to avoid log(0)

    Args:
        y_true: Target labels; the test cell passes an array of 0s and 1s.
        y_pred: Predicted probabilities, same length as y_true in the test cell.
        epsilon: Clipping margin; predictions are to be limited to
            [epsilon, 1 - epsilon] before taking logarithms (default 1e-15).

    Returns:
        The binary cross-entropy once implemented. The placeholder body
        currently returns None.
    """
    # TODO: Clip predictions to [epsilon, 1-epsilon]
    # Hint: np.clip(y_pred, epsilon, 1 - epsilon)
    y_pred_clipped = None

    # TODO: Calculate BCE
    loss = None

    return loss

In [None]:
# Test Exercise 5
y_true = np.array([1, 0, 1, 1, 0])
y_pred = np.array([0.9, 0.1, 0.8, 0.7, 0.2])

mse = mse_loss(y_true, y_pred)
bce = binary_cross_entropy(y_true, y_pred)

# Expected with correct implementations: MSE = 0.038, BCE ~ 0.2027.
# A None result means the function is still the placeholder, so print the
# same kind of "Implement ..." hint the other test cells use.
print(f"MSE Loss: {mse}" if mse is not None else "Implement mse_loss()")
print(f"BCE Loss: {bce}" if bce is not None else "Implement binary_cross_entropy()")

---

## Exercise 6: Gradient Descent Visualization

**Your Task:** Implement the gradient descent update loop and observe how it finds the minimum.

In [None]:
def gradient_descent_demo(learning_rate=0.1, n_iterations=50):
    """
    Demonstrate gradient descent on f(x) = x^2.

    Derivative: df/dx = 2x
    Update rule: x_new = x - learning_rate * gradient

    Args:
        learning_rate: Step size used in the update rule (default 0.1).
        n_iterations: Number of update steps to run (default 50).

    Returns:
        A list of (x, f(x)) tuples: the starting point (4.0, 16.0) followed
        by one entry per completed step. While the loop body is still the
        placeholder, only the starting point is returned.
    """
    # Function and its derivative
    f = lambda x: x ** 2
    df = lambda x: 2 * x

    # Starting point
    x = 4.0
    history = [(x, f(x))]

    # TODO: Implement gradient descent loop
    for i in range(n_iterations):
        # TODO: Calculate gradient (hint: df(x))
        gradient = None

        # TODO: Update x
        # x = x - learning_rate * gradient

        # Record history (skipped while gradient is still the None
        # placeholder, so the caller can detect the unimplemented state)
        if gradient is not None:
            history.append((x, f(x)))

    return history

In [None]:
# Test and visualize
history = gradient_descent_demo(learning_rate=0.1, n_iterations=30)

# A history holding only the starting point means the loop is not implemented yet.
if len(history) <= 1:
    print("Implement gradient_descent_demo()")
else:
    fig, (path_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: the parabola with the visited points drawn on top
    x_range = np.linspace(-5, 5, 100)
    path_ax.plot(x_range, x_range ** 2, 'b-', label='f(x) = x^2')

    xs, ys = zip(*history)
    path_ax.plot(xs, ys, 'ro-', markersize=5, label='Gradient descent')
    path_ax.set_xlabel('x')
    path_ax.set_ylabel('f(x)')
    path_ax.set_title('Gradient Descent Path')
    path_ax.legend()
    path_ax.grid(True, alpha=0.3)

    # Right panel: function value after each step
    loss_ax.plot(ys, 'g-o')
    loss_ax.set_xlabel('Iteration')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_title('Loss Over Time')
    loss_ax.grid(True, alpha=0.3)

    fig.tight_layout()
    plt.show()

    print(f"Final x: {xs[-1]:.6f}")
    print(f"Final loss: {ys[-1]:.6f}")

---

## Checkpoint

Congratulations! You've completed Lab 3.

### Key Takeaways:
- Neurons compute weighted sums + activation
- Activation functions add non-linearity
- Layers stack neurons; networks stack layers
- Loss functions measure prediction error
- Gradient descent minimizes loss

**Next:** Lab 4 - PyTorch Fundamentals