# Module 1 - Exercise 3: First Step with MLP

## Learning Objectives
- Understand the structure of nn.Linear layers (input and output dimensions)
- Learn how to use basic activation functions (ReLU, Sigmoid, Tanh)
- Build simple neural networks using nn.Sequential
- Calculate the number of parameters in a neural network
- Perform forward pass operations through the network

## Test Framework Setup

In [None]:
# Clone the test repository
# stderr is discarded and `|| true` forces a success status, so a failed clone
# (e.g. no network, or /tmp/tests already present) is silent here. If the
# checkout is really missing, the failure shows up later as an ImportError on
# the test_utils import below.
!git clone https://github.com/racousin/data_science_practice.git /tmp/tests 2>/dev/null || true

# Import required modules
import sys
# Make the cloned test package importable for the two project imports below.
sys.path.append('/tmp/tests/tests/python_deep_learning')

# Import the improved test utilities
# NOTE(review): create_inline_test is imported but not used in the cells shown here.
from test_utils import NotebookTestRunner, create_inline_test
from module1.test_exercise3 import Exercise3Validator, EXERCISE3_SECTIONS

# Create test runner and validator
# presumably ("module1", 3) selects module 1 / exercise 3 — confirm in test_utils
test_runner = NotebookTestRunner("module1", 3)
validator = Exercise3Validator()

## Environment Setup

In [None]:
import torch
import torch.nn as nn
import numpy as np

# Set random seed for reproducibility
# Seeds the global PyTorch and NumPy generators; the torch.randn inputs built
# in later cells are intended to repeat when the notebook is re-run from the top.
torch.manual_seed(42)
np.random.seed(42)

# Check if CUDA is available
# NOTE(review): in the cells shown here `device` is only printed; tensors and
# layers are created without an explicit device argument.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

## Section 1: Understanding nn.Linear

The `nn.Linear` layer is the fundamental building block of MLPs. It performs a linear transformation: `y = xW^T + b`,
where `x` is the input, `W` is the weight matrix of shape `(out_features, in_features)`, and `b` is the bias vector of shape `(out_features,)`.

In [None]:
# TODO: Create a linear layer that transforms input from 10 features to 5 features
# Placeholder: replace None with your layer. The prints below are skipped while
# the value is still None.
linear_layer_1 = None

# Display layer information
# Reads the layer's weight and bias tensors and shows their shapes.
if linear_layer_1 is not None:
    print(f"Linear layer: {linear_layer_1}")
    print(f"Weight shape: {linear_layer_1.weight.shape}")
    print(f"Bias shape: {linear_layer_1.bias.shape}")

In [None]:
# TODO: Create a linear layer that transforms 5 features to 3 features
linear_layer_2 = None

# TODO: Calculate the total number of parameters in linear_layer_2
# Remember: parameters = (input_size * output_size) + bias_size,
# where bias_size equals output_size (one bias value per output feature).
num_params_layer2 = None

# Runs only once BOTH placeholders above are filled in. It prints the
# hand-computed count next to the count obtained by summing numel() over the
# layer's parameters, so the two can be compared by eye.
if linear_layer_2 is not None and num_params_layer2 is not None:
    print(f"Linear layer 2: {linear_layer_2}")
    print(f"Calculated parameters: {num_params_layer2}")
    actual_params = sum(p.numel() for p in linear_layer_2.parameters())
    print(f"Actual parameters: {actual_params}")

In [None]:
# Test Section 1: Understanding nn.Linear
# EXERCISE3_SECTIONS maps the section title to (validator method name, description)
# pairs; each name is resolved to the bound method on `validator`.
section_tests = [(getattr(validator, name), desc) for name, desc in EXERCISE3_SECTIONS["Section 1: Understanding nn.Linear"]]
# locals() hands the runner the notebook variables defined so far.
test_runner.test_section("Section 1: Understanding nn.Linear", validator, section_tests, locals())

## Section 2: Activation Functions

Activation functions introduce non-linearity into neural networks, allowing them to learn complex patterns.
- **ReLU**: f(x) = max(0, x) - Most commonly used
- **Sigmoid**: f(x) = 1/(1+e^(-x)) - Outputs between 0 and 1
- **Tanh**: f(x) = (e^x - e^(-x))/(e^x + e^(-x)) - Outputs between -1 and 1

In [None]:
# TODO: Create instances of the three main activation functions
# Each placeholder must become a callable that accepts a tensor, because the
# checks below call it directly on test_input.
relu_activation = None
sigmoid_activation = None
tanh_activation = None

# Test the activations with sample input
# The values cover negative, zero and positive inputs.
test_input = torch.tensor([-2.0, -1.0, 0.0, 1.0, 2.0])

# Each activation is checked independently, so partially completed work still
# prints. Note that the "Input:" line is only printed inside the ReLU branch.
if relu_activation is not None:
    print(f"Input: {test_input}")
    print(f"ReLU output: {relu_activation(test_input)}")
if sigmoid_activation is not None:
    print(f"Sigmoid output: {sigmoid_activation(test_input)}")
if tanh_activation is not None:
    print(f"Tanh output: {tanh_activation(test_input)}")

In [None]:
# TODO: Apply ReLU activation to the output of linear_layer_1
# The sample input is already provided; only the two placeholders below need filling in.
input_data = torch.randn(2, 10)  # Batch size 2, 10 features

# TODO: Pass input_data through linear_layer_1
linear_output = None

# TODO: Apply ReLU activation to linear_output
activated_output = None

# Runs only once both placeholders are filled in. The two counts show how many
# entries are negative before and after the activation.
if linear_output is not None and activated_output is not None:
    print(f"Input shape: {input_data.shape}")
    print(f"Linear output shape: {linear_output.shape}")
    print(f"Activated output shape: {activated_output.shape}")
    print(f"Number of negative values before ReLU: {(linear_output < 0).sum().item()}")
    print(f"Number of negative values after ReLU: {(activated_output < 0).sum().item()}")

In [None]:
# Test Section 2: Activation Functions
# Resolve each validator method name listed for this section to the bound
# method and keep its description alongside it.
section_tests = [(getattr(validator, name), desc) for name, desc in EXERCISE3_SECTIONS["Section 2: Activation Functions"]]
# locals() hands the runner the notebook variables defined so far.
test_runner.test_section("Section 2: Activation Functions", validator, section_tests, locals())

## Section 3: Building Networks with nn.Sequential

`nn.Sequential` allows us to stack layers and create a neural network pipeline. The output of each layer becomes the input to the next.

In [None]:
# TODO: Create a simple 2-layer MLP using nn.Sequential
# Input: 8 features -> Hidden: 4 neurons with ReLU -> Output: 2 neurons
# Later cells reuse simple_mlp (forward pass with 8-feature input, parameter
# counting), so keep this variable name.
simple_mlp = None

if simple_mlp is not None:
    print("Simple MLP architecture:")
    print(simple_mlp)

    # Count parameters
    # Sums the element count of every tensor yielded by parameters().
    total_params = sum(p.numel() for p in simple_mlp.parameters())
    print(f"\nTotal parameters: {total_params}")

In [None]:
# TODO: Create a deeper MLP with 3 hidden layers
# Input: 10 -> Hidden1: 8 (ReLU) -> Hidden2: 6 (ReLU) -> Hidden3: 4 (ReLU) -> Output: 2
deep_mlp = None
# TODO: Calculate the total number of parameters in deep_mlp
# Parameters per layer: (input_size * output_size) + output_size
# (weights plus one bias value per output)
# Layer 1: (10 * 8) + 8 = 88
# Layer 2: (8 * 6) + 6 = 54
# Layer 3: (6 * 4) + 4 = 28
# Layer 4: (4 * 2) + 2 = 10
deep_mlp_params = None  # Calculate the sum

# Runs only once both placeholders are filled in. It prints the hand-computed
# total next to the total summed from the model's parameters.
if deep_mlp is not None and deep_mlp_params is not None:
    print("Deep MLP architecture:")
    print(deep_mlp)
    print(f"\nCalculated parameters: {deep_mlp_params}")
    actual_params = sum(p.numel() for p in deep_mlp.parameters())
    print(f"Actual parameters: {actual_params}")

In [None]:
# Test Section 3: Building Networks with nn.Sequential
# Resolve each validator method name listed for this section to the bound
# method and keep its description alongside it.
section_tests = [(getattr(validator, name), desc) for name, desc in EXERCISE3_SECTIONS["Section 3: Building Networks with nn.Sequential"]]
# locals() hands the runner the notebook variables defined so far.
test_runner.test_section("Section 3: Building Networks with nn.Sequential", validator, section_tests, locals())

## Section 4: Forward Pass

The forward pass is the process of passing input data through the network to get predictions. Each layer transforms the data sequentially.

In [None]:
# TODO: Perform a forward pass through simple_mlp
# The input is provided: batch size 3 and 8 features, matching the 8 input
# features specified for simple_mlp.
forward_input = torch.randn(3, 8)

# TODO: Pass the input through simple_mlp
simple_output = None

# Skipped while simple_output is still the None placeholder.
if simple_output is not None:
    print(f"Input shape: {forward_input.shape}")
    print(f"Output shape: {simple_output.shape}")
    print(f"Output values:\n{simple_output}")

In [None]:
# TODO: Create a network with mixed activation functions
# Input: 6 -> Hidden1: 4 (ReLU) -> Hidden2: 3 (Tanh) -> Output: 1 (Sigmoid)
mixed_activation_mlp = None

# TODO: Perform forward pass with batch size 5
# The input (batch size 5, 6 features) is provided; only mixed_output needs filling in.
mixed_input = torch.randn(5, 6)
mixed_output = None  # Pass mixed_input through mixed_activation_mlp

# Runs only once both the model and its output are filled in. The printed
# min/max lets you check the output range by eye.
if mixed_activation_mlp is not None and mixed_output is not None:
    print("Mixed activation MLP:")
    print(mixed_activation_mlp)
    print(f"\nInput shape: {mixed_input.shape}")
    print(f"Output shape: {mixed_output.shape}")
    print(f"Output range: [{mixed_output.min().item():.4f}, {mixed_output.max().item():.4f}]")
    print("(Note: Sigmoid ensures output is between 0 and 1)")

In [None]:
# Test Section 4: Forward Pass
# Resolve each validator method name listed for this section to the bound
# method and keep its description alongside it.
section_tests = [(getattr(validator, name), desc) for name, desc in EXERCISE3_SECTIONS["Section 4: Forward Pass"]]
# locals() hands the runner the notebook variables defined so far.
test_runner.test_section("Section 4: Forward Pass", validator, section_tests, locals())

## Section 5: Understanding Parameter Counting

Knowing how many parameters your network has is crucial for reasoning about model complexity and memory requirements.

In [None]:
# TODO: Create a function to count parameters in any model
def count_parameters(model):
    """
    Count the total number of trainable parameters in a model.

    Exercise stub: it returns None until the TODO below is completed.

    Args:
        model: A PyTorch nn.Module

    Returns:
        Total number of parameters once implemented; None while the
        function is still a stub.

    Note:
        The summary line says "trainable" while the return description says
        "total". No cell shown in this notebook freezes a parameter, so the
        two counts presumably agree for the models built here.
    """
    # TODO: Complete this function
    # Placeholder result: the calling code checks for None before printing.
    return None

# Test your function
# count_parameters is always bound by the def directly above, so a None check
# on it can never fail; only simple_mlp (a placeholder from an earlier cell)
# needs guarding.
if simple_mlp is not None:
    param_count = count_parameters(simple_mlp)
    # The unfinished stub returns None; stay silent until it is implemented.
    if param_count is not None:
        print(f"Simple MLP parameters: {param_count}")

In [None]:
# TODO: Create a large MLP and calculate its parameters
# Input: 100 -> Hidden1: 64 -> Hidden2: 32 -> Hidden3: 16 -> Output: 10
# Use ReLU activation between layers (except after output)
large_mlp = None

# TODO: Calculate expected number of parameters manually
# Each linear layer contributes (input_size * output_size) + output_size.
# Layer 1: (100 * 64) + 64 = ?
# Layer 2: (64 * 32) + 32 = ?
# Layer 3: (32 * 16) + 16 = ?
# Layer 4: (16 * 10) + 10 = ?
expected_params = None  # Sum all layer parameters

# Compare the hand-computed total with count_parameters once both placeholders
# are filled in. count_parameters is a function, so a None check on it can
# never fail and is omitted.
if large_mlp is not None and expected_params is not None:
    actual_params = count_parameters(large_mlp)
    # An unfinished count_parameters returns None; skip the report in that case.
    if actual_params is not None:
        print(f"Expected parameters: {expected_params}")
        print(f"Actual parameters: {actual_params}")
        print(f"Match: {expected_params == actual_params}")

In [None]:
# Test Section 5: Understanding Parameter Counting
# Resolve each validator method name listed for this section to the bound
# method and keep its description alongside it.
section_tests = [(getattr(validator, name), desc) for name, desc in EXERCISE3_SECTIONS["Section 5: Understanding Parameter Counting"]]
# locals() hands the runner the notebook variables defined so far.
test_runner.test_section("Section 5: Understanding Parameter Counting", validator, section_tests, locals())

## Final Summary

In [None]:
# Display final summary of all tests
# presumably reports the outcomes collected by the test_section calls in the
# earlier cells — confirm against NotebookTestRunner in test_utils
test_runner.final_summary()