<a href="https://colab.research.google.com/github/calmrocks/master-machine-learning-engineer/blob/main/BasicModels/DeepLearningTrainingProcess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Understanding Deep Learning Training Process

## Introduction
This notebook demonstrates the training process in deep learning, focusing on:
1. Forward Propagation
2. Loss Calculation
3. Backward Propagation
4. Parameter Updates

We'll implement a simple neural network from scratch to understand these concepts better.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from typing import List, Tuple

## 1. Creating a Simple Neural Network Class

First, let's create a simple neural network with one hidden layer to understand the training process:

class SimpleNeuralNetwork:
    def __init__(self, input_size: int, hidden_size: int, output_size: int):
        # Initialize weights and biases
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))
        
        # Store parameters and gradients
        self.parameters = {'W1': self.W1, 'b1': self.b1, 'W2': self.W2, 'b2': self.b2}
        self.gradients = {}
        self.cache = {}
        
    def sigmoid(self, Z: np.ndarray) -> np.ndarray:
        """Apply the logistic function ``1 / (1 + exp(-Z))`` element-wise.

        The naive formula evaluates ``exp(-Z)``, which overflows (and emits a
        RuntimeWarning) for large negative ``Z``. This version only ever
        exponentiates non-positive numbers, so it cannot overflow, while
        producing the same values.

        Args:
            Z: Pre-activation values; any array-like accepted by ``np.asarray``.

        Returns:
            An array of the same shape as ``Z`` with values in ``[0, 1]``
            (a 0-d array when ``Z`` is a scalar).
        """
        Z = np.asarray(Z)
        # exp(-|Z|) lies in (0, 1]; at worst it underflows harmlessly to 0.
        decay = np.exp(-np.abs(Z))
        # Z >= 0: 1 / (1 + exp(-Z));  Z < 0: exp(Z) / (1 + exp(Z)) -- same function.
        return np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))
    
    def sigmoid_derivative(self, Z: np.ndarray) -> np.ndarray:
        s = self.sigmoid(Z)
        return s * (1 - s)
    
    def forward_propagation(self, X: np.ndarray) -> np.ndarray:
        # First layer
        Z1 = np.dot(X, self.W1) + self.b1
        A1 = self.sigmoid(Z1)
        
        # Second layer
        Z2 = np.dot(A1, self.W2) + self.b2
        A2 = self.sigmoid(Z2)
        
        # Store values for backpropagation
        self.cache = {
            'Z1': Z1, 'A1': A1,
            'Z2': Z2, 'A2': A2,
            'X': X
        }
        
        return A2
    
    def compute_loss(self, Y: np.ndarray, Y_hat: np.ndarray) -> float:
        """Return the mean binary cross-entropy between labels and predictions.

        Predictions are clipped into the open interval (0, 1) before taking
        logarithms. Without this, a saturated prediction (exactly 0 or 1)
        produces ``log(0) = -inf`` and the loss becomes ``inf`` or ``nan``
        (e.g. ``0 * -inf`` for a perfectly correct prediction).

        Args:
            Y: Ground-truth labels; the first axis is the number of examples.
            Y_hat: Predicted probabilities, broadcast-compatible with ``Y``.

        Returns:
            The summed cross-entropy divided by ``Y.shape[0]``.
        """
        m = Y.shape[0]

        # Promote to float64 so that 1 - eps is representable and distinct from 1.
        eps = 1e-15
        Y_hat = np.clip(np.asarray(Y_hat, dtype=float), eps, 1 - eps)

        loss = -1/m * np.sum(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))
        return loss
    
    def backward_propagation(self, Y: np.ndarray) -> None:
        m = Y.shape[0]
        
        # Output layer
        dZ2 = self.cache['A2'] - Y
        dW2 = 1/m * np.dot(self.cache['A1'].T, dZ2)
        db2 = 1/m * np.sum(dZ2, axis=0, keepdims=True)
        
        # Hidden layer
        dZ1 = np.dot(dZ2, self.W2.T) * self.sigmoid_derivative(self.cache['Z1'])
        dW1 = 1/m * np.dot(self.cache['X'].T, dZ1)
        db1 = 1/m * np.sum(dZ1, axis=0, keepdims=True)
        
        # Store gradients
        self.gradients = {
            'dW1': dW1, 'db1': db1,
            'dW2': dW2, 'db2': db2
        }
    
    def update_parameters(self, learning_rate: float) -> None:
        self.W1 -= learning_rate * self.gradients['dW1']
        self.b1 -= learning_rate * self.gradients['db1']
        self.W2 -= learning_rate * self.gradients['dW2']
        self.b2 -= learning_rate * self.gradients['db2']
        
        # Update stored parameters
        self.parameters = {'W1': self.W1, 'b1': self.b1, 'W2': self.W2, 'b2': self.b2}