In [12]:
from utils import load_dataset_to_numpy
import numpy as np
import copy
from typing import Tuple, Dict, List

In [10]:
trian_dataset = r'datasets\cat_dog_dataset_train.csv'
test_dataset = r"datasets\cat_dog_dataset_test.csv"

X_train, y_train = load_dataset_to_numpy(trian_dataset)
X_test, y_test = load_dataset_to_numpy(trian_dataset)

In [31]:
print ("Number of training examples: " + str(X_train.shape[0]))
print ("Number of testing examples: " + str(X_test.shape[0]))
print ("Height/Width of each image: " + str(X_train[1].shape[0]))
print ("Each image is of size: (" + str(X_train[1].shape[0]) + ", " + str(X_train[1].shape[0]) + ", 3)")
print ("train_set_x shape: " + str(X_train.shape))
print ("train_set_y shape: " + str(y_train.shape))
print ("test_set_x shape: " + str(X_test.shape))
print ("test_set_y shape: " + str(y_test.shape))

Number of training examples: 557
Number of testing examples: 557
Height/Width of each image: 64
Each image is of size: (64, 64, 3)
train_set_x shape: (557, 64, 64, 3)
train_set_y shape: (557,)
test_set_x shape: (557, 64, 64, 3)
test_set_y shape: (557,)


#### Flatten the arrays into a single vector of shape (height * width * 3, 1)

In [40]:
train_set_x_flatten = X_train.reshape(X_train.shape[0], -1).T
test_set_x_flatten = X_test.reshape(X_test.shape[0], -1).T
print("Train and test shape after flattening:")
print(f"train: {train_set_x_flatten.shape} \ntest: {test_set_x_flatten.shape}")

Train and test shape after flattening:
train: (12288, 557) 
test: (12288, 557)


In [3]:

def sigmoid(z):
    """
    Compute the sigmoid of z

    z is A scalar or numpy array of any size.
    """

    s = 1 / ( 1 + np.exp(-z) )

    return s

def relu(z):
    """ Relu activation function"""
    
    return max(0.0, z )

When training a neural network, we need to initialize the paramters **w** and **b**. There are several options here, we'll use zero initialization for now.

In [4]:
def initialize_with_zeros(dimension):
    """
    This function creates a vector of zeros of shape (dim, 1) for w and initializes b to 0.
    """
    
    w = np.zeros((dimension, 1), dtype=float)
    b = float(0)
    
    return w, b

In [5]:
def propagate(w, b, X, Y):
    """
        TODO add docstring explanation
    """
    
    m = X.shape[1]
    
    # activation
    A = sigmoid(np.dot(w.T,X) + b) 

    # calculate cost 
    cost = -1/m * np.sum( np.dot(np.log(A), Y.T) + np.dot(np.log(1-A), (1-Y.T)))

    # Backprop - find grads
    dw = 1/m *(np.dot(X,(A - Y).T))
    db = 1/m * (np.sum(A -Y))

    cost = np.squeeze(np.array(cost))

    
    grads = {"dw": dw,
             "db": db}
    
    return grads, cost

### Optimizer

In [13]:
def optimize(w: np.ndarray, b: float, X: np.ndarray, Y: np.ndarray, 
             num_iterations: int = 100, learning_rate: float = 0.009, 
             print_cost: bool = False) -> Tuple[Dict, Dict, List[float]]:
    """
    Performs gradient descent to optimize the parameters w and b.

    Args:
    w (np.ndarray): Initial weight vector
    b (float): Initial bias term
    X (np.ndarray): Input data, shape (num_features, num_examples)
    Y (np.ndarray): True "label" vector, shape (1, num_examples)
    num_iterations (int): Number of iterations of the optimization loop
    learning_rate (float): Learning rate of the gradient descent update rule
    print_cost (bool): Print the cost every 100 iterations if set to True

    Returns:
    params (Dict): Dictionary containing the optimized w and b
    grads (Dict): Dictionary containing the gradients of w and b
    costs (List[float]): List of all costs computed during the optimization
    """
    w = copy.deepcopy(w)
    b = copy.deepcopy(b)
    
    costs = []
    
    for i in range(num_iterations):
        # Forward and backward propagation
        grads, cost = propagate(w, b, X, Y)
        
        # Retrieve derivatives from grads
        dw, db = grads["dw"], grads["db"]
        
        # Update parameters
        w -= learning_rate * dw
        b -= learning_rate * db
                
        # Record and print the cost
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print(f"Cost after iteration {i}: {cost:.6f}")
    
    params = {"w": w, "b": b}
    grads = {"dw": dw, "db": db}
    
    return params, grads, costs


### Other optimizers

In [14]:
class Optimizer:
    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate

    def update(self, w, dw):
        raise NotImplementedError

class SGD(Optimizer):
    def update(self, w, dw):
        return w - self.learning_rate * dw

class Momentum(Optimizer):
    def __init__(self, learning_rate=0.01, momentum=0.9):
        super().__init__(learning_rate)
        self.momentum = momentum
        self.v = None

    def update(self, w, dw):
        if self.v is None:
            self.v = np.zeros_like(w)
        self.v = self.momentum * self.v - self.learning_rate * dw
        return w + self.v

class RMSprop(Optimizer):
    def __init__(self, learning_rate=0.01, decay_rate=0.99, epsilon=1e-8):
        super().__init__(learning_rate)
        self.decay_rate = decay_rate
        self.epsilon = epsilon
        self.s = None

    def update(self, w, dw):
        if self.s is None:
            self.s = np.zeros_like(w)
        self.s = self.decay_rate * self.s + (1 - self.decay_rate) * dw**2
        return w - (self.learning_rate / (np.sqrt(self.s) + self.epsilon)) * dw

class Adam(Optimizer):
    def __init__(self, learning_rate=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
        super().__init__(learning_rate)
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = None
        self.v = None
        self.t = 0

    def update(self, w, dw):
        if self.m is None:
            self.m = np.zeros_like(w)
            self.v = np.zeros_like(w)
        
        self.t += 1
        self.m = self.beta1 * self.m + (1 - self.beta1) * dw
        self.v = self.beta2 * self.v + (1 - self.beta2) * (dw**2)
        m_hat = self.m / (1 - self.beta1**self.t)
        v_hat = self.v / (1 - self.beta2**self.t)
        
        return w - (self.learning_rate / (np.sqrt(v_hat) + self.epsilon)) * m_hat

def optimize(w, b, X, Y, num_iterations=100, optimizer=SGD()):
    for _ in range(num_iterations):
        # Compute gradients (assume we have a function for this)
        grads, cost = propagate(w, b, X, Y)
        dw, db = grads["dw"], grads['db']
        print(f"current cost: {cost}")
        # Update parameters
        w = optimizer.update(w, dw)
        b = optimizer.update(b, db.reshape(1, 1)).squeeze()

    return w, b

# Usage example
w = np.random.randn(10, 1)
b = np.zeros(1)
X = np.random.randn(10, 1000)
Y = np.random.randint(0, 2, (1, 1000))

# Using different optimizers
w_sgd, b_sgd = optimize(w, b, X, Y, optimizer=SGD(learning_rate=0.01))
# w_momentum, b_momentum = optimize(w, b, X, Y, optimizer=Momentum(learning_rate=0.01, momentum=0.9))
# w_rmsprop, b_rmsprop = optimize(w, b, X, Y, optimizer=RMSprop(learning_rate=0.01))
# w_adam, b_adam = optimize(w, b, X, Y, optimizer=Adam(learning_rate=0.01))


current cost: 1.7686114420497343
current cost: 1.7669364854337144
current cost: 1.7652621960970987
current cost: 1.7635885755009195
current cost: 1.7619156251096324
current cost: 1.7602433463911067
current cost: 1.758571740816643
current cost: 1.7569008098609724
current cost: 1.7552305550022582
current cost: 1.7535609777221062
current cost: 1.7518920795055684
current cost: 1.7502238618411425
current cost: 1.7485563262207844
current cost: 1.7468894741399104
current cost: 1.7452233070973995
current cost: 1.743557826595605
current cost: 1.741893034140339
current cost: 1.740228931240908
current cost: 1.7385655194100955
current cost: 1.7369028001641746
current cost: 1.7352407750229064
current cost: 1.7335794455095528
current cost: 1.7319188131508747
current cost: 1.7302588794771454
current cost: 1.7285996460221336
current cost: 1.726941114323142
current cost: 1.725283285920977
current cost: 1.723626162359978
current cost: 1.7219697451880098
current cost: 1.7203140359564715
current cost: 1.7