In [1]:
import math
import numbers

import numpy as np

In [2]:
def numerical_Derivative(func,x,h=1e-5):
    """Approximate the slope of ``func`` at ``x`` with a forward difference.

    Args:
        func: Callable of one argument; it is evaluated at ``x + h`` and
            then at ``x``.
        x: Point at which the slope is estimated.
        h: Step between the two sample points (default ``1e-5``).

    Returns:
        ``(func(x + h) - func(x)) / h``.
    """
    # Sample the shifted point first, then the base point.
    rise = func(x + h) - func(x)
    return rise / h

# Test on a concrete function: f(x) = x**2
def my_square(x):
    """Return ``x`` raised to the second power."""
    return pow(x, 2)

In [3]:
# Forward-difference slope of my_square at x = 3, shown to five decimals.
slope_at_3 = numerical_Derivative(func=my_square, x=3)
print(f"Slope at x=3: {slope_at_3:.5f}")

Slope at x=3: 6.00001


## A more general numerical-derivative function

In [4]:
def _displace(value, k, h):
    """Return ``value`` moved by ``k`` steps of size ``h`` for ``k`` in {-1, 0, 1}."""
    if k > 0:
        return value + h
    if k < 0:
        return value - h
    # k == 0: hand back the value untouched so an int stays an int.
    return value


def _difference_quotient(sample, h, order, method):
    """Combine samples of a one-variable function into a difference quotient.

    ``sample(k)`` must return the function value at the base point displaced
    by ``k`` steps of size ``h``, for ``k`` in {-1, 0, 1}. ``order`` and
    ``method`` are assumed to have been validated by the caller.
    """
    if order == 2:
        return (sample(1) - 2*sample(0) + sample(-1)) / (h**2)
    if method == "forward":
        return (sample(1) - sample(0)) / h
    if method == "backward":
        return (sample(0) - sample(-1)) / h
    return (sample(1) - sample(-1)) / (2*h)


def numerical_derivative_general(func,x,h=1e-5,order=1,method="central"):
    """Estimate a derivative of ``func`` at ``x`` using finite differences.

    Args:
        func: Function to differentiate. For scalar ``x`` it is called with
            a number; for sequence ``x`` it is called with a ``list`` (also
            when ``x`` was passed as a tuple).
        x: Evaluation point. Either a real number (``int``, ``float`` or
            any other ``numbers.Real``, which includes NumPy scalars) or a
            ``list``/``tuple`` of such numbers.
        h: Step size (default ``1e-5``). Must be non-zero. Very small steps
            amplify floating-point round-off, most visibly for ``order=2``
            where the difference is divided by ``h**2``.
        order: ``1`` for the first derivative, ``2`` for the second
            derivative (three-point central stencil).
        method: First-derivative scheme: ``"forward"``, ``"backward"`` or
            ``"central"``. Ignored when ``order`` is ``2``.

    Returns:
        For scalar ``x``: the derivative estimate.
        For sequence ``x``: a list with one entry per coordinate, obtained
        by applying the selected scheme to that coordinate while the others
        are held fixed. With the defaults this is the central-difference
        gradient. With ``order=2`` the entries are the unmixed second
        partial derivatives, not a full Hessian.

    Raises:
        TypeError: ``x`` is neither a real number nor a list/tuple.
        ValueError: ``order`` is not 1 or 2, or ``order`` is 1 and
            ``method`` is not one of the supported names.
    """
    is_scalar = isinstance(x, numbers.Real)
    if not is_scalar and not isinstance(x, (list, tuple)):
        raise TypeError("x must be a real number, list or tuple")

    # Reject unsupported options up front instead of silently returning None.
    if order not in (1, 2):
        raise ValueError(f"order must be 1 or 2, got {order!r}")
    if order == 1 and method not in ("forward", "backward", "central"):
        raise ValueError(
            f"method must be 'forward', 'backward' or 'central', got {method!r}"
        )

    if is_scalar:
        return _difference_quotient(
            lambda k: func(_displace(x, k, h)), h, order, method
        )

    point = list(x)

    def partial(i):
        def sample(k):
            # Rebind the i-th entry rather than using ``+=`` so that a
            # mutable element is never modified in place.
            moved = point.copy()
            moved[i] = _displace(point[i], k, h)
            return func(moved)
        return _difference_quotient(sample, h, order, method)

    return [partial(i) for i in range(len(point))]

In [5]:
def f(x):
    """Square the input."""
    return x**2


# Default settings: order=1 with the central-difference scheme.
print(numerical_derivative_general(f, x=3))

6.000000000039306


In [6]:
# Second derivative of f (defined in the previous cell) at x = 3.
# With the default h = 1e-5 the difference is divided by h**2, so
# floating-point round-off is visible in the printed result.
print(numerical_derivative_general(f, 3, order=2))

1.9999824019123478


In [12]:
def gradient_descent(start_w,learning_rate,steps,cost_func=None):
    """Minimise a one-variable cost by gradient descent, printing every step.

    The gradient at the current weight is estimated numerically with
    ``numerical_derivative_general`` using its default settings, so
    ``start_w`` must be a value that function accepts as a scalar.

    Args:
        start_w: Initial weight.
        learning_rate: Factor applied to the gradient in each update.
        steps: Number of update steps to run.
        cost_func: Optional one-argument cost to minimise. When omitted,
            the cost ``(w - 5) ** 2`` is used.

    Returns:
        A tuple ``(w, history)``: the weight after the last step and the
        list of weights after each step (the starting weight is not
        included).
    """
    def default_cost(val):
        return (val-5)**2

    cost = default_cost if cost_func is None else cost_func

    w = start_w
    history = []
    print(f"starting at w = {w}")

    for i in range(steps):
        gradient = numerical_derivative_general(cost,w)

        # Move against the gradient, scaled by the learning rate.
        w = w - (learning_rate*gradient)

        history.append(w)
        print(f"Step {i+1}: w = {w:.4f}, Gradient = {gradient:.4f}, Cost = {cost(w):.4f}")
    return w,history

In [13]:
# gradient_descent returns (final weight, per-step history); unpack both so
# that final_w really is the final weight rather than the whole tuple.
final_w, w_history = gradient_descent(0.0, 0.1, 30)

starting at w = 0.0
Step 1: w = 1.0000, Gradient = -10.0000, Cost = 16.0000
Step 2: w = 1.8000, Gradient = -8.0000, Cost = 10.2400
Step 3: w = 2.4400, Gradient = -6.4000, Cost = 6.5536
Step 4: w = 2.9520, Gradient = -5.1200, Cost = 4.1943
Step 5: w = 3.3616, Gradient = -4.0960, Cost = 2.6844
Step 6: w = 3.6893, Gradient = -3.2768, Cost = 1.7180
Step 7: w = 3.9514, Gradient = -2.6214, Cost = 1.0995
Step 8: w = 4.1611, Gradient = -2.0972, Cost = 0.7037
Step 9: w = 4.3289, Gradient = -1.6777, Cost = 0.4504
Step 10: w = 4.4631, Gradient = -1.3422, Cost = 0.2882
Step 11: w = 4.5705, Gradient = -1.0737, Cost = 0.1845
Step 12: w = 4.6564, Gradient = -0.8590, Cost = 0.1181
Step 13: w = 4.7251, Gradient = -0.6872, Cost = 0.0756
Step 14: w = 4.7801, Gradient = -0.5498, Cost = 0.0484
Step 15: w = 4.8241, Gradient = -0.4398, Cost = 0.0309
Step 16: w = 4.8593, Gradient = -0.3518, Cost = 0.0198
Step 17: w = 4.8874, Gradient = -0.2815, Cost = 0.0127
Step 18: w = 4.9099, Gradient = -0.2252, Cost = 0.0

In [9]:
def gradient_descent_2d(learning_rate,steps,start=(10, -10)):
    """Minimise ``x**2 + y**2`` by gradient descent, printing every step.

    The gradient is estimated numerically by passing the current point as
    a two-element list to ``numerical_derivative_general``.

    Args:
        learning_rate: Factor applied to each gradient component.
        steps: Number of update steps to run.
        start: Initial ``(x, y)`` point; defaults to ``(10, -10)``.

    Returns:
        A tuple ``((x, y), history)``: the point after the last step and
        the list of ``(x, y)`` points after each step (the starting point
        is not included).
    """
    x, y = start

    def cost_func_2d(x_val,y_val):
        return x_val**2 + y_val**2

    # Adapter: the derivative helper calls its function with a single list.
    # It does not depend on the loop variable, so it is defined once here.
    def wrapped_func(vec):
        return cost_func_2d(vec[0], vec[1])

    history = []
    for i in range(steps):
        grad = numerical_derivative_general(wrapped_func,[x,y])

        x = x - learning_rate*grad[0]
        y = y - learning_rate*grad[1]

        history.append((x, y))
        print(f"Step {i+1}: x = {x:.4f}, y = {y:.4f}, Gradient = {grad}, Cost = {cost_func_2d(x,y):.4f}")

    return (x,y),history

In [16]:
# Run 50 steps at learning rate 0.1, then print the returned
# ((x, y), history) pair.
print(gradient_descent_2d(learning_rate=0.1, steps=50))

Step 1: x = 8.0000, y = -8.0000, Gradient = [19.999999999242846, -19.999999999242846], Cost = 128.0000
Step 2: x = 6.4000, y = -6.4000, Gradient = [15.999999998683732, -15.999999998683732], Cost = 81.9200
Step 3: x = 5.1200, y = -5.1200, Gradient = [12.799999999657528, -12.799999999657528], Cost = 52.4288
Step 4: x = 4.0960, y = -4.0960, Gradient = [10.240000000294458, -10.240000000294458], Cost = 33.5544
Step 5: x = 3.2768, y = -3.2768, Gradient = [8.19199999995135, -8.19199999995135], Cost = 21.4748
Step 6: x = 2.6214, y = -2.6214, Gradient = [6.5536000004584585, -6.5536000004584585], Cost = 13.7439
Step 7: x = 2.0972, y = -2.0972, Gradient = [5.2428800003667675, -5.2428800003667675], Cost = 8.7961
Step 8: x = 1.6777, y = -1.6777, Gradient = [4.194304000293414, -4.194304000293414], Cost = 5.6295
Step 9: x = 1.3422, y = -1.3422, Gradient = [3.355443200225849, -3.355443200225849], Cost = 3.6029
Step 10: x = 1.0737, y = -1.0737, Gradient = [2.6843545601895613, -2.6843545601895613], Cost

## Backpropagation

In [None]:
# CHAIN RULE EXERCISE
# Scenario:
# x (Input) = 2
# y = x * 3  (Slope dy/dx = 3)
# z = y ** 2 (Slope dz/dy = 2y)

# We want dz/dx.
# 1. Forward Pass:
#    x = 2
#    y = 6 (2*3)
#    z = 36 (6^2)

# 2. Backward Pass:
#    Slope dz/dy = 2 * y = 2 * 6 = 12
#    Slope dy/dx = 3
#    Slope dz/dx = dz/dy * dy/dx = 12 * 3 = 36

# VERIFICATION (Numerical):
# If I move x from 2.0 to 2.001 (change of 0.001)
# z moves from 36.0 to ...?
# (2.001 * 3)^2 = 6.003^2 ≈ 36.036
# Change in z = 0.036
# Slope = 0.036 / 0.001 = 36. MATCH!