#### J034 Meghna Jayakar Lab 1 

28/7/24

In [1]:
import numpy as np
import pandas as pd

#### Activation Functions 

In [7]:
def sigmoid(z):
    a = 1/(1 + np.exp(-z))
    return a

In [8]:
def tanh(z):
    a = (np.exp(z) - np.exp(-z))/(np.exp(z) + np.exp(-z))
    return a

In [9]:
def relu(z):
    a = np.maximum(0, z)
    return a

In [10]:
def leakyrelu(z):
    a = np.maximum(0.01, z)
    return a

#### Derivatives of Activation Functions

In [11]:
def dsigmoid(a):
    da = a*(1 - a)
    return da

In [12]:
def dtanh(a):
    da = (1 - a**2)
    return da

In [13]:
def drelu(a):
    da = (np.int64(a > 0))
    return da

In [14]:
def dleakyrelu(a, alpha = 0.01):
    da = np.where(a > 0, 1, alpha)
    return da

#### Randomly generating X and y

In [18]:
x = np.random.randn(2, 3) # nx = 2, m = 3
y = (np.random.randn(1, 3) > 0) # m = 3

In [21]:
x, x.shape

(array([[-0.47695544, -1.01818989,  1.82620117],
        [ 0.33994065,  0.72108724,  0.43990343]]),
 (2, 3))

In [22]:
y, y.shape

(array([[ True,  True,  True]]), (1, 3))

#### Defining Layer Sizes

In [33]:
def layer_sizes(x, y, node1):
    nx = x.shape[0]
    nh1 = node1
    ny = y.shape[0]
    
    layersizes = {'nx': nx, 'nh1': nh1, 'ny': ny}
    
    return layersizes

In [34]:
layersizes = layer_sizes(x, y, node1 = 4)
print(layersizes)

{'nx': 2, 'nh1': 4, 'ny': 1}


#### Initializing weights and biases

In [35]:
def initialize_parameters(layersizes):
    nx = layersizes['nx']
    nh1 = layersizes['nh1']
    ny = layersizes['ny']
    
    w1 = np.random.randn(nh1, nx)
    b1 = np.random.randn(nh1, 1)
    
    w2 = np.random.randn(ny, nh1)
    b2 = np.random.randn(ny, 1)
    
    parameters = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}
    
    return parameters

In [36]:
parameters = initialize_parameters(layersizes)
print(parameters)

{'w1': array([[ 0.96304868,  0.33576726],
       [ 0.19928678, -0.02631709],
       [-1.73630317,  1.2026754 ],
       [ 0.61347926,  0.66688338]]), 'b1': array([[ 0.41451347],
       [ 1.97907769],
       [-0.34842484],
       [-1.03361147]]), 'w2': array([[ 0.70462661,  0.12093668,  1.72860692, -1.17422484]]), 'b2': array([[1.20614356]])}


#### Forward Propagation

In [41]:
def forwardprop(x, parameters):
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    
    z1 = np.dot(w1, x) + b1
    a1 = np.tanh(z1)
    
    z2 = np.dot(w2, a1) + b2
    a2 = sigmoid(z2)
    
    cache = {'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2}
    
    return a2, cache

In [42]:
a2, cache = forwardprop(x, parameters)
print(a2, cache)

[[0.97176456 0.97732195 0.46757287]] {'z1': array([[ 0.0693231 , -0.32393548,  2.32093926],
       [ 1.87508053,  1.75718898,  2.33143846],
       [ 0.88855266,  2.28669538, -2.99020268],
       [-1.09951298, -1.17736876,  0.38008936]]), 'a1': array([[ 0.06921227, -0.31306105,  0.98090492],
       [ 0.95405249,  0.94218822,  0.981298  ],
       [ 0.71067813,  0.97956515, -0.99495714],
       [-0.80032401, -0.82662011,  0.36278507]]), 'z2': array([[ 3.53853577,  3.76341849, -0.12989085]]), 'a2': array([[0.97176456, 0.97732195, 0.46757287]])}


#### Cost Function

In [45]:
def costcompute(a2, y):
    m = y.shape[1]
    
    lossfunc = (y * np.log(a2) + (1 - y) * (np.log(1 - a2)))
    cost = np.sum(lossfunc)/m
    cost = float(np.squeeze(cost))
    
    return cost

In [46]:
costcompute(a2, y)

-0.2705936523029923

#### Back Propagation

In [47]:
def backprop(cache, x, y, parameters):
    m = y.shape[1]
    
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    
    z1 = cache['z1']
    a1 = cache['a1']
    z2 = cache['z2']
    a2 = cache['a2']
    
    dz2 = a2 - y
    dw2 = np.dot(dz2, a1.T)
    db2 = np.sum(dz2, axis = 1, keepdims = True)/m
    
    da1 = np.dot(w2.T, dz2)
    dz1 = da1 * dtanh(a1)
    dw1 = np.dot(dz1, x.T)
    db1 = np.sum(dz1, axis = 1, keepdims = True)/m
    
    gradients = {'dw2': dw2, 'db2': db2, 'dw1': dw1, 'db1': db1}
    
    return gradients

In [48]:
gradients = backprop(cache, x, y, parameters)
print(gradients)

{'dw2': array([[-0.51711502, -0.57077476,  0.48746124, -0.15181298]]), 'db2': array([[-0.19444687]]), 'dw1': array([[-0.00179569, -0.02336677],
       [-0.0038974 , -0.00137585],
       [-0.00377252, -0.01342847],
       [ 0.9771844 ,  0.2489591 ]]), 'db1': array([[-0.01613477],
       [-0.00100015],
       [-0.01166723],
       [ 0.1877527 ]])}


#### Updating the Gradients

In [49]:
def update(parameters, gradients, lr = 0.01):
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    
    dw1 = gradients['dw1']
    db1 = gradients['db1']
    dw2 = gradients['dw2']
    db2 = gradients['db2']
    
    w1 = w1 - lr*dw1
    b1 = b1 - lr*db1
    
    w2 = w2 - lr*dw2
    b2 = b2 - lr*db2
    
    parameters = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}
    
    return parameters

In [50]:
parameters = update(parameters, gradients, lr = 0.01)
print(parameters)

{'w1': array([[ 0.96306664,  0.33600093],
       [ 0.19932576, -0.02630334],
       [-1.73626544,  1.20280968],
       [ 0.60370742,  0.66439378]]), 'b1': array([[ 0.41467482],
       [ 1.97908769],
       [-0.34830817],
       [-1.035489  ]]), 'w2': array([[ 0.70979776,  0.12664443,  1.7237323 , -1.17270671]]), 'b2': array([[1.20808803]])}


#### Network

In [53]:
def neuralnetwork(x, y, layersizes, epochs = 1000, pcost = False):
    nx = layersizes['nx']
    nh1 = layersizes['nh1']
    ny = layersizes['ny']
    
    parameters = initialize_parameters(layersizes)
    
    for i in range(0, epochs):
        a2, cache = forwardprop(x, parameters)
        
        cost = costcompute(a2, y)
        
        gradients = backprop(cache, x, y, parameters)
        
        parameters = update(parameters, gradients, lr = 0.01)
        
        if pcost and i % 100 == 0:
            print(f'Cost {i} {cost}')
    return parameters

In [54]:
neuralnetwork(x, y, layersizes, epochs = 1000, pcost = True)

Cost 0 -2.615394810152418
Cost 100 -0.29468400802824984
Cost 200 -0.10398834285014032
Cost 300 -0.05982427277589108
Cost 400 -0.04126624390741442
Cost 500 -0.03124198201898766
Cost 600 -0.025022781612078365
Cost 700 -0.02080994174414183
Cost 800 -0.017777609089673973
Cost 900 -0.015495807236903652


{'w1': array([[-1.28532108,  0.08085677],
        [ 0.29937374,  0.63224946],
        [ 0.10805754,  0.86917612],
        [ 0.00391111,  0.38011158]]),
 'b1': array([[-0.50366198],
        [ 0.4061873 ],
        [ 0.88441619],
        [-1.7405434 ]]),
 'w2': array([[ 0.31423501,  0.91082094,  2.4439194 , -2.73305395]]),
 'b2': array([[-0.8451474]])}