<a href="https://colab.research.google.com/github/tokaalaa/DM_Course/blob/main/Sheet_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from numpy import linalg as LA
import sklearn
import matplotlib.pyplot as plt

In [None]:
def relu(x):
    """Apply the rectified linear unit element-wise.

    Arguments:
    x -- scalar or numpy array of pre-activations

    Returns:
    The element-wise maximum of 0 and x, as computed by ``np.maximum``.
    """
    return np.maximum(0, x)

In [None]:
def initialize_parameters_random(layers_dims):
    """Create small random weights and biases for a fully connected network.

    Arguments:
    layers_dims -- sequence with the size of every layer, input layer first

    Returns:
    parameters -- dict that holds, for each layer index i >= 1, a weight
                  matrix "W<i>" of shape (layers_dims[i], layers_dims[i-1])
                  and a bias column of shape (layers_dims[i], 1) stored
                  under the key "b<i+1>"
    """
    parameters = {}
    # Walk over consecutive (fan-in, fan-out) pairs; layer indices start at 1.
    size_pairs = zip(layers_dims[:-1], layers_dims[1:])
    for idx, (n_in, n_out) in enumerate(size_pairs, start=1):
        # Weights are drawn before biases, so the global random stream is
        # consumed in a fixed order for a given seed.
        parameters['W' + str(idx)] = 0.01 * np.random.randn(n_out, n_in)
        # Note: the bias key is offset by one ("W1" is paired with "b2").
        parameters['b' + str(idx + 1)] = 0.01 * np.random.randn(n_out, 1)

    return parameters

In [None]:
# Smoke-test the initializer on a 2-2-1 network and show every entry.
parameters = initialize_parameters_random([2,2,1])
for key in ("W1", "b2", "W2", "b3"):
    print(key + " = " + str(parameters[key]))

W1 = [[ 0.01454044 -0.00092839]
 [ 0.00932367 -0.00113497]]
b2 = [[-0.00167012]
 [ 0.00547458]]
W2 = [[0.00944714 0.00865369]]
b3 = [[-0.02981323]]


In [None]:
def forward_propagation(X, parameters):
    """Run the two-layer forward pass LINEAR -> RELU -> LINEAR -> RELU.

    Arguments:
    X -- input array; it is transposed before the first matrix product, so
         rows are presumably examples and columns features (TODO confirm)
    parameters -- dict holding the arrays "W1", "b2", "W2" and "b3"

    Returns:
    a2 -- activation of the last layer
    cache -- tuple (z1, a1, W1, b2, z2, a2, W2, b3) kept for the backward pass
    """
    W1, b2 = parameters["W1"], parameters["b2"]
    W2, b3 = parameters["W2"], parameters["b3"]

    # Hidden layer: affine map of the transposed input, then ReLU.
    z1 = np.dot(W1, X.T) + b2
    a1 = relu(z1)

    # Output layer: affine map of the hidden activations, then ReLU.
    z2 = np.dot(W2, a1) + b3
    a2 = relu(z2)

    return a2, (z1, a1, W1, b2, z2, a2, W2, b3)

In [None]:
def compute_loss(a2, Y, lamda, params=None):
    """Squared-error loss with an L2 penalty on every weight and bias.

    Arguments:
    a2 -- network output from the forward pass
    Y -- targets, broadcast-compatible with a2
    lamda -- L2 regularization strength
    params -- dict with "W1", "b2", "W2" and "b3" whose norms are penalized.
              When omitted, the module-level ``parameters`` dict is used
              (the historical behaviour of this function). Pass the dict
              that is actually being trained so the penalty reflects the
              current model instead of whatever the global happens to hold.

    Returns:
    loss -- sum over all entries of (a2 - Y)**2 + penalty, divided by
            Y.shape[0]
    """
    if params is None:
        # Backward-compatible fallback to the notebook-global dict.
        params = parameters

    # Squared norm of each parameter array, scaled by the regularization strength.
    penalty = lamda * sum(LA.norm(params[key])**2 for key in ("W2", "W1", "b2", "b3"))

    # The scalar penalty is broadcast onto every entry of the squared error,
    # so after the sum it is counted once per entry of (a2 - Y).
    E = (a2 - Y)**2 + penalty

    D = Y.shape[0]
    loss = 1./D * np.sum(E)

    return loss

In [None]:
def backward_propagation(X, Y, cache, lamda):
    """Per-example gradients of the L2-regularized squared-error loss.

    The per-example loss is (a2 - y)**2 + lamda * (squared norms of W1, b2,
    W2, b3) for the network LINEAR -> RELU -> LINEAR -> RELU. Gradients are
    NOT summed over examples here; the example index is the last axis of
    every returned array.

    Arguments:
    X -- inputs of shape (m, n_x), one example per row
    Y -- targets of shape (1, m)
    cache -- tuple (z1, a1, W1, b2, z2, a2, W2, b3) from the forward pass
    lamda -- L2 regularization strength

    Returns:
    gradients -- dict with
                 "dW2" of shape (n_h, m),
                 "db3" of shape (1, m),
                 "dW1" of shape (n_h, n_x, m),
                 "db2" of shape (n_h, m)
    """
    (z1, a1, W1, b2, z2, a2, W2, b3) = cache

    # Output weights as a column so they broadcast against (1, m) rows.
    W2_col = np.reshape(W2, (-1, 1))

    # Top layer: ReLU derivative is 1 where the output unit is active.
    h = np.where(a2 > 0, 1, 0)
    delta2 = 2 * (a2 - Y) * h                      # (1, m)

    dW2 = 2 * lamda * W2_col + delta2 * a1         # (n_h, m)
    db3 = 2 * lamda * b3 + delta2                  # (1, m)

    # Bottom layer: the error flows back through W2 and is gated by the
    # hidden ReLU, so inactive hidden units (z1 <= 0) receive no data
    # gradient -- only the regularization term.
    hidden_active = np.where(z1 > 0, 1, 0)
    delta1 = W2_col * delta2 * hidden_active       # (n_h, m)

    # Outer product delta1[:, i] x X[i, :] for every example i at once.
    dW1 = (2 * lamda * W1[:, :, np.newaxis]
           + delta1[:, np.newaxis, :] * X.T[np.newaxis, :, :])   # (n_h, n_x, m)
    db2 = 2 * lamda * b2 + delta1                  # (n_h, m)

    gradients = {"dW2": dW2, "db3": db3,
                 "dW1": dW1, "db2": db2}

    return gradients

In [None]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step, summing per-example gradients.

    Arguments:
    parameters -- dict with weights "W<k>" and biases "b<k+1>" for
                  k = 1..number of layers; updated in place
    grads -- dict with "dW<k>" and "db<k+1>" whose LAST axis indexes examples
    learning_rate -- step size

    Returns:
    parameters -- the same dict, with every entry keeping its original shape

    Raises:
    ValueError -- if a summed gradient does not have as many elements as the
                  parameter it updates
    """
    # Each layer contributes one weight entry and one bias entry.
    num_layers = len(parameters) // 2

    for k in range(1, num_layers + 1):
        for name in ("W" + str(k), "b" + str(k + 1)):
            current = parameters[name]
            summed = np.sum(grads["d" + name], axis=-1)
            # Summing over examples drops the last axis (e.g. (2, 4) -> (2,)).
            # Restore the parameter's shape before subtracting; otherwise a
            # (2, 1) bias minus a (2,) step silently broadcasts to (2, 2).
            step = np.reshape(summed, np.shape(current))
            parameters[name] = current - learning_rate * step

    return parameters

In [None]:
def model(X, Y, learning_rate = 0.1, num_iterations = 10, print_cost = True, lamda = 0.5):
    """Train the 2-layer ReLU network with batch gradient descent.

    Arguments:
    X -- inputs, one example per row (X.shape[1] sets the input layer size)
    Y -- targets passed through to the loss and the backward pass
    learning_rate -- gradient-descent step size
    num_iterations -- number of full passes over X
    print_cost -- if True, print the cost every 10 iterations
    lamda -- L2 regularization strength

    Returns:
    parameters -- dict of trained weights and biases

    Side effects:
    Shows a matplotlib plot of the cost sampled every 10 iterations.
    """
    costs = [] # cost sampled every 10 iterations, for the plot
    layers_dims = [X.shape[1], 2, 1]

    # Initialize parameters dictionary.
    parameters = initialize_parameters_random(layers_dims)

    # Loop (gradient descent)
    for i in range(num_iterations):

        # Forward propagation: LINEAR -> RELU -> LINEAR -> RELU.
        a2, cache = forward_propagation(X, parameters)

        # Loss
        # NOTE(review): compute_loss is not handed this function's
        # `parameters`; check that its penalty term uses the dict being
        # trained here rather than some other dict.
        cost = compute_loss(a2, Y, lamda)

        # Backward propagation.
        grads = backward_propagation(X, Y, cache, lamda)

        # Update parameters.
        parameters = update_parameters(parameters, grads, learning_rate)

        # Record the cost every 10 iterations so the plot is populated even
        # when printing is disabled; print only on request.
        if i % 10 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration {}: {}".format(i, cost))

    # plot the loss
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('iterations (per tens)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters

In [None]:
# XOR truth table: one row per example, one column per input bit.
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])

# XOR targets as a single row, aligned with the rows of X.
Y = np.array([0, 1, 1, 0]).reshape(1, -1)

model(X, Y, learning_rate = 0.1, num_iterations = 100, print_cost = True, lamda = 0.5)

[[ 0.00365647 -0.03008668  0.00354825  0.00364184]
 [ 0.00641169  0.00641169  0.00465941  0.00641169]]
[[ 0.02223766 -1.97769384 -1.97780276  0.02223069]]
[[[-0.0106356  -0.0106356  -0.01767958 -0.01058296]
  [ 0.00965296  0.00260935  0.00965296  0.00970559]]

 [[ 0.0017206   0.0017206  -0.01100786  0.00181572]
  [-0.0060176  -0.01874537 -0.0060176  -0.00592248]]]
[[ 0.0073435   0.00024723  0.00024685  0.00734347]
 [-0.00074277 -0.0135657  -0.0135664  -0.00074281]]
{'W1': array([[-0.0106356 ,  0.00965296],
       [ 0.0017206 , -0.0060176 ]]), 'b2': array([[ 0.00729083],
       [-0.00083793]]), 'W2': array([[0.00354825, 0.00641169]]), 'b3': array([[0.00739531]])}
0
W1
(2, 2)
(2, 2)
b2
(2, 2)
(2,)
1
W2
(1, 2)
(2,)
b3
(1, 1)
(1,)
{'W1': array([[-0.00568222,  0.00649087],
       [ 0.0022957 , -0.0023473 ]]), 'b2': array([[ 0.00577273,  0.0101526 ],
       [-0.00235604,  0.00202384]]), 'W2': array([[0.00547227, 0.00402224]]), 'b3': array([[0.39849813]])}
Cost after iteration 0: 1.9732801252

ValueError: ignored