<a href="https://colab.research.google.com/github/southjohn64/exercises/blob/main/dl_ex1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [44]:
import numpy as np


def initialize_parameters(layer_dims):
    '''
    Create the weight matrices and bias vectors for every layer of the network.

    :param layer_dims: sequence with the size of each layer (entry 0 is the size of the
    flattened input, the last entry is the size of the softmax output layer)
    :return: a dictionary with the keys 'W1'..'WL' and 'b1'..'bL', where
    Wi is drawn from a standard normal distribution with shape
    [size of layer i-1, size of layer i] and bi is a zero column vector of shape [size of layer i, 1]
    '''
    params = {}
    # Walk over consecutive (previous, current) layer sizes; layer numbering starts at 1.
    consecutive_sizes = zip(layer_dims[:-1], layer_dims[1:])
    for layer_idx, (n_prev, n_curr) in enumerate(consecutive_sizes, start=1):
        params[f'W{layer_idx}'] = np.random.randn(n_prev, n_curr)
        params[f'b{layer_idx}'] = np.zeros((n_curr, 1))

    return params


def linear_forward(A, W, b):
    '''
        Compute the linear (pre-activation) part of a layer's forward pass: Z = W.T . A + b
        :param A: the activations of the previous layer (for the first layer this is the raw input X,
        since no activation has been applied yet)
        :param W: the weight matrix of the current layer; it is transposed before the product, so it is
        expected with shape [size of previous layer, size of current layer]
        :param b: the bias vector of the current layer, added to the product (broadcast by NumPy)
        :return: Z – the value before applying the non-linear function
        linear_cache – a dictionary with the keys 'A', 'W', 'b' holding the inputs unchanged
        (stored for making the backpropagation easier to compute)
    '''
    weighted_input = np.dot(W.T, A)
    Z = weighted_input + b
    return Z, dict(A=A, W=W, b=b)


def softmax(Z):
    '''
    Apply the softmax function along axis 0, so every column of Z becomes a probability
    distribution that sums to 1.

    :param Z: the linear component of the activation function; each column is treated as the
    scores of one sample (a single column vector or a 1-D vector also works)
    :return: A – the activations of the layer, same shape as Z
    activation_cache – returns Z unchanged, which will be useful for the backpropagation
    '''
    # Softmax is invariant to adding a constant per column; subtracting the column maximum
    # keeps np.exp from overflowing to inf (which would turn the result into nan).
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)

    # Normalise per column (per sample) rather than over the whole matrix.
    A = exp_scores / np.sum(exp_scores, axis=0, keepdims=True)
    activation_cache = Z
    return A, activation_cache


def relu(Z):
    '''
    Apply the ReLU non-linearity element-wise: max(Z, 0).

    :param Z: the linear component of the activation function
    :return: A – the activations of the layer (negative entries of Z replaced by 0)
    activation_cache – Z itself, kept for the backpropagation step
    '''
    return np.maximum(Z, 0), Z

def linear_activation_forward(A_prev, W, B, activation):
    '''
    Implement the forward propagation for one LINEAR -> ACTIVATION layer.

    :param A_prev: the activations of the previous layer
    :param W: the weight matrix of the current layer (passed as-is to linear_forward)
    :param B: the bias vector of the current layer (passed as-is to linear_forward)
    :param activation: the activation function to apply, either 'softmax' or 'relu'
    :return: A_current – the activations of the current layer
    cache – a joint dictionary holding the linear cache entries ('A', 'W', 'b') plus the
    activation cache under the key 'Z'
    :raises ValueError: if activation is neither 'softmax' nor 'relu'
    '''
    Z_current, linear_cache = linear_forward(A_prev, W, B)

    if activation == 'softmax':
        A_current, activation_cache = softmax(Z_current)
    elif activation == 'relu':
        A_current, activation_cache = relu(Z_current)
    else:
        # Fail loudly instead of hitting an unbound local further down.
        raise ValueError(
            "activation must be 'softmax' or 'relu', got {!r}".format(activation))

    # dict.update() mutates in place and returns None, and the activation cache is an array,
    # not a mapping, so build the joint dictionary explicitly.
    cache = {**linear_cache, 'Z': activation_cache}
    return A_current, cache



In [None]:
 nets = initialize_parameters([4, 6, 6, 10])
a = np.random.uniform(0, 255, (4,1))  # mnist image
print('a ',a)

z, linear_cache = linear_forward(a, nets.get('W1'), nets.get('b1'))
relu(z)

In [47]:
# Scratch cell: check linear_forward on a tiny 2-4-10 network with a fixed, hand-written input
# so the printed Z can be verified by hand against the printed W.
nets = initialize_parameters([2, 4, 10])
# Earlier experiments with a random 4-feature input (would not match the 2-unit input layer above):
#a = np.random.uniform(0, 255, 4).reshape(-1, 1)  # mnist image
#a = np.random.uniform(0, 255, (4,1))  # mnist image
a = np.array([[1],[2]])
print(a.shape)

z, linear_cache = linear_forward(a, nets.get('W1'), nets.get('b1'))
print('z: ', z)
print('linear_cache: ', linear_cache)
# NOTE(review): disabled draft — Z_current is not defined in this cell.
#A_current, activation_cache = relu(Z_current)

# NOTE(review): disabled draft — the call is missing the required `activation` argument.
#linear_activation_forward(a, nets.get('W1'), nets.get('b1'))

(2, 1)
z:  [[-2.86420238]
 [-4.82335325]
 [-4.45644025]
 [ 0.63992757]]
linear_cache:  {'A': array([[1],
       [2]]), 'W': array([[-1.18030014, -1.98778723, -2.61150807, -1.20035104],
       [-0.84195112, -1.41778301, -0.92246609,  0.92013931]]), 'b': array([[0.],
       [0.],
       [0.],
       [0.]])}
