# CNN using Numpy

In [6]:
import numpy as np

### Convolution functions, including:
    - Zero Padding
    - Convolve window 
    - Convolution forward
    - Convolution backward
### Pooling functions, including:
    - Pooling forward
    - Create mask 
    - Distribute value
    - Pooling backward
### Input data:
    - Number of samples : m
    - Image height : n_h
    - Image width : n_w
    - Number of channels : n_c (3 for RGB images)

## Convolution

In [7]:
def zero_padding(X, pad):
    '''
    Surround every image of a batch with a border of zeros.

    Only the height and width axes (axes 1 and 2) are padded; the sample
    axis and the channel axis are left untouched.

    Arguments:
    X -- numpy array of shape (m, n_h, n_w, n_c) containing input images
    pad -- number of zero rows/columns added on each side of the height and width axes

    Returns:
    X_pad -- numpy array of shape (m, n_h + 2*pad, n_w + 2*pad, n_c) containing padded images
    '''
    untouched = (0, 0)
    border = (pad, pad)
    pad_width = (untouched, border, border, untouched)
    return np.pad(X, pad_width, mode='constant', constant_values=(0, 0))

In [8]:
def conv_step(Window, W, b):
    '''
    Apply one filter to one window of the input: element-wise product,
    sum over all entries, then add the bias.

    Arguments:
    Window -- current window slice of the input image of shape (f, f, n_c_prev)
    W -- weight matrix of shape (f, f, n_c_prev) where f is the filter size
    b -- bias holding exactly one value, e.g. an array of shape (1, 1, 1) or a plain scalar

    Returns:
    Z -- convoluted output (a scalar)

    Raises:
    ValueError -- if b holds more than one element
    '''
    # float() on an array with ndim > 0 is deprecated since NumPy 1.25;
    # .item() extracts the single element explicitly instead.
    bias = float(np.asarray(b).item())
    return np.sum(np.multiply(Window, W)) + bias

In [9]:
def conv_forward(A_prev, W, b, hyper_params):
    '''
    Forward pass of a convolution layer.

    The input is zero-padded, then every filter is slid over every sample
    with the configured stride; each output entry is produced by conv_step.

    Arguments:
    A_prev -- post-activation output of previous layer of shape (m, n_h_prev, n_w_prev, n_c_prev)
    W -- weight matrix of shape (f, f, n_c_prev, n_c) where f is the filter size
    b -- bias matrix of shape (1, 1, 1, n_c)
    hyper_params -- python dictionary containing the 'stride' and 'pad' parameters

    Returns:
    Z -- convoluted output of shape (m, n_h, n_w, n_c)
    cache -- values required for backward propagation (A_prev, W, b, hyper_params)
    '''
    batch, prev_height, prev_width, _ = A_prev.shape
    _, f, _, n_filters = W.shape

    stride = hyper_params['stride']
    pad = hyper_params['pad']

    # Output size of a strided convolution over the padded input.
    out_height = int((prev_height + 2*pad - f)/stride) + 1
    out_width = int((prev_width + 2*pad - f)/stride) + 1

    Z = np.zeros((batch, out_height, out_width, n_filters))
    padded = zero_padding(A_prev, pad)

    # np.ndindex walks (sample, row, column, filter) in the same order as
    # four nested range() loops would.
    for i, h, w, c in np.ndindex(batch, out_height, out_width, n_filters):
        top = h*stride
        left = w*stride
        window = padded[i, top:top + f, left:left + f, :]
        Z[i, h, w, c] = conv_step(window, W[:, :, :, c], b[:, :, :, c])

    return Z, (A_prev, W, b, hyper_params)

In [10]:
def pool_forward(A_prev, hyper_params, mode = 'max'):
    '''
    Forward pass of a pooling layer (no padding).

    Arguments:
    A_prev -- post-activation output of previous layer of shape (m, n_h_prev, n_w_prev, n_c_prev)
    hyper_params -- python dictionary containing the stride ('stride') and the window size ('f')
    mode -- type of the pooling function ('max' or 'average')

    Returns:
    A -- pooled output of shape (m, n_h, n_w, n_c)
    cache -- values required for backward propagation (A_prev, hyper_params)

    Raises:
    ValueError -- if mode is neither 'max' nor 'average'
    '''
    reducers = {'max': np.max, 'average': np.mean}
    if mode not in reducers:
        # Fail loudly: an unknown mode used to return an all-zero output.
        raise ValueError("mode must be 'max' or 'average', got %r" % (mode,))
    reduce_window = reducers[mode]

    (m, n_h_prev, n_w_prev, n_c_prev) = A_prev.shape

    s = hyper_params['stride']
    f = hyper_params['f']

    n_h = int((n_h_prev - f)/s) + 1
    n_w = int((n_w_prev - f)/s) + 1
    n_c = n_c_prev
    A = np.zeros((m, n_h, n_w, n_c))

    for h in range(n_h):
        start_h = h*s
        end_h = start_h + f
        for w in range(n_w):
            start_w = w*s
            end_w = start_w + f
            # Reduce over the two spatial axes of the window for all samples
            # and all channels at once.
            windows = A_prev[:, start_h:end_h, start_w:end_w, :]
            A[:, h, w, :] = reduce_window(windows, axis=(1, 2))

    cache = (A_prev, hyper_params)
    return A, cache

In [11]:
def conv_backward(dZ, cache):
    '''
    Backward pass of a convolution layer.

    Arguments:
    dZ -- gradient of cost with respect to the output of the layer (Z) of shape (m, n_h, n_w, n_c)
    cache -- cache of values needed for the conv_backward(), output of conv_forward()

    Returns:
    dA_prev -- gradient of the cost with respect to the input of the conv layer (A_prev),
               numpy array of shape (m, n_h_prev, n_w_prev, n_c_prev)
    dW -- gradient of the cost with respect to the weights of the conv layer (W)
          numpy array of shape (f, f, n_c_prev, n_c)
    db -- gradient of the cost with respect to the biases of the conv layer (b)
          numpy array of shape (1, 1, 1, n_c)
    '''
    (A_prev, W, b, hyper_params) = cache
    (_, n_h, n_w, _) = dZ.shape
    (m, n_h_prev, n_w_prev, _) = A_prev.shape
    (_, f, n_c_prev, n_c) = W.shape

    s = hyper_params['stride']
    p = hyper_params['pad']

    dW = np.zeros((f, f, n_c_prev, n_c))
    db = np.zeros((1, 1, 1, n_c))

    # Zero-pad the forward input exactly as the forward pass did.
    A_prev_pad = np.pad(A_prev, ((0, 0), (p, p), (p, p), (0, 0)),
                        mode='constant', constant_values=0)
    # Gradients are accumulated in padded coordinates and cropped at the end.
    dA_prev_pad = np.zeros((m, n_h_prev + 2*p, n_w_prev + 2*p, n_c_prev))

    for i in range(m):
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        for h in range(n_h):
            start_h = h*s
            end_h = start_h + f
            for w in range(n_w):
                start_w = w*s
                end_w = start_w + f
                # The input window does not depend on the filter index.
                a_slice = a_prev_pad[start_h:end_h, start_w:end_w, :]
                for c in range(n_c):
                    grad = dZ[i, h, w, c]
                    da_prev_pad[start_h:end_h, start_w:end_w, :] += W[:, :, :, c]*grad
                    dW[:, :, :, c] += a_slice*grad
                    db[:, :, :, c] += grad

    # Crop the padding with explicit end indices: a slice written as p:-p is
    # empty when p == 0, which made the unpadded case fail.
    dA_prev = dA_prev_pad[:, p:p + n_h_prev, p:p + n_w_prev, :].copy()
    return dA_prev, dW, db

In [12]:
def create_mask(X):
    '''
    Build a boolean mask marking where X attains its maximum.

    Arguments:
    X -- Array of shape (f, f)

    Returns:
    mask -- Array of the same shape as X, True at every position whose value
            equals the maximum of X (all tied positions are marked) and
            False elsewhere.
    '''
    largest = np.max(X)
    return X == largest

In [17]:
def distribute_value(dZ, shape):
    '''
    Spread a scalar evenly over a matrix of the requested shape.

    Arguments:
    dZ -- input scalar
    shape -- the shape (n_h, n_w) of the output matrix over which dZ is distributed

    Returns:
    A -- Array of size (n_h, n_w) in which every entry equals dZ / (n_h * n_w)
    '''
    rows, cols = shape
    share = dZ/(rows*cols)
    return np.ones((rows, cols))*share

In [19]:
def pool_backward(dA, cache, mode = 'max'):
    '''
    Backward pass of a pooling layer.

    For 'max' pooling the incoming gradient is routed to the position(s) of
    each window that hold the window maximum; for 'average' pooling it is
    spread evenly over the window. Contributions of overlapping windows are
    summed.

    Arguments:
    dA -- gradient of cost with respect to the output of the pooling layer, same shape as A
    cache -- cache output from the forward pass of the pooling layer, contains the layer's
             input and hyper-parameters ('stride' and 'f')
    mode -- pooling mode defined as a string ('max' or 'average')

    Returns:
    dA_prev -- gradient of cost with respect to the input of the pooling layer, same shape as A_prev

    Raises:
    ValueError -- if mode is neither 'max' nor 'average'
    '''
    if mode not in ('max', 'average'):
        # Fail loudly: an unknown mode used to return all-zero gradients.
        raise ValueError("mode must be 'max' or 'average', got %r" % (mode,))

    A_prev, hyper_params = cache
    m, n_h, n_w, n_c = dA.shape
    m, n_h_prev, n_w_prev, n_c_prev = A_prev.shape

    s = hyper_params['stride']
    f = hyper_params['f']

    dA_prev = np.zeros((m, n_h_prev, n_w_prev, n_c_prev))

    for i in range(m):
        for h in range(n_h):
            start_h = h*s
            end_h = start_h + f
            for w in range(n_w):
                start_w = w*s
                end_w = start_w + f
                for c in range(n_c):
                    da = dA[i, h, w, c]
                    if mode == 'max':
                        # Only the maximal entries of the window receive gradient.
                        mask = create_mask(A_prev[i, start_h:end_h, start_w:end_w, c])
                        dA_prev[i, start_h:end_h, start_w:end_w, c] += np.multiply(mask, da)
                    else:
                        # Average pooling: every entry gets an equal share.
                        dA_prev[i, start_h:end_h, start_w:end_w, c] += distribute_value(da, (f, f))
    return dA_prev