# Building components of the convolutional model

In [1]:
# import packages
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def zero_pad(X, pad):
    """
    Surround axes 1 and 2 of a four-axis array with a border of zeros.

    Arguments:
    X -- input array with four axes; axes 0 and 3 are left untouched
    pad -- number of zeros added before and after axes 1 and 2

    Returns:
    X_pad -- padded copy of X; axes 1 and 2 are each longer by 2 * pad
    """

    untouched = (0, 0)
    border = (pad, pad)

    # "constant" mode with value 0 is np.pad's default; spelled out here so
    # the zero padding is visible at the call site
    return np.pad(X, (untouched, border, border, untouched),
                  mode="constant", constant_values=0)

In [3]:
# Sanity check: padding by 1 should grow axes 1 and 2 from 3 to 5 while
# leaving axes 0 and 3 unchanged. Only the shapes are printed, so the
# unseeded random values do not affect the displayed output.
x = np.random.randn(4, 3, 3, 2)
x_padded = zero_pad(X=x, pad=1)
print(x.shape, x_padded.shape)

(4, 3, 3, 2) (4, 5, 5, 2)


In [4]:
def conv_single_step(a_slice_prev, W, b):
    """
    Compute a single convolution output value: multiply a window of the
    previous layer element-wise with one filter, sum the products, and add
    the filter's bias.

    Arguments:
    a_slice_prev -- window cut from the previous layer's activations
    W -- filter weights, multiplied element-wise with a_slice_prev
    b -- bias for this filter; singleton axes are squeezed away before adding

    Returns:
    Z -- sum of the element-wise products plus the squeezed bias
    """

    bias = np.squeeze(b)
    weighted_sum = np.sum(np.multiply(a_slice_prev, W))

    return weighted_sum + bias

In [5]:
# Sanity check: run one convolution step on a random 4x4x3 window and filter
np.random.seed(1)  # fixed seed so the printed value is reproducible
a_slice_prev = np.random.randn(4, 4, 3)
W = np.random.randn(4, 4, 3)
# A single bias value, stored with singleton axes
b = np.random.randn(1, 1, 1)

Z = conv_single_step(a_slice_prev, W, b)
print(Z)

-6.999089450680221


In [6]:
def conv_forward(A_prev, W, b, h_p):
    """
    Forward pass of a convolution layer.

    Arguments:
    A_prev -- Previous activation layer or input X, indexed as
              (example, height, width, channel)
    W -- Weights, indexed as (filter height, filter width, input channel, filter)
    b -- biases, read as b[:, :, :, c] for filter c
    h_p -- hyperparameters: dictionary with the keys 'stride' and 'pad'

    Returns:
    Z, cache -- Convolved output of shape (m, n_H, n_W, n_C) and the tuple
                (A_prev, W, b, h_p) kept for back propagation

    Raises:
    ValueError -- if A_prev and W disagree on the number of input channels
    """

    # Retrieve the shape of the previous activation
    (m, n_HP, n_WP, n_CP) = A_prev.shape

    # Retrieve the kernel size. Height and width are kept separate so that
    # non-square filters are handled, and the filter's channel count gets its
    # own name instead of silently overwriting the input's.
    (f_h, f_w, n_CW, n_C) = W.shape

    # Without this check a 1-channel operand would broadcast silently and
    # produce a plausible-looking but wrong result
    if n_CP != n_CW:
        raise ValueError(
            "A_prev has {} channels but W expects {}".format(n_CP, n_CW))

    # Retrieve the values for stride and padding from h_p dictionary
    stride = h_p['stride']
    pad = h_p['pad']

    # Convolution output size calculation. Floor division (rather than
    # truncating a float quotient) keeps the count correct when the filter is
    # larger than the padded input; clamping at 0 then yields an empty output
    # instead of a negative dimension.
    n_H = max((n_HP + 2 * pad - f_h) // stride + 1, 0)
    n_W = max((n_WP + 2 * pad - f_w) // stride + 1, 0)

    # Initialize output Z with zeros
    Z = np.zeros((m, n_H, n_W, n_C))

    # Pad previous activation layer
    A_prev_pad = zero_pad(A_prev, pad)

    for i in range(m):

        # Takes the entire i'th example
        a_prev_pad = A_prev_pad[i]

        for h in range(n_H):
            # Vertical extent of the current window
            vert_start = stride * h
            vert_end = vert_start + f_h

            for w in range(n_W):
                # Horizontal extent of the current window
                horiz_start = stride * w
                horiz_end = horiz_start + f_w

                # The window (with all of its channels) is the same for every
                # filter, so it is taken once per position
                a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]

                for c in range(n_C):

                    # Take the weights and bias of filter c
                    weights = W[:, :, :, c]
                    biases = b[:, :, :, c]

                    # Perform the convolution operation
                    Z[i, h, w, c] = conv_single_step(a_slice_prev, weights, biases)

    cache = (A_prev, W, b, h_p)

    return Z, cache
                    

In [7]:
# Sanity Check: convolve 2 seeded random examples of size 5x7 with 4 channels
# using eight 3x3 filters, stride 2 and pad 1, then summarise Z by its mean
np.random.seed(1)
h_p = {'stride':2, 'pad':1}
A_prev = np.random.randn(2, 5, 7, 4)
W = np.random.randn(3, 3, 4, 8)
# 8 values for the bias parameters as there are 8 filters
b = np.random.randn(1, 1, 1, 8)
Z, cache_conv = conv_forward(A_prev, W, b, h_p)
# Left as the last expression so the notebook displays it
np.mean(Z)

0.5511276474566768

In [8]:
def pool_forward(A_prev, h_p, mode = 'max'):
    """
    Forward pass of a pooling layer.

    Arguments:
    A_prev -- Previous activation layer or input X, indexed as
              (example, height, width, channel)
    h_p -- hyperparameters: dictionary with the keys 'f' (window size) and 'stride'
    mode -- 'max' for max pooling or 'average' for average pooling

    Returns:
    A, cache -- Pooled activation of shape (m, n_H, n_W, n_C) and the tuple
                (A_prev, h_p) kept for back propagation

    Raises:
    ValueError -- if mode is neither 'max' nor 'average'
    """

    # Pick the reducer once, up front. An unknown mode is an error: reporting
    # it per output element and returning zeros would hide the mistake.
    if mode == 'max':
        pool = np.max
    elif mode == 'average':
        pool = np.mean
    else:
        raise ValueError(str(mode) + "-type pooling layer not Defined")

    (m, n_HP, n_WP, n_CP) = A_prev.shape

    f = h_p['f']
    stride = h_p['stride']

    # Number of window positions along each axis. Floor division keeps the
    # arithmetic in integers; clamping at 0 gives an empty output (rather
    # than a negative dimension) when the window is larger than the input.
    n_H = max((n_HP - f) // stride + 1, 0)
    n_W = max((n_WP - f) // stride + 1, 0)
    # Pooling acts on each channel separately, so the channel count is kept
    n_C = n_CP

    A = np.zeros((m, n_H, n_W, n_C))

    for i in range(m):
        a_prev_slice = A_prev[i]

        for h in range(n_H):
            vert_start = stride * h
            vert_end = vert_start + f

            for w in range(n_W):
                horiz_start = stride * w
                horiz_end = horiz_start + f

                for c in range(n_C):
                    # Window of a single channel
                    a_slice_prev = a_prev_slice[vert_start:vert_end, horiz_start:horiz_end, c]
                    A[i, h, w, c] = pool(a_slice_prev)

    cache = (A_prev, h_p)

    return A, cache

In [9]:
# Sanity check: pool 2 seeded random 5x5 examples with 3 channels using a
# 3x3 window and stride 1, once per pooling mode
np.random.seed(1)
A_prev = np.random.randn(2, 5, 5, 3)
h_p = {'stride': 1, 'f': 3}

# Max pooling: print the output shape and one row of the second example
A, cache = pool_forward(A_prev, h_p, mode = 'max')
print(A.shape, A[1, 1])

# Average pooling on the same input; A and cache are overwritten
A, cache = pool_forward(A_prev, h_p, mode = 'average')
print(A.shape, A[1, 1])

(2, 3, 3, 3) [[1.96710175 0.84616065 1.27375593]
 [1.96710175 0.84616065 1.23616403]
 [1.62765075 1.12141771 1.2245077 ]]
(2, 3, 3, 3) [[ 0.44497696 -0.00261695 -0.31040307]
 [ 0.50811474 -0.23493734 -0.23961183]
 [ 0.11872677  0.17255229 -0.22112197]]
