# <span style="color:blue"> Convolutional layers with NumPy </span>

In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Zero padding

Write a function that pads an input $X$ of shape $(m,n_H,n_W,n_C)$ with zeros along the height and width axes (i.e., axes $1$ and $2$). Here, $m$, $n_H$, $n_W$, and $n_C$ denote the number of training examples, the height and width of the image, and the number of channels, respectively.

In [2]:
def zero_pad(X, p):
    """
    Surround every image of a batch with a frame of zeros.

    Input:
    X: data; X.shape = (m, n_H, n_W, n_C)
    p: padding; int, number of zeros added before and after along height and width

    Output:
    X_p: padded image; X_p.shape = (m, n_H+2p, n_W+2p, n_C)
    """
    untouched = (0, 0)  # batch and channel axes receive no padding
    border = (p, p)     # p zeros on both ends of the height and width axes
    pad_width = (untouched, border, border, untouched)

    return np.pad(X, pad_width, mode='constant', constant_values=0)

In [8]:
# Demo: pad one random 3x3 image with two channels by a single pixel.
# NOTE: no random seed is set in this cell, so the printed values differ between runs.
X = np.random.randn(1, 3, 3, 2)
# First channel of the only example, before padding.
print(X[0,:,:,0])
print(f"X.shape = {X.shape}")
X_p = zero_pad(X, 1)
# Same channel after padding: the original values framed by one ring of zeros.
print(X_p[0,:,:,0])
print(f"X_p.shape = {X_p.shape}")

[[ 1.25354314 -0.11923911 -0.13090589]
 [ 0.68255556 -1.30304607 -1.33926508]
 [-2.53665143 -0.07492347  0.12569815]]
X.shape = (1, 3, 3, 2)
[[ 0.          0.          0.          0.          0.        ]
 [ 0.          1.25354314 -0.11923911 -0.13090589  0.        ]
 [ 0.          0.68255556 -1.30304607 -1.33926508  0.        ]
 [ 0.         -2.53665143 -0.07492347  0.12569815  0.        ]
 [ 0.          0.          0.          0.          0.        ]]
X_p.shape = (1, 5, 5, 2)


# Single convolution
Write a function that convolves a slice of the previous layer's activation of shape $(f,f,n_C^{\ell-1})$ with a filter/kernel $W$ of shape $(f,f,n_C^{\ell-1})$ and adds a bias $b$ of shape $(1,1,1)$: multiply the slice and the filter element-wise, sum all the products, and add the bias.

In [27]:
def conv_single(a_slice_prev, W, b):
    """
    Apply one filter to one window of the previous layer's activation.

    Input:
    a_slice_prev: activation from previous layer. a_slice_prev.shape = (f, f, n_C_prev)
    W: filter weights; W.shape = (f, f, n_C_prev)
    b: bias parameter; b.shape = (1, 1, 1); any other one-element array,
       a NumPy scalar or a plain Python number is accepted as well

    Output:
    z: corresponding linear activation of the current layer; scalar

    Raises:
    ValueError: if b holds more than one element
    """
    # np.asarray lets the bias be a Python number, a NumPy scalar or a
    # one-element array; .item() then unwraps it to a plain Python scalar.
    bias = np.asarray(b).item()
    # Element-wise product, summed over height, width and channels.
    z = np.multiply(W, a_slice_prev).sum() + bias
    return z

In [28]:
# Demo: convolve a random 3x3x2 integer slice with a random 0/1 filter and zero bias.
# NOTE: no random seed is set in this cell, so the printed values differ between runs.
a_slice_prev = np.random.randint(10,size=(3,3,2))
print(f"first channel of a:\n{a_slice_prev[:,:,0]}")
print(f"second channel of a:\n{a_slice_prev[:,:,1]}")
# Entries of the filter are drawn from {0, 1}, so z is the sum of the slice
# entries at the positions where the filter is 1 (easy to check by hand).
W = np.random.randint(2,size=(3,3,2))
print(f"first channel of kernel:\n{W[:,:,0]}")
print(f"second channel of kernel:\n{W[:,:,1]}")
z = conv_single(a_slice_prev, W, np.zeros((1,1,1)))
print(f"z = {z}")

first channel of a:
[[8 4 1]
 [9 4 7]
 [8 4 4]]
second channel of a:
[[1 6 6]
 [3 3 3]
 [3 4 6]]
first channel of kernel:
[[1 1 0]
 [0 1 1]
 [0 1 0]]
second channel of kernel:
[[0 0 0]
 [0 0 0]
 [0 1 1]]
z = 37.0


# Convolution layer
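Now write a full convolution layer: zero-pad the input, slide every filter over it with the given stride, and apply the single convolution at each position. With padding $p$, stride $s$, and filter size $f$, the output height is $n_H = \lfloor (n_H^{\ell-1} + 2p - f)/s \rfloor + 1$, and likewise for the width $n_W$.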

In [34]:
def conv(A_prev, W, b, hyper_parameters):
    """
    Forward pass of a convolution layer (linear part only, no activation function).

    Input:
    A_prev: activations of the previous layer; A_prev.shape = (m, n_H_prev, n_W_prev, n_C_prev)
    W: filter weights; W.shape = (f, f, n_C_prev, n_C); a rectangular filter
       of shape (f_H, f_W, n_C_prev, n_C) is handled as well
    b: bias parameters; b.shape (1, 1, 1, n_C)
    hyper_parameters: dict with integer entries "pad" and "stride"

    Output:
    z: corresponding linear activation of the current layer; z.shape = (m, n_H, n_W, n_C)
       with n_H = floor((n_H_prev + 2*pad - f) / stride) + 1, and likewise for n_W

    Raises:
    ValueError: if A_prev and W disagree on the number of input channels, or
                if the filter is larger than the padded input
    """

    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    # Keep the filter's own channel count in a separate name so it can be
    # checked against the input instead of silently overwriting n_C_prev.
    (f_H, f_W, n_C_filter, n_C) = W.shape
    p = hyper_parameters["pad"]
    s = hyper_parameters["stride"]

    if n_C_filter != n_C_prev:
        # Without this check a 1-channel input would broadcast against a
        # multi-channel filter and produce a wrong result without any error.
        raise ValueError(
            f"channel mismatch: A_prev has {n_C_prev} channels but W expects {n_C_filter}"
        )
    if f_H > n_H_prev + 2*p or f_W > n_W_prev + 2*p:
        raise ValueError(
            f"filter of size ({f_H}, {f_W}) is larger than the padded input "
            f"({n_H_prev + 2*p}, {n_W_prev + 2*p})"
        )

    # Integer floor division: number of filter positions that fit along each axis.
    n_H = (n_H_prev + 2*p - f_H)//s + 1
    n_W = (n_W_prev + 2*p - f_W)//s + 1
    z = np.zeros((m, n_H, n_W, n_C))
    A_prev_pad = zero_pad(A_prev, p)
    for i in range(m):
        for h in range(n_H):
            h_lo = h*s          # first row of the window in A_prev_pad
            h_hi = h_lo + f_H   # one past the last row
            for w in range(n_W):
                w_lo = w*s          # first column of the window in A_prev_pad
                w_hi = w_lo + f_W   # one past the last column
                # The window does not depend on the output channel, so it is
                # sliced once and shared by all n_C filters.
                a_slice_prev = A_prev_pad[i,h_lo:h_hi,w_lo:w_hi,:]
                for c in range(n_C):
                    z[i,h,w,c] = conv_single(a_slice_prev, W[:,:,:,c], b[0,0,0,c])

    return z

# Pool layer

In [45]:
def pool(A_prev, hyper_parameters, mode = "max"):
    """
    Forward pass of a pooling layer.

    Input:
    A_prev: activations of the previous layer; A_prev.shape = (m, n_H_prev, n_W_prev, n_C_prev)
    hyper_parameters: dict with integer entries "f" (window size) and "stride"
    mode: pooling mode ("max" or "average")

    Output:
    A: output of the pool layer; A.shape = (m, n_H, n_W, n_C) with n_C = n_C_prev,
       n_H = floor((n_H_prev - f) / stride) + 1, and likewise for n_W

    Raises:
    ValueError: if mode is neither "max" nor "average", or if the pooling
                window is larger than the input
    """

    # Resolve the reduction up front so that an unknown mode fails loudly
    # instead of returning an array that was never filled in.
    if mode == "max":
        reduce_window = np.max
    elif mode == "average":
        reduce_window = np.mean
    else:
        raise ValueError(f'unknown pooling mode {mode!r}; expected "max" or "average"')

    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    f = hyper_parameters["f"]
    s = hyper_parameters["stride"]

    if f > n_H_prev or f > n_W_prev:
        raise ValueError(
            f"pooling window of size {f} is larger than the input ({n_H_prev}, {n_W_prev})"
        )

    # Integer floor division: number of window positions that fit along each axis.
    n_H = (n_H_prev - f)//s + 1
    n_W = (n_W_prev - f)//s + 1
    n_C = n_C_prev

    A = np.zeros((m, n_H, n_W, n_C))

    for h in range(n_H):
        h_lo = h*s
        h_hi = h_lo + f
        for w in range(n_W):
            w_lo = w*s
            w_hi = w_lo + f
            # Window for every example and every channel at once;
            # reducing over axes 1 and 2 (height, width) leaves shape (m, n_C).
            window = A_prev[:, h_lo:h_hi, w_lo:w_hi, :]
            A[:, h, w, :] = reduce_window(window, axis=(1, 2))

    return A