In [1]:
import numpy as np

In [13]:
def conv_step(x_slice, w, b):
    """
    Applies one filter to one window of the input.

    x_slice : window of the (padded) input, same shape as w
    w : one filter
    b : bias for that filter - a scalar or an array holding exactly one value

    returns:
    Z - sum of the element-wise product of x_slice and w, plus the bias

    raises:
    TypeError - if b holds more than one value
    """
    bias = np.asarray(b)
    if bias.size != 1:
        # float() on a multi-element array raised TypeError; keep that contract
        raise TypeError("only length-1 arrays can be converted to Python scalars")
    # .item() pulls the scalar out explicitly: calling float() directly on an
    # array with ndim > 0 is deprecated in recent NumPy and emits a warning
    Z = np.sum(np.multiply(x_slice, w))
    Z = Z + float(bias.item())
    return Z

def conv_forward(X, weight, b, hparams):
    """
    Forward pass of a convolution layer, computed with explicit loops.

    X : input to the current layer (n, H, W, Cin)
    weight : filters (f, f, Cin, Cout)
    b : bias (1, 1, 1, Cout)
    hparams : dict with keys 'stride' and 'pad'

    returns:
    Z - output (n, Ho, Wo, Cout)
    cache - (X, weight, b, hparams), the values needed for backward
    """

    # unpack shapes and hyper-parameters
    n, H, W, Cin = X.shape
    f, f, Cin, Cout = weight.shape
    stride, pad = hparams['stride'], hparams['pad']

    # spatial size of the output
    Ho = 1 + int((H + 2 * pad - f) / stride)
    Wo = 1 + int((W + 2 * pad - f) / stride)

    # output buffer, then zero-pad the input along height and width only
    Z = np.zeros((n, Ho, Wo, Cout))
    spatial_pad = (pad, pad)
    X_pad = np.pad(X, ((0, 0), spatial_pad, spatial_pad, (0, 0)),
                   mode='constant', constant_values=0)

    for i in range(n):  # batch
        sample = X_pad[i]
        for h in range(Ho):  # output row
            top = h * stride
            bottom = top + f
            for w in range(Wo):  # output column
                left = w * stride
                right = left + f
                # the window is the same for every output channel
                window = sample[top:bottom, left:right, :]
                for c in range(Cout):  # output channel
                    Z[i, h, w, c] = conv_step(window, weight[:, :, :, c], b[:, :, :, c])

    assert Z.shape == (n, Ho, Wo, Cout)
    cache = (X, weight, b, hparams)

    return Z, cache
                    

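# dZ = dL/dO: gradient of the loss w.r.t. this layer's output, handed back by the layer processed just before it in backprop
# dL/dF = dL/dO convolved with X (gradient w.r.t. the filter F)
# dL/dX = dL/dO convolved with the 180-degree-rotated filter F (gradient w.r.t. the input X)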

def conv_backward(dZ, cache):
    """
    Implements backward pass for a convolution layer

    dZ : gradient of the loss w.r.t. the layer output (n, Ho, Wo, Cout)
    cache : (X, weight, b, hparams) tuple, as built by conv_forward

    returns:
    dX - gradient w.r.t. the input (n, H, W, Cin)
    dW - gradient w.r.t. the weights (f, f, Cin, Cout)
    dB - gradient w.r.t. the bias (1, 1, 1, Cout)
    """

    # extract params
    (X, weight, b, hparams) = cache
    n, H, W, Cin = X.shape
    (f, f, Cin, Cout) = weight.shape
    stride = hparams['stride']
    pad = hparams['pad']
    (n, Ho, Wo, Cout) = dZ.shape

    # initialize derivatives
    dW = np.zeros((f, f, Cin, Cout))
    dX = np.zeros((n, H, W, Cin))
    dB = np.zeros((1, 1, 1, Cout))

    # pad the input; gradients are accumulated in a buffer of the padded shape
    X_pad = np.pad(X, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=0)
    dX_pad = np.zeros(X_pad.shape)

    for i in range(n):  # batch
        x_curr = X_pad[i]
        dx_curr = dX_pad[i]  # view: in-place updates land in dX_pad

        for h in range(Ho):
            vert_start = h * stride
            vert_end = vert_start + f
            for w in range(Wo):
                hor_start = w * stride
                hor_end = hor_start + f

                # the window is shared by every output channel
                x_curr_slice = x_curr[vert_start:vert_end, hor_start:hor_end, :]
                dx_window = dx_curr[vert_start:vert_end, hor_start:hor_end, :]

                for c in range(Cout):
                    grad = dZ[i, h, w, c]
                    # update gradients
                    dx_window += weight[:, :, :, c] * grad
                    dW[:, :, :, c] += x_curr_slice * grad
                    dB[:, :, :, c] += grad

        # strip the padding with explicit end indices: [pad:-pad] would be the
        # empty slice [0:0] when pad == 0
        dX[i, :, :, :] = dx_curr[pad:pad + H, pad:pad + W, :]

    assert dX.shape == (n, H, W, Cin)

    return dX, dW, dB
    

In [14]:
# Forward-pass smoke test on a fixed, seeded random batch
np.random.seed(1)
A_prev = np.random.randn(10, 4, 4, 3)  # 10 samples, 4x4 spatial, 3 channels
W = np.random.randn(2, 2, 3, 8)        # eight 2x2 filters over 3 channels
b = np.random.randn(1, 1, 1, 8)        # one bias per filter
hparameters = dict(pad=2, stride=2)

Z, cache_conv = conv_forward(A_prev, W, b, hparameters)
print("Z's mean =", Z.mean())
print("Z[3,2,1] =", Z[3, 2, 1])
print("cache_conv[0][1][2][3] =", cache_conv[0][1, 2, 3])

Z's mean = 0.048995203528855794
Z[3,2,1] = [-0.61490741 -6.7439236  -2.55153897  1.75698377  3.56208902  0.53036437
  5.18531798  8.75898442]
cache_conv[0][1][2][3] = [-0.20075807  0.18656139  0.41005165]


  Z = Z + float(b)


In [15]:
# Backward-pass smoke test: the forward output Z is fed back in as the upstream gradient
np.random.seed(1)
dA, dW, db = conv_backward(Z, cache_conv)
print("dA_mean =", dA.mean())
print("dW_mean =", dW.mean())
print("db_mean =", db.mean())

dA_mean = 1.4524377775388075
dW_mean = 1.7269914583139097
db_mean = 7.839232564616838
