In [72]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline

In [73]:
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

train_data = mnist.train.images
train_y = mnist.train.labels
test_data  = mnist.test.images
test_y = mnist.test.labels    

#reshape the train_data and test_data
def reshape_data(train_data, test_data):
    
    train_x = (train_data>0).reshape(55000,28,28,1).astype(np.uint8)*255
    test_x  = (test_data>0).reshape(10000,28,28,1).astype(np.uint8)*255

    return train_x, test_x

# split train_data into train_x and validation_x
train_x, test_x = reshape_data(train_data, test_data)
validation_x   = train_x[:5000, ...]
validation_y = train_y[:5000]
train_x = train_x[5000:, ...]
train_y = train_y[5000:]

print("Shape of Train_x:", train_x.shape)
print("Shape of Train_y:", train_y.shape)
print(".......................................")
print("Shape of Validation_x:", validation_x.shape)
print("Shape of Validation_y:", validation_y.shape)
print(".......................................")
print("Shape of Test_x:", test_x.shape)
print("Shape of Test_y:",test_y.shape )

Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz
Shape of Train_x: (50000, 28, 28, 1)
Shape of Train_y: (50000, 10)
.......................................
Shape of Validation_x: (5000, 28, 28, 1)
Shape of Validation_y: (5000, 10)
.......................................
Shape of Test_x: (10000, 28, 28, 1)
Shape of Test_y: (10000, 10)


In [58]:
def zero_pad(X, pad):
    
    X_pad = np.pad(X, ((0,0), (pad,pad), (pad,pad),(0,0)),'constant')
    
    return X_pad

def conv_single_step(a_slice_prev, W, b):
    
    s = a_slice_prev*W
    Z = np.sum(s)
    Z = Z + float(b)

    return Z


def conv_forward(A_prev, W, b, hparameters):
     
    (m, n_H_prev, n_W_prev, n_C_prev) = np.shape(A_prev)
    
    # Retrieve dimensions from W's shape
    (f, f, n_C_prev, n_C) = np.shape(W)
    
    # Retrieve information from "hparameters"
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    
    # Compute the dimensions of the CONV output volume using the formula given above
    n_H = int((n_H_prev - f + 2*pad)/stride + 1)
    n_W = int((n_W_prev - f + 2*pad)/stride + 1)
    
    # Initialize the output volume Z with zeros.
    Z = np.zeros((m, n_H, n_W, n_C))
    
    # Create A_prev_pad by padding A_prev
    A_prev_pad = zero_pad(A_prev, pad)
    
    for i in range(m):                               # loop over the batch of training examples
        a_prev_pad = A_prev_pad[i]                  # Select ith training example's padded activation
        for h in range(n_H):                           # loop over vertical axis of the output volume
            for w in range(n_W):                       # loop over horizontal axis of the output volume
                for c in range(n_C):                   # loop over channels (= #filters) of the output volume
                    
                    # Find the corners of the current "slice" 
                    vert_start = h*stride
                    vert_end = h*stride+f
                    horiz_start = w*stride
                    horiz_end = w*stride+f
                    
                    # Use the corners to define the (3D) slice of a_prev_pad
                    a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                    
                    # Convolve the (3D) slice with the correct filter W and bias b, to get back one output neuron
                    Z[i, h, w, c] = conv_single_step(a_slice_prev, W[:,:,:,c], float(b[:,:,:,c]))
                                        
    
    # Save information in "cache" for the backprop
    cache = (A_prev, W, b, hparameters)
    
    return Z, cache

In [64]:
#Run conv_forward
A_prev = train_x[0:10]
W = np.random.randn(5,5,1,10)
b = np.random.randn(1,1,1,10)
hparameters = {"pad" : 2,
               "stride": 2}

Z, cache_conv = conv_forward(A_prev, W, b, hparameters)

print("Z's mean =", np.mean(Z))
print("Z[3,2,1] =", Z[3,2,1])
print("cache_conv[0][1][2][3] =", cache_conv[0][1][2][3])

Z's mean = -54.267488889652185
Z[3,2,1] = [ 0.30182753 -0.46297576  0.2171394   1.60236826  0.03968618 -0.01280611
 -1.87198718 -0.46090325 -1.83461454  0.39634688]
cache_conv[0][1][2][3] = [0]


In [65]:
def pool_forward(A_prev, hparameters, mode = "max"):
    
    
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    f = hparameters["f"]
    stride = hparameters["stride"]
    n_H = int(1 + (n_H_prev - f) / stride)
    n_W = int(1 + (n_W_prev - f) / stride)
    n_C = n_C_prev
    A = np.zeros((m, n_H, n_W, n_C))              
    
    
    for i in range(m):                           # loop over the training examples
        for h in range(n_H):                     # loop on the vertical axis of the output volume
            for w in range(n_W):                 # loop on the horizontal axis of the output volume
                for c in range (n_C):            # loop over the channels of the output volume
                    
                    # Find the corners of the current "slice"
                    vert_start = h*stride
                    vert_end = h*stride + f
                    horiz_start = w*stride
                    horiz_end = w*stride + f
                    
                    # Use the corners to define the current slice on the ith training example of A_prev
                    a_prev_slice = A_prev[i, vert_start:vert_end, horiz_start:horiz_end, c]
                    
                    # Compute the pooling operation on the slice
                    if mode == "max":
                        A[i, h, w, c] = np.max(a_prev_slice)
                    elif mode == "average":
                        A[i, h, w, c] = np.mean(a_prev_slice)
    
  
    
    # Store the input and hparameters in "cache" for pool_backward()
    cache = (A_prev, hparameters)
    
   
    
    return A, cache

In [67]:
#Run forward pooling
A_prev = train_x[0:10]
hparameters = {"stride" : 2, "f": 3}

A, cache = pool_forward(A_prev, hparameters)
print("mode = max")
#print("A =", A)

A, cache = pool_forward(A_prev, hparameters, mode = "average")
print("mode = average")
#print("A =", A)

mode = max
mode = average


In [68]:
def conv_backward(dZ, cache):
    
    # Retrieve information from "cache"
    (A_prev, W, b, hparameters) = cache
    
    # Retrieve dimensions from A_prev's shape
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    
    # Retrieve dimensions from W's shape
    (f, f, n_C_prev, n_C) = W.shape
    
    # Retrieve information from "hparameters"
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    
    # Retrieve dimensions from dZ's shape
    (m, n_H, n_W, n_C) = dZ.shape
    
    # Initialize dA_prev, dW, db with the correct shapes
    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))                           
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))

    # Pad A_prev and dA_prev
    A_prev_pad = zero_pad(A_prev,pad)
    dA_prev_pad = zero_pad(dA_prev,pad)
    
    for i in range(m):                       # loop over the training examples
        
        # select ith training example from A_prev_pad and dA_prev_pad
        a_prev_pad = A_prev_pad[i, :, :, :]
        da_prev_pad = dA_prev_pad[i, :, :, :]
        
        for h in range(n_H):                   # loop over vertical axis of the output volume
            for w in range(n_W):               # loop over horizontal axis of the output volume
                for c in range(n_C):           # loop over the channels of the output volume
                    
                    # Find the corners of the current "slice"
                    vert_start = h*stride
                    vert_end = vert_start + f
                    horiz_start = w*stride
                    horiz_end = horiz_start + f
                    
                    # Use the corners to define the slice from a_prev_pad
                    a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]

                    # Update gradients for the window and the filter's parameters using the code formulas given above
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] +=  W[:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    db[:,:,:,c] += dZ[i, h, w, c]
                    
        # Set the ith training example's dA_prev to the unpaded da_prev_pad
        dA_prev[i, :, :, :] = da_prev_pad[pad:-pad, pad:-pad, :]
    
    # Making sure your output shape is correct
    assert(dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))
    
    return dA_prev, dW, db

In [69]:
dA, dW, db = conv_backward(Z, cache_conv)
print("dA_mean =", np.mean(dA))
print("dW_mean =", np.mean(dW))
print("db_mean =", np.mean(db))

dA_mean = 1198.6808962270513
dW_mean = -13471979.235181382
db_mean = -106364.27822371628


In [70]:
def create_mask_from_window(x):
    
    mask = (x == np.max(x))
    
    return mask

In [71]:
x = np.random.randn(2,3)
mask = create_mask_from_window(x)
print('x = ', x)
print("mask = ", mask)

x =  [[ 1.62434536 -0.61175641 -0.52817175]
 [-1.07296862  0.86540763 -2.3015387 ]]
mask =  [[ True False False]
 [False False False]]


In [47]:
def distribute_value(dz, shape):
    
    # Retrieve dimensions from shape 
    (n_H, n_W) = shape
    
    # Compute the value to distribute on the matrix
    average = dz/(n_H*n_W)
    
    # Create a matrix where every entry is the "average" value 
    a = np.ones((n_H,n_W))*average
    
    return a

In [48]:
a = distribute_value(2, (2,2))
print('distributed value =', a)

distributed value = [[0.5 0.5]
 [0.5 0.5]]


In [49]:
def pool_backward(dA, cache, mode = "max"):
        
    # Retrieve information from cache
    (A_prev, hparameters) = cache
    
    # Retrieve hyperparameters from "hparameters"
    stride = hparameters["stride"]
    f = hparameters["f"]
    
    # Retrieve dimensions from A_prev's shape and dA's shape
    m, n_H_prev, n_W_prev, n_C_prev = np.shape(A_prev)
    m, n_H, n_W, n_C = np.shape(dA)
    
    # Initialize dA_prev with zeros
    dA_prev = np.zeros(np.shape(A_prev))
    
    for i in range(m):                       # loop over the training examples
        
        # select training example from A_prev
        a_prev = A_prev[i, :, :, :]
        
        for h in range(n_H):                   # loop on the vertical axis
            for w in range(n_W):               # loop on the horizontal axis
                for c in range(n_C):           # loop over the channels (depth)
                    
                    # Find the corners of the current "slice"
                    vert_start = h*stride
                    vert_end = vert_start + f
                    horiz_start = w*stride
                    horiz_end = horiz_start + f
                    
                    # Compute the backward propagation in both modes.
                    if mode == "max":
                        
                        # Use the corners and "c" to define the current slice from a_prev 
                        a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c]
                        # Create the mask from a_prev_slice (≈1 line)
                        mask = create_mask_from_window(a_prev_slice)
                        # Set dA_prev to be dA_prev + (the mask multiplied by the correct entry of dA) 
                        dA_prev[i, vert_start: vert_end, horiz_start: horiz_end, c] += mask*dA[i, h*stride, w*stride, c]
                        
                    elif mode == "average":
                        
                        # Get the value a from dA
                        da = distribute_value(dA,(f,f))
                        # Define the shape of the filter as fxf
                        shape = (f,f)
                        # Distribute it to get the correct slice of dA_prev. i.e. Add the distributed value of da. 
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += da[i, vert_start:vert_end, horiz_start:horiz_end, c]
                        
    # Making sure your output shape is correct
    assert(dA_prev.shape == A_prev.shape)
    
    return dA_prev

In [53]:
A_pre = train_x[0:50]
hparameters = {"stride" : 1, "f": 2}
A, cache = pool_forward(A_prev, hparameters)
dA = np.random.randn(5, 28, 28, 1)

dA_prev = pool_backward(dA, cache, mode = "max")
print("mode = max")
print('mean of dA = ', np.mean(dA))
print('dA_prev[1,1] = ', dA_prev[1,1])  
print()
#dA_prev = pool_backward(dA, cache, mode = "average")
#print("mode = average")
#print('mean of dA = ', np.mean(dA))
#print('dA_prev[1,1] = ', dA_prev[1,1]) 

mode = max
mean of dA =  0.013676820545002506
dA_prev[1,1] =  [[ 0.20323943]
 [ 0.20843787]
 [ 1.02052687]
 [ 0.95248445]
 [-0.83611591]
 [-1.25023392]
 [ 0.3479425 ]
 [ 0.9962105 ]
 [ 0.81944622]
 [ 2.70618512]
 [ 1.6319785 ]
 [-0.30530301]
 [-0.03323547]
 [-1.61915887]
 [-2.37481559]
 [-0.1170443 ]
 [ 1.88423947]
 [ 0.32058087]
 [-2.73033102]
 [-2.91917979]
 [-0.4946884 ]
 [ 0.25873012]
 [-1.19970265]
 [-5.72393947]
 [-2.76620267]
 [ 1.95711663]
 [-4.18259764]
 [-5.47305323]]

