In [1]:
import numpy as np
seed=10
np.random.seed(seed)

In [2]:
n_H=32  #Height of image
n_W=32  #Width of image
n_C=1   #number of colour channels
f_H=3   #Height of filters
f_W=3   #Width of filters
n_f=4   #number of filters
lr=0.1  #learning_rate
n_F=10  #Number of neurons in final layer

pad_size = 1
stride = 2

#New height and width after
n_H2 = int(np.floor((n_H-f_H+2*pad_size)/stride)) + 1
n_W2 = int(np.floor((n_W-f_W+2*pad_size)/stride)) + 1

cache = dict()

In [3]:
def padding(X, pad_size,pad_variable=0):
    """
    Argument:
    X -- python numpy array of shape (m, n_H, n_W, n_C) representing a batch of m images
    pad_size -- integer, amount of padding around each image on vertical and horizontal dimensions
    pad_variable -- the number to pad with(default 0)
    
    Returns:
    X_pad -- padded image of shape (m, n_H + 2*pad, n_W + 2*pad,n_C)
    """
    
    X_pad = np.pad(X,((0,0),(pad_size,pad_size),(pad_size,pad_size),(0,0)),'constant',constant_values = pad_variable)
    
    return X_pad

def relu(x):
    """
    Implements ReLU activation funtion for array x
    """
    xf = (x>0).astype(np.int)
    return(np.multiply(xf,x))


In [4]:

def conv_single_step(X_slice,W, b):
    """
    Apply one filter defined by parameters W[] on a single slice (slice) from the image
    
    Arguments:
    X_slice -- slice of input data of shape (f_H, f_W, n_C)
    W -- Weight parameters contained in a window - matrix of shape (f_H, f_W, n_C, n_f)
    b -- Bias parameters contained in a window - matrix of shape (1, 1, 1, n_f)
    
    Returns:
    Z -- a scalar value, result of convolving the sliding window (W, b) on a slice x of the input data
    """
    s = np.multiply(X_slice,W)
    A = np.sum(s)
    A = A + float(b)
    Z = relu(A)
    return Z

In [5]:

def conv_forward(X,cache):
    """
    Implements the forward propagation for a convolution function
    
    Arguments:
    X -- Input image data, numpy array of shape (m, n_H, n_W, n_C)
    cache
    
    cache imports:
    W -- Weights, numpy array of shape (f_H, f_W,n_C,n_f)
    b -- Biases, numpy array of shape (1,1,1, n_f)
     
    cache saves:
    X_pad -- padded 
    Z -- conv output, numpy array of shape (m, n_H2, n_W2, n_f)
    R_Z -- bool which stores whether Z>0 or not, which is basically derivative of ReLU
    """
    
    W = cache['W']
    b = cache['b']
    m = X.shape[0]

    
    # Initialize the output volume Z with zeros. (≈1 line)
    Z = np.zeros((m,n_H2,n_W2,n_f))
    # Initialize R_Z
    R_Z =np.zeros((m,n_H2,n_W2,n_f))  
    # Create padded image
    X_pad = padding(X,1,0)
    
    for i in range(m):                               # loop over the batch of training examples
        Xc = X_pad[i]                               # Select ith training example's padded image
        for h in range(n_H2):                           # loop over vertical axis of the output volume
            for w in range(n_W2):                       # loop over horizontal axis of the output volume
                for c in range(n_f):                  # loop over channels (= #filters) of the output volume
                    
                    # Find the corners of the current "slice" (≈4 lines)
                    vert_start = h*stride
                    vert_end = vert_start+f_H
                    horiz_start = w*stride
                    horiz_end = horiz_start+f_W
                    
                    # Use the corners to define the (3D) slice of a_prev_pad (See Hint above the cell)
                    X_slice = Xc[vert_start:vert_end,horiz_start:horiz_end,:]
                    
                    # Convolve the (3D) slice with the correct filter W and bias b, to get back one output neuron
#                     print("******For c=",c," ******")
#                     print(X_slice.shape)
#                     print(W[:,:,:,c].shape)
#                     print(b[:,:,:,c].shape)
                    Z[i, h, w, c] = conv_single_step(X_slice,W[:,:,:,c],b[:,:,:,c])
                    R_Z[i, h, w, c] = int(Z[i, h, w ,c]>0)
                    
                                        
    assert(Z.shape == (m, n_H2, n_W2, n_f))
    # Save information in "cache" for the backprop
    cache['Z']=Z
    cache['X_pad']=X_pad
    cache['R_Z']=R_Z
    print(" Convolutional forward pass done")
    return cache

In [6]:
def fully_connected(cache):
    """
    Implements the fully connected layer
    
    Cache imports:
    Z -- output activations of the previous layer, numpy array of shape (m, n_H2, n_W2, n_f)
    W2 -- Weights, numpy array of shape (n_H2,n_W2,n_f,n_F)
    b2 -- Biases, numpy array of shape (1,1,1,n_F)
    
    cache saves:
    S -- Exponential sum(over all outputs) for every image of shape (m)
    Z2 -- Output from fully connected layer after softmax of shape(m,n_F)
        
    """
    
    W2=cache['W2']
    b2=cache['b2']
    Z=cache['Z']
    m = Z.shape[0] 
    #initializing output activations
    Z2 = np.zeros((m,n_F))
    
    S = np.zeros((m,1))
    for i in range(m):
        for c in range(n_f):
            for a in range(n_F):
                A = np.multiply(Z[i,:,:,c],W2[:,:,c,a])
                Z2[i,a] = np.exp(np.sum(A) + b2[:,:,:,a])
        s = np.sum(Z2[i,:])
        Z2[i,:]=Z2[i,:]/s
        S[i]=s
    cache['S']=S
    cache['Z2']=Z2
    print(" Fully connected forward pass done")
    return(cache)
            

In [7]:
def back_prop(cache,Y):
    """
    Updates the weights using simple backpropagation
    
    Arguments:
    E -- error output, numpy array of shape (m,n_F)
    Y -- Binary actual output
    
    
    """
    #Import all saved data from cache
    S = cache['S']
    X_pad = cache['X_pad']
    W2 = cache['W2']
    R_Z = cache['R_Z']
    Z = cache['Z']
    W = cache['W']
    b = cache['b']
    W2 = cache['W2']
    b2 = cache['b2']
    m = Z.shape[0]
    
    #Initialize all weight and bias updates
    dW2 = np.zeros((n_H2,n_W2,n_f,n_F))
    db2 = np.zeros((1,1,1,n_F))
    dW = np.zeros((f_H,f_W,n_C,n_f))
    db = np.zeros((1,1,1,n_f))
    
    
    for a in range(n_F):
        for i in range(m): #Loopover images
            Xc = X_pad[i]
            dW2[:,:,:,a] += (-lr/m) * (Z[i,:,:,:]) * (Y[i,a]) * (S[i])
            db2[:,:,:,a] += (-lr/m) * (Y[i,a]) * (S[i])
            
            for c in range(n_f): #Loop over
                for h in range(n_H2):
                    for w in range(n_W2):
                         
                            
                        # Find the corners of the current "slice" (≈4 lines)
                        vert_start = h*stride
                        vert_end = vert_start+f_H
                        horiz_start = w*stride
                        horiz_end = horiz_start+f_W
                        X_slice = Xc[vert_start:vert_end,horiz_start:horiz_end,:]
                        
                        dW[:,:,:,c] += (-lr/m) * Y[i,a] * S[i] * W2[h,w,c,a] * R_Z[i,h,w,c] * X_slice
                        db[:,:,:,c] += (-lr/m) * Y[i,a] * S[i] * W2[h,w,c,a] * R_Z[i,h,w,c]
                        
    W2 += dW2
    b2 += db2
    W += dW
    b += db
    cache['W']=W
    cache['b']=b
    cache['W2']=W2
    cache['b2']=b2
    print("Backprop done, parameters updated")
    return cache
            
                               
            
            

In [8]:
def cross_entropy_loss(Z2,Y):
    """
    Gets total cross entropy loss for binary output Y
    
    Arguments:
    Z2 -- fully connected output, numpy array of shape (m, n_F)
    Y -- Binary actual output, numpy array of shape (m,n_F)
    
    Returns:
    E -- error output
    """
    E=0 
    for a in range(n_F):
        e=0
        for i in range(m):
            e += - Y[i,a]*np.log(Z2[i,a])
        e=e/m
        E+=e
    return(E)

In [None]:
def forward_pass(X,cache,Y):
    cache = conv_forward(X,cache)
    cache = fully_connected(cache)
    print(" Cross entropy loss is:",cross_entropy_loss(cache['Z2'],Y))
    cache = back_prop(cache,Y)

In [None]:
#Setting number of images
m = 100 

#random input data
X = np.random.randn(m,n_H,n_W,n_C)

#random output data
Y = np.zeros((m,n_F)) 
Y[0:50,0:5]=np.ones((50,5))
Y[0:50,5:]=np.ones((50,5))

cache['W'] = np.random.randn(f_H, f_W,n_C,n_f)
cache['b'] = np.random.randn(1,1,1,n_f)
cache['W2'] = np.random.randn(n_H2,n_W2,n_f,n_F)
cache['b2'] = np.random.randn(1,1,1,n_F)

epochs = 10
for e in range(epochs):
    forward_pass(X,cache,Y)
    


 Convolutional forward pass done
 Fully connected forward pass done
 Cross entropy loss is: 247.177687876
Backprop done, parameters updated
 Convolutional forward pass done
 Fully connected forward pass done
 Cross entropy loss is: nan


