In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import math
import time

In [2]:
def one_hot_encoding(y, m):

    encoded_matrix = np.zeros((m, 6))

    for i in range(m):
        encoded_matrix[i, y[i]] = 1.0
      
    return encoded_matrix

In [3]:
# activation functions & their derivatives
def sigmoid(x):
    s = 1.0/(1.0 + np.exp(-x))
    return s


def relu(z):
    r = np.maximum(0, z)
    return r

In [4]:
def loss(a, y):
    x, m = a.shape

    loss = -(1.0/m) * np.sum(np.multiply(y, np.log(a)) + np.multiply(1.0 - y, np.log(1 - a)))   
    return loss

In [5]:
def pool_forward(a, f, stride, pool_type):
    (m, nh_prev, nw_prev, nc_prev) = a.shape
    
    nh = int(1 + (nh_prev - f)/stride)
    nw = int(1 + (nw_prev - f)/stride)
    nc = nc_prev
    
    pool_m = np.zeros((m, nh, nw, nc))
    
    for i in range(m):
        for h in range(nh):
            for w in range(nw):
                for c in range(nc):
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    
                    pool_slice = a[i, vert_start:vert_end, horiz_start:horiz_end, c]
                    
                    if pool_type == 'avg':
                        pool_m[i, h, w, c] = np.mean(pool_slice)
                    else:
                        pool_m[i, h, w, c] = np.max(pool_slice)
    return pool_m

In [6]:
def convolution_forward(x, weight, b, pad, stride):
    # get dimensions from a and w
    (m, nh_prev, nw_prev, nc_prev) = x.shape
    (f, f, nc_prev, nc) = weight.shape
 
    # calc dimensions of output matrix
    nh = int((nh_prev - f + 2 * pad) / stride) + 1
    nw = int((nw_prev - f + 2 * pad) / stride) + 1
    
    z = np.zeros((m, nh, nw, nc))
    a = np.zeros((m, nh, nw, nc))

    a_pad = np.pad(x, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=0)
    # print(a_pad[0])
    # a_pad = np.pad(a, (pad, pad), 'constant', constant_values=0)

    for i in range(m):
        a_val = a_pad[i]
        for h in range(nh):
            for w in range(nw):
                for c in range(nc):

                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    
                    a_slice = a_val[vert_start:vert_end, horiz_start:horiz_end, :]

                    # convolution
                    s = np.multiply(a_slice, weight[:,:,:,c]) + b[:,:,:,c]
                    z[i, h, w, c] = np.sum(s)
                    a[i, h, w, c] = relu(z[i, h, w, c])
    return a

In [7]:
def full_forward(x, w1, b1, w2, b2):
    z1 = np.dot(w1, x) + b1
    a1 = relu(z1)
    
    z2 = np.dot(w2, a1) + b2
    a2 = sigmoid(z2)
    return a1, a2

In [8]:
def forward(x, y, weights, biases, f_size, pad, p_stride, first_pass):
    (w1, w2, w3, w4) = weights
    (b1, b2, b3, b4) = biases
    (f1, f2) = f_size
    (pad1, pad2) = pad

    (m, temp1, temp2, temp3) = x.shape
    
    a1 = convolution_forward(x, w1, b1, pad1, 2)
    p1 = pool_forward(a1, f2, p_stride, pool_type='avg')
    a2 = convolution_forward(p1, w2, b2, pad2, 2)
    p2 = pool_forward(a2, f2, p_stride, pool_type='max')

    p_shape = (p1.shape, p2.shape)
    a3 = p2.reshape(m, -1)
    
    a4, a5 = full_forward(a3.T, w3, b3, w4, b4)
    y = one_hot_encoding(y, y.shape[0]).T
    cost = loss(a5, y)
        
    a = (a1, a2, a3, a4, a5)
    p = (p1, p2)
    
    if first_pass is True:
        print('Layer 1 Convolution shape {}'.format(a1.shape))
        print('Layer 1 Pool shape {}'.format(p1.shape))
        print('Layer 2 Convolution shape {}'.format(a2.shape))
        print('Layer 2 Pool shape {}'.format(p2.shape))
        print('Layer 3 Flattened shape {}'.format(a3.shape))
        print('Layer 4 Fully connected shape {}'.format(a4.shape))
        print('Layer 5 Fully connected shape {}'.format(a5.shape))
    
    return a, p_shape, p, cost

In [9]:
def gen_mask(a):
    mask = (a == np.max(a))
    return mask

In [28]:
def full_back(a, x, y, w):
#     a1 (108, 1020) a2 (6, 1020) w1 (108, 1296) w2 (6, 108)
# (108, 1020) (6, 1020)

    (w1, w2) = w
    (a1, a2) = a
    print(y.shape)
    m = y.shape[0]
    
    dz2 = a2 - y
    dz = np.dot(w2.T, dz2)
    dw2 = (1.0/m)*np.dot(dz2, a1.T)
    db2 = (1.0/m) * np.sum(dz2, axis=1, keepdims=True)
    
    # dz1 = np.dot(w2.T, dz2)
    dz1 = np.dot(w1.T, dz)
    dz1 = np.multiply(dz1, np.int64(a1 > 0))
    dw1 = (1.0/m) * np.dot(dz1, x)
    db1 = (1.0/m) * np.sum(dz1, axis=1, keepdims=True)

    db = (db1, db2)
    dw = (dw1, dw2)

    print(dz1.shape)

    return dw, db, dz1

In [11]:
def pool_back(da, a, p_shape, f, pool_type):

    m, nh_prev, nw_prev, nc_prev = a.shape
    m, nh, nw, nc = da.shape
        
    a_m = np.zeros(a.shape)
    
    for i in range(m):
        a_val = a[i]
        for h in range(nh):
            for w in range(nw):
                for c in range(nc):
                    vert_start = h 
                    vert_end = vert_start + f
                    horiz_start = w 
                    horiz_end = horiz_start + f

                    if pool_type == 'avg':
                        avg_val = da / (f * f)
                        temp = np.ones(f, f) * avg_val
                        a_m[i, vert_start:vert_end, horiz_start:horiz_end, c] += temp 
                    else:
                        a_slice = a_val[vert_start:vert_end, horiz_start:horiz_end, c]
                        mask = gen_mask(a_slice)
                        a_m[i, vert_start:vert_end, horiz_start:horiz_end, c] += \
                            (mask * da[i, h, w, c])
    
    return a_m

In [12]:
def conv_back(dz, a, weight, pad, stride):
    (m, nh_prev, nw_prev, nc_prev) = a.shape
    (f, f, nc_prev, nc) = weight.shape
    
    (m, nh, nw, nc) = dz.shape
    
    da = np.zeros((m, nh_prev, nw_prev, nc_prev))
    dw = np.zeros((f, f, nc_prev, nc))
    db = np.zeros((1, 1, 1, nc))
    
    a_pad = np.pad(a, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=0)
    da_pad = np.pad(da, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=0)

    for i in range(m):
        a_val = a_pad[i]
        da_val = da_pad[i]
        for h in range(nh):
            for w in range(nw):
                for c in range(nc):
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    
                    a_slice = a_val[vert_start:vert_end, horiz_start:horiz_end, :]
                    da_val[vert_start:vert_end, horiz_start:horiz_end, :] += weight[:,:,:,c] * \
                        dz[i,h,w,c]
                    
                    dw[:,:,:,c] += a_slice * dz[i,h,w,c]
                    db[:,:,:,c] += dz[i,h,w,c]
        if pad != 0:
            da[i,:,:,:] = da_val[pad:-pad, pad:-pad, :]
    return da, dw, db

In [13]:
def backward(x, y, a, p, w, p_shape, pad):
    (a1, a2, a3, a4, a5) = a
    (w1, w2, w3, w4) = w
    (p1, p2) = p
    (pad1, pad2) = pad

    full_a = (a4, a5)
    full_w = (w3, w4)
    dw34, db34, dz3 = full_back(full_a, a3, y, full_w)

    (p1_shape, p2_shape) = p_shape

    (f, f, temp, temp) = w2.shape
    da2 = pool_back(p2, a2, p2_shape, 5, 'max')
    dz2 = np.where(da2 < 0, 0.0, 1.0)
    da2, dw2, db2 = conv_back(dz2, p1, w2, pad2, 2)

    (f, f, temp, temp) = w1.shape
    da1 = pool_back(p1, a1, p1_shape, 5, 'max')
    dz1 = np.where(da1 < 0, 0.0, 1.0)
    da1, dw1, db1 = conv_back(dz1, x, w1, pad1, 2)
    
    (dw3, dw4) = dw34
    (db3, db4) = db34

    dw = (dw1, dw2, dw3, dw4)
    db = (db1, db2, db3, db4)
    return dw, db

In [14]:
def gen_minibatches(batch_size, x, y, seed):
    # np.random.seed(seed)
    m = x.shape[0]
    y = y.reshape(1, -1)
    
    # perm = list(np.random.permutation(m))
    # shuffled_x = x[:, perm]
    # shuffled_y = y[:, perm].reshape((1, m))
    
    num_batches = int(math.floor(m/batch_size))
    
    batches = []
    for i in range(num_batches):
        batch_x = x[i * batch_size:(i + 1) * batch_size, :, :, :]
        batch_y = y[:, i * batch_size:(i + 1) * batch_size]
        batch = (batch_x, batch_y)
        batches.append(batch)
    
    # if m % batch_size != 0:
    #     end = m - batch_size * num_batches
    #     s = num_batches * batch_size
    #     if s == 0:
    #         s = 1
    #     batch_x = x[s, :, :, :]
    #     batch_y = y[:, num_batches * batch_size]
    #     batch = (batch_x, batch_y)
    #     batches.append(batch)

    return batches

In [29]:
start_time = time.time()

# Load data
x = np.load('ex5_train_x.npy')
x = x/255.0
y = np.load('ex5_train_y.npy')

# set the seed
np.random.seed(5)

# Init parameters
f1 = 4
pad1 = 1
stride = 2
p_stride = 1
f2 = 5
pad2 = 0
n1 = 108
n2 = 6

# w1 b1 - conv layer 1
# w2 b2 - conv layer 2
# w3 b3 - fully connected layer 1
# w4 b4 - fully connected layer 2
w1 = np.random.uniform(-1, 1, (f1, f1, 3, 8)) * 0.01
w2 = np.random.uniform(-1, 1, (f1, f1, 8, 16)) * 0.01
w3 = np.random.uniform(-1, 1, (n1, 1296)) * 0.01
w4 = np.random.uniform(-1, 1, (n2, n1)) * 0.01

b1 = np.zeros((1, 1, 1, 8))
b2 = np.zeros((1, 1, 1, 16))
b3 = np.zeros((n1, 1))
b4 = np.zeros((n2, 1))

weights = (w1, w2, w3, w4)
biases = (b1, b2, b3, b4)
filter_size = (f1, f2)

w = (w1, w2, w3, w4)
pad = (pad1, pad2)

epochs = 1
alpha = 0.01

batch_size = 85
num_batches = int(1020/batch_size)
print('Number of batches {} with {} batch size\n'.format(num_batches, batch_size))
seed = 5
costs = []
mb_costs = []
first_pass = True

for epoch in range(epochs):
    batch_cost = 0
    
    seed += 1
    batches = gen_minibatches(batch_size, x, y, seed)
    
    a, p_shape, p, cost = forward(x, y, weights, biases, filter_size, pad, p_stride, first_pass)
    dw, db = backward(x, y, a, p, w, p_shape, pad)
    (dw1, dw2, dw3, dw4) = dw
    (db1, db2, db3, db4) = db
    w1 = w1 - (alpha * dw1)
    w2 = w2 - (alpha * dw2)
    w3 = w3 - (alpha * dw3)
    w4 = w4 - (alpha * dw4)

    b1 = b1 - (alpha * db1)
    b2 = b2 - (alpha * db2)
    b3 = b3 - (alpha * db3)
    b4 = b4 - (alpha * db4)

    weights = (w1, w2, w3, w4)
    biases = (b1, b2, b3, b4)

    # for batch in batches:
    #     (x, y) = batch
    #     a, p_shape, p, cost = forward(x, y, weights, biases, filter_size, pad, p_stride, first_pass)
    #     dw, db = backward(x, y, a, p, w, p_shape, pad)
    #     (dw1, dw2, dw3, dw4) = dw
    #     (db1, db2, db3, db4) = db
    #     w1 = w1 - (alpha * dw1)
    #     w2 = w2 - (alpha * dw2)
    #     w3 = w3 - (alpha * dw3)
    #     w4 = w4 - (alpha * dw4)
    # 
    #     b1 = b1 - (alpha * db1)
    #     b2 = b2 - (alpha * db2)
    #     b3 = b3 - (alpha * db3)
    #     b4 = b4 - (alpha * db4)
    # 
    #     weights = (w1, w2, w3, w4)
    #     biases = (b1, b2, b3, b4)
        # mb_costs.append(cost)
    #     batch_cost += cost
    #     first_pass = False
    # 
    # batch_cost = batch_cost/num_batches
    # costs.append(batch_cost)

# print('\nFinal cost {}\n'.format(costs[-1]))
# plt.plot(costs)
# plt.plot(mb_costs)

end_time = time.time()
elapsed = end_time - start_time

print('Time elapsed {} mins {} secs'.format(int(elapsed/60), int(elapsed%60)))


Number of batches 12 with 85 batch size



Layer 1 Convolution shape (1020, 32, 32, 8)
Layer 1 Pool shape (1020, 28, 28, 8)
Layer 2 Convolution shape (1020, 13, 13, 16)
Layer 2 Pool shape (1020, 9, 9, 16)
Layer 3 Flattened shape (1020, 1296)
Layer 4 Fully connected shape (108, 1020)
Layer 5 Fully connected shape (6, 1020)
(1020,)


ValueError: operands could not be broadcast together with shapes (1296,1020) (108,1020) 