In [260]:
import pandas as pd
import numpy as np
from mlxtend.data import loadlocal_mnist


# Learning rate: the layers' backward() methods subtract alpha * gradient
# from their weights and biases.
alpha = 0.05

# Debug switch: when True, each layer's forward() prints its output shape.
dbg = False


In [261]:
def read_mnist():
    """Load the MNIST training set from the local IDX files.

    Returns:
        The ``(images, labels)`` pair produced by ``loadlocal_mnist`` for the
        files under ``MNIST/``.
    """
    images, labels = loadlocal_mnist(
        images_path='MNIST/train-images.idx3-ubyte',
        labels_path='MNIST/train-labels.idx1-ubyte',
    )
    return images, labels


In [262]:
class FullyConnected:
    """Fully connected layer computing ``z = w @ a + b``.

    Activations are column-major: ``a`` has shape ``(in_dim, m)`` and the
    output ``z`` has shape ``(out_dim, m)``, where ``m`` is the batch size.
    Weights are created lazily on the first ``forward`` call, once the input
    dimension is known.

    Args:
        out_dim: Number of output units.
        learning_rate: Step size used by ``backward``. When ``None`` (the
            default) the module-level ``alpha`` is used.
    """

    def __init__(self, out_dim, learning_rate=None):
        self.out_dim = out_dim
        self.learning_rate = learning_rate
        self.a = None   # input cached by forward() for use in backward()
        self.w = None
        self.b = None
        self.z = None
        self.dw = None
        self.db = None

    def forward(self, a):
        """Return ``w @ a + b`` and cache ``a`` for the backward pass."""
        if self.w is None or self.b is None:
            self.w = np.random.random((self.out_dim, a.shape[0])) * 0.001
            self.b = np.zeros((self.out_dim, 1))

        self.a = a
        self.z = np.dot(self.w, a) + self.b

        if dbg:
            print("fc_forward: ")
            print(self.z.shape)

        return self.z

    def backward(self, dz):
        """Apply one gradient step and return the gradient w.r.t. the input.

        Args:
            dz: Gradient w.r.t. this layer's output, shape ``(out_dim, m)``.

        Returns:
            Gradient w.r.t. the cached input, shape ``(in_dim, m)``.
        """
        m = dz.shape[1]
        self.dw = np.matmul(dz, self.a.T) / m
        self.db = np.sum(dz, axis=1, keepdims=True) / m

        # The upstream gradient must be computed with the weights that were
        # used in the forward pass, i.e. BEFORE they are updated below.
        da = np.matmul(self.w.T, dz)

        step = alpha if self.learning_rate is None else self.learning_rate
        self.w = self.w - step * self.dw
        self.b = self.b - step * self.db
        return da


In [263]:
class ReLU:
    """Rectified linear activation layer: ``a = ReLU(z)``."""

    def __init__(self):
        # Both values are cached by forward(); backward() reads self.z.
        self.z = None
        self.a = None

    @staticmethod
    def relu(z):
        """Return the element-wise maximum of 0 and ``z``."""
        return np.maximum(0, z)

    @staticmethod
    def relu_derivative(z):
        """Return a copy of ``z`` holding 0 where ``z <= 0`` and 1 where ``z > 0``."""
        grad = np.array(z, copy=True)
        non_positive = grad <= 0
        positive = grad > 0
        grad[non_positive] = 0
        grad[positive] = 1
        return grad

    def forward(self, z):
        """Apply ReLU to ``z``, caching both the input and the activation."""
        self.z = z
        self.a = self.relu(z)
        if dbg:
            print("relu_forward: ")
            print(self.a.shape)
        return self.a

    def backward(self, da):
        """Return ``da`` multiplied element-wise by the ReLU derivative at the cached ``z``."""
        local_grad = self.relu_derivative(self.z)
        return np.multiply(da, local_grad)



In [264]:
def cross_entropy(y_hat, y):
    """Return the mean categorical cross-entropy ``-sum(y * log(y_hat)) / m``.

    Args:
        y_hat: Predicted probabilities, one column per sample.
        y: One-hot targets with the same shape; ``m = y.shape[1]``.

    Returns:
        The cost as a scalar.
    """
    m = y.shape[1]
    # Clamp away from zero: log(0) is -inf and (-inf * 0) is NaN, which would
    # poison the whole sum whenever any probability underflows to 0.
    safe_y_hat = np.maximum(y_hat, 1e-12)
    logs = np.multiply(np.log(safe_y_hat), y)
    cost = - np.sum(logs) / m
    return cost

class SoftMax:
    """Softmax output layer: ``y_hat = e^z / sum(e^z)`` over each column.

    ``backward`` takes the one-hot targets and returns ``y_hat - y``.
    """

    def __init__(self):
        self.out_dim = None
        self.z = None
        self.y_hat = None

    @staticmethod
    def softmax(z):
        """Return the column-wise softmax of ``z``.

        The column maximum is subtracted before exponentiating. The result is
        mathematically unchanged, but ``np.exp`` can no longer overflow to
        ``inf`` (which would yield NaN probabilities) for large logits.
        """
        shifted = z - np.max(z, axis=0, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=0, keepdims=True)

    def forward(self, z):
        """Compute and cache the softmax probabilities for ``z``."""
        if self.out_dim is None:
            self.out_dim = z.shape[0]
        self.z = z
        self.y_hat = self.softmax(z)
        if dbg:
            print("soft_forward: ")
            print(self.y_hat.shape)
        return self.y_hat

    def backward(self, y):
        """Return ``y_hat - y`` using the probabilities cached by ``forward``."""
        dz = self.y_hat - y
        return dz

In [265]:
class Convolutional:
    """2-D convolution layer over channels-last batches ``(m, H, W, C)``.

    Filters have shape ``(filter_dim, filter_dim, C_in, number_of_filters)``
    and are created lazily on the first ``forward`` call. ``backward``
    averages the parameter gradients over the batch (matching
    ``FullyConnected``) and applies a gradient step.

    Args:
        number_of_filters: Number of output channels.
        filter_dim: Height and width of each (square) filter.
        stride: Step between consecutive windows.
        padding: Zero padding added on each side of height and width.
        learning_rate: Step size used by ``backward``. When ``None`` (the
            default) the module-level ``alpha`` is used.
    """

    def __init__(self, number_of_filters, filter_dim, stride=1, padding=0,
                 learning_rate=None):
        self.number_of_filters = number_of_filters
        self.filter_dim = filter_dim
        self.stride = stride
        self.padding = padding
        self.learning_rate = learning_rate
        self.w = None
        self.b = None
        self.a_prev = None  # input cached by forward() for backward()

    def zero_pad(self, x, pad):
        """Zero-pad the height and width axes of a ``(m, H, W, C)`` array."""
        x_pad = np.pad(x, ((0, 0), (pad, pad), (pad, pad), (0, 0)),
                       mode='constant', constant_values=(0, 0))
        return x_pad

    def conv_single_step(self, a_slice_prev, w, b):
        """Return ``sum(a_slice_prev * w) + b`` for one window and one filter.

        ``b`` may be a scalar or any size-1 array.
        """
        z = np.sum(np.multiply(a_slice_prev, w))
        # float() on an array with ndim > 0 is deprecated in recent NumPy;
        # .item() extracts the single element explicitly.
        return z + float(np.asarray(b).item())

    def forward(self, a_prev):
        """Convolve ``a_prev`` with the filters.

        Args:
            a_prev: Input of shape ``(m, H_in, W_in, C_in)``.

        Returns:
            Array of shape ``(m, H_out, W_out, number_of_filters)``.
        """
        self.a_prev = a_prev
        m, n_H_prev, n_W_prev, n_C_prev = a_prev.shape
        if self.w is None:
            self.w = np.random.random(
                (self.filter_dim, self.filter_dim, n_C_prev, self.number_of_filters)
            ) * 0.001
            self.b = np.zeros((1, 1, 1, self.number_of_filters))

        f_H, f_W, _, n_C = self.w.shape
        stride = self.stride
        pad = self.padding

        n_H = int((n_H_prev + 2 * pad - f_H) // stride) + 1
        n_W = int((n_W_prev + 2 * pad - f_W) // stride) + 1

        z = np.zeros([m, n_H, n_W, n_C])
        a_prev_pad = self.zero_pad(a_prev, pad)
        bias = self.b.reshape(-1)

        for h in range(n_H):
            vert_start = stride * h
            vert_end = vert_start + f_H
            for w in range(n_W):
                horiz_start = stride * w
                horiz_end = horiz_start + f_W
                # (m, f_H, f_W, C_in) window for every sample at once.
                window = a_prev_pad[:, vert_start:vert_end, horiz_start:horiz_end, :]
                # Contract (f_H, f_W, C_in) against the filters -> (m, n_C).
                z[:, h, w, :] = np.tensordot(window, self.w, axes=3) + bias

        assert(z.shape == (m, n_H, n_W, n_C))
        if dbg:
            print("conv_forward: ")
            print(z.shape)
        return z

    def backward(self, dz):
        """Apply one gradient step and return the gradient w.r.t. the input.

        Args:
            dz: Gradient w.r.t. the output of ``forward``, shape
                ``(m, H_out, W_out, number_of_filters)``.

        Returns:
            Gradient w.r.t. the cached input, same shape as that input.
        """
        (m, n_H_prev, n_W_prev, n_C_prev) = self.a_prev.shape
        f_H, f_W = self.w.shape[0], self.w.shape[1]
        stride = self.stride
        pad = self.padding
        (m, n_H, n_W, n_C) = dz.shape

        dw = np.zeros(self.w.shape)
        a_prev_pad = self.zero_pad(self.a_prev, pad)
        da_prev_pad = np.zeros(a_prev_pad.shape)

        for h in range(n_H):
            vert_start = stride * h
            vert_end = vert_start + f_H
            for w in range(n_W):
                horiz_start = stride * w
                horiz_end = horiz_start + f_W

                window = a_prev_pad[:, vert_start:vert_end, horiz_start:horiz_end, :]
                grad = dz[:, h, w, :]  # (m, n_C)

                # Sum over filters of w[..., c] * dz[i, h, w, c] -> (m, f_H, f_W, C_in).
                da_prev_pad[:, vert_start:vert_end, horiz_start:horiz_end, :] += \
                    np.tensordot(grad, self.w, axes=([1], [3]))
                # Sum over samples of window_i (outer) grad_i -> (f_H, f_W, C_in, n_C).
                dw += np.tensordot(window, grad, axes=([0], [0]))

        db = np.sum(dz, axis=(0, 1, 2)).reshape(self.b.shape)

        # Strip the padding; with pad == 0 this slice is the whole array.
        da_prev = da_prev_pad[:, pad:pad + n_H_prev, pad:pad + n_W_prev, :]
        assert(da_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))

        # Average over the batch so the step size does not scale with the
        # batch size, consistent with FullyConnected.backward.
        dw = dw / m
        db = db / m

        step = alpha if self.learning_rate is None else self.learning_rate
        self.w = self.w - step * dw
        self.b = self.b - step * db

        return da_prev



In [266]:
class MaxPool:
    """Max-pooling layer over channels-last batches ``(m, H, W, C)``."""

    def __init__(self, filter_dim, stride):
        self.filter_dim, self.stride = filter_dim, stride
        self.a_prev = None  # input cached by forward() for backward()

    def forward(self, a_prev):
        """Return the maximum of every ``filter_dim x filter_dim`` window, per channel."""
        self.a_prev = a_prev
        batch, in_h, in_w, channels = a_prev.shape

        size = self.filter_dim
        step = self.stride

        out_h = int(1 + (in_h - size) / step)
        out_w = int(1 + (in_w - size) / step)

        pooled = np.zeros((batch, out_h, out_w, channels))

        for row in range(out_h):
            top = step * row
            for col in range(out_w):
                left = step * col
                # Same window for every sample and channel; reduce over its
                # two spatial axes only.
                window = a_prev[:, top:top + size, left:left + size, :]
                pooled[:, row, col, :] = np.max(window, axis=(1, 2))

        assert(pooled.shape == (batch, out_h, out_w, channels))
        if dbg:
            print("pool_forward: ")
            print(pooled.shape)
        return pooled

    def create_mask_from_window(self, x):
        """Return a boolean array that is True wherever ``x`` equals its maximum."""
        return x == np.max(x)

    def backward(self, da):
        """Route each output gradient to the maximal position(s) of its window.

        Every position tied for the window maximum receives the full gradient.
        """
        step = self.stride
        size = self.filter_dim
        cached = self.a_prev

        grad_in = np.zeros(cached.shape)

        # np.ndindex walks (sample, row, col, channel) with the last index
        # varying fastest, i.e. the same order as four nested loops.
        for i, row, col, c in np.ndindex(*da.shape):
            rows = slice(row * step, row * step + size)
            cols = slice(col * step, col * step + size)
            window = cached[i][rows, cols, c]
            grad_in[i, rows, cols, c] += self.create_mask_from_window(window) * da[i, row, col, c]

        assert(grad_in.shape == self.a_prev.shape)
        return grad_in

In [267]:
class Flattening:
    """Flatten each sample of a batch into one column.

    ``forward`` maps ``(m, ...)`` to ``(features, m)``; ``backward`` is its
    exact inverse.
    """

    def __init__(self):
        self.a_prev = None  # input cached by forward(); its shape drives backward()

    def forward(self, a_prev):
        """Return an array whose column ``i`` is ``a_prev[i]`` raveled."""
        self.a_prev = a_prev
        m = a_prev.shape[0]
        features = int(np.prod(a_prev.shape[1:]))
        a = np.reshape(a_prev, (m, features)).T
        if dbg:
            print("flatten_forward: ")
            print(a.shape)
        return a

    def backward(self, da):
        """Reshape a ``(features, m)`` gradient back to the cached input shape."""
        # forward() transposed to put samples in columns, so transpose back
        # first; reshaping (features, m) directly would mix values belonging
        # to different samples.
        da_prev = da.T.reshape(self.a_prev.shape)
        return da_prev

In [268]:
def modify_label(y):
    """One-hot encode a column of class labels into a ``(10, n)`` array.

    Args:
        y: Array of shape ``(n, 1)`` holding integer labels.

    Returns:
        Array of zeros of shape ``(10, n)`` with a 1 at ``[y[i, 0], i]``.
    """
    n_samples = y.shape[0]
    one_hot = np.zeros((10, n_samples))
    # One fancy-indexed assignment sets the (label, column) cell per sample.
    one_hot[y[:, 0], np.arange(n_samples)] = 1
    return one_hot


def run_mnist():
    """Build the network described in ``architecture.txt`` and train it on MNIST.

    Each non-blank line of the architecture file names one layer followed by
    its integer arguments:

        conv <filters> <filter_dim> <stride> <padding>
        pool <filter_dim> <stride>
        fc <out_dim>
        relu | softmax | flatten

    Lines naming any other layer type are ignored. The first 10 mini-batches
    of 50 samples are each run through a forward and a backward pass, and the
    cross-entropy of each batch is printed.
    """
    builders = {
        "fc": lambda args: FullyConnected(int(args[0])),
        "relu": lambda args: ReLU(),
        "softmax": lambda args: SoftMax(),
        "conv": lambda args: Convolutional(int(args[0]), int(args[1]), int(args[2]), int(args[3])),
        "pool": lambda args: MaxPool(int(args[0]), int(args[1])),
        "flatten": lambda args: Flattening(),
    }

    cnn_layers = []
    with open("architecture.txt", "r") as f1:
        for line in f1:
            words = line.split()
            if not words:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            builder = builders.get(words[0].lower())
            if builder is not None:
                cnn_layers.append(builder(words[1:]))

    max_batches = 10
    batch_sz = 50

    x_mnist_train, y_mnist_train = read_mnist()
    batch_starts = range(0, x_mnist_train.shape[0], batch_sz)
    for batch_no, i in enumerate(batch_starts):
        if batch_no >= max_batches:
            break

        curr_batch_x = x_mnist_train[i:i + batch_sz, :]
        curr_batch_y = y_mnist_train[i:i + batch_sz]

        # Use the real batch length so a short final batch still reshapes.
        n = curr_batch_x.shape[0]
        curr_batch_x = curr_batch_x.reshape((n, 28, 28, 1))
        curr_batch_y = modify_label(curr_batch_y.reshape(n, 1))

        prev_a = curr_batch_x
        for layer in cnn_layers:
            prev_a = layer.forward(prev_a)

        # Walk back through EVERY layer, including index 0: parameters are
        # updated inside backward(), so skipping the first layer would leave
        # it untrained.
        prev_derivative = curr_batch_y
        for layer in reversed(cnn_layers):
            prev_derivative = layer.backward(prev_derivative)

        print(cross_entropy(prev_a, curr_batch_y))


# Notebook entry point: build the network from architecture.txt and train it.
run_mnist()

2.3025873110561657
2.3025800238825314
2.30224424416095
2.3030525896673493
2.3020605230545335
2.302508413560533
2.3030388871191314
2.3014207442251395
2.3023827387116778
2.301717831398491


In [269]:
# def read_data(file_path):
#     df = pd.read_csv(file_path, delim_whitespace=True, header=None)
#     num_features = df.shape[1] - 1
#     df = pd.get_dummies(df, columns=[4], drop_first=False)
#     train_dataset = df.to_numpy()
#     x_train = train_dataset[:,:num_features]
#     y_train = train_dataset[:,num_features:]
#
#     x_train = x_train.T
#     y_train = y_train.T
#
#     # print(x_train.shape)
#     # print(y_train.shape)
#     return x_train, y_train


In [270]:
# def modify_level(y_hat):
#     for j in range(y_hat.shape[1]):
#         mx = -10
#         mx_idx = -1
#         for i in range(y_hat.shape[0]):
#             if y_hat[i,j] > mx:
#                 mx = y_hat[i,j]
#                 mx_idx = i
#             y_hat[i][j] = 0
#         y_hat[mx_idx, j] = 1
#
#     return y_hat
#
# def calc_accuracy(y_hat, y):
#     match = 0
#     for j in range(y_hat.shape[1]):
#         flag = 0
#         for i in range(y_hat.shape[0]):
#             if y_hat[i,j] != y[i,j]:
#                 flag = 1
#                 break
#         if flag == 0:
#             match += 1
#
#     print("accuracy: " + str(match/y_hat.shape[1]))
#
#
#
#
# def run_cnn():
#     f1 = open("architecture.txt", "r")
#     lines = f1.readlines()
#     cnn_layers = list()
#     for line in lines:
#         words = line.strip().split()
#         if words[0].lower() == "fc":
#             cnn_layers.append(FullyConnected(int(words[1])))
#         elif words[0].lower() == "relu":
#             cnn_layers.append(ReLU())
#         elif words[0].lower() == "softmax":
#             cnn_layers.append(SoftMax())
#
#     f1.close()
#
#     x, y = read_data("Toy Dataset/trainNN.txt")
#
#     for itr in range(itr_limit):
#         prev_a = x
#         for layer in cnn_layers:
#             prev_a = layer.forward(prev_a)
#
#         prev_derivative = y
#         for i in range(len(cnn_layers)-1,0,-1):
#             prev_derivative = cnn_layers[i].backward(prev_derivative)
#
#         # if itr % 500 == 0:
#         #     print(cross_entropy(prev_a, y))
#
#     # prev_a = modify_level(prev_a)
#     # calc_accuracy(prev_a, y)
#
#     x_test, y_test = read_data("Toy Dataset/testNN.txt")
#
#     prev_a = x_test
#     for itr in range(itr_limit):
#         prev_a = x
#         for layer in cnn_layers:
#             prev_a = layer.forward(prev_a)
#
#     prev_a = modify_level(prev_a)
#     calc_accuracy(prev_a, y_test)
#

In [271]:
# """
# run cnn
# """
# # run_cnn()