# 3-layer Neural Network for Classification 
without the deep learning framework (only python)

## 0.  Import dependency package

In [1]:
import numpy as np
import gzip
from PIL import Image
from matplotlib import pyplot as plt
%matplotlib inline

## 1. Load data
The following functions are retrieved from https://stackoverflow.com/questions/40427435/extract-images-from-idx3-ubyte-file-or-gzip-via-python

In [2]:
def training_images():
    with gzip.open('data/train-images-idx3-ubyte.gz', 'r') as f:
        # first 4 bytes is a magic number
        magic_number = int.from_bytes(f.read(4), 'big')
        # second 4 bytes is the number of images
        image_count = int.from_bytes(f.read(4), 'big')
        # third 4 bytes is the row count
        row_count = int.from_bytes(f.read(4), 'big')
        # fourth 4 bytes is the column count
        column_count = int.from_bytes(f.read(4), 'big')
        # rest is the image pixel data, each pixel is stored as an unsigned byte
        # pixel values are 0 to 255
        image_data = f.read()
        images = np.frombuffer(image_data, dtype=np.uint8)\
            .reshape((image_count, row_count, column_count))
        return images


def training_labels():
    with gzip.open('data/train-labels-idx1-ubyte.gz', 'r') as f:
        # first 4 bytes is a magic number
        magic_number = int.from_bytes(f.read(4), 'big')
        # second 4 bytes is the number of labels
        label_count = int.from_bytes(f.read(4), 'big')
        # rest is the label data, each label is stored as unsigned byte
        # label values are 0 to 9
        label_data = f.read()
        labels = np.frombuffer(label_data, dtype=np.uint8)
        return labels

In [3]:
X_t = training_images()
Y_t = training_labels()

X_t = X_t.reshape(-1,1,784)

# normalization
X_t = X_t / 255

In [4]:
print(X_t.shape)

(60000, 1, 784)


## 2. Generate Neural Network

### 2.0 Activation Function

In [5]:
class ReLU:
    def prop(self, X):
        return np.maximum(0, X)
    
    def derivative(self, X):
        result = np.copy(X)
        result[result >= 0] = 1
        result[result < 0] = 0
        return result

class LeakyReLU:
    def __init__(self, hyper):
        self.hyper = hyper
        
    def prop(self, X):
        return np.maximum(self.hyper*X, X)
    
    def derivative(self, X):
        result = np.copy(X)
        result[result >= 0] = 1
        result[result < 0] = self.hyper
        return result

### 2.1 Layer_Dense

In [6]:
class Layer_dense:
    def __init__(self, input_size, output_size, activation):
        self.W = np.random.normal(size=(input_size, output_size))
        self.b = np.random.normal(size=(1,output_size))
        self.L = None
        self.activation_func = activation()
    
    def prop(self, X):
        self.L = np.dot(X, self.W) + self.b
        return self.activation_func.prop(self.L)
    
    def derivative(self, X):
        return np.array([ np.multiply(self.W, v) for v in self.activation_func.derivative(X) ])
    
    def activation_derivative(self):
        return self.activation_func.derivative(self.L)
    
    def update(self, learning_rate, dW, db):
        self.W -= learning_rate * dW
        self.b -= learning_rate * db

### 2.2 Softmax

In [7]:
class Softmax:
    def prop(self, X):
        return np.array([ np.exp(x)/np.sum(np.exp(x)) for x in X ])
    
    def derivative(self, Y_hat, Y):
        "I will use this function for derivative"
        result = np.copy(Y_hat)
        result[range(result.shape[0]),0,Y] -= 1
        return result 
    
    def _derivative(self, X):
        """I will not use this function, just implementation of partial derivative"""
        result = np.zeros((*X.shape,X.shape[-1]))
        S = Softmax.prop(X)
        for k, V in enumerate(X):
            for i,x1 in enumerate(*V):
                for j,x2 in enumerate(*V):
                    if i==j:
                        result[k,0,i,j] = S[k,0,i] * (1 - S[k,0,j])
                    else:
                        result[k,0,i,j] = S[k,0,i] * S[k,0,j] * -1
        return result

### 2.3 Multi-Class Cross entropy function

In [8]:
class Cross_entropy:
    def prop(self, Y_hat, Y):
        return -1 * np.log(np.fromiter([ y_hat[:,y] for y_hat,y in zip(Y_hat,Y) ], float))
    

    def derivative(self, Y_hat, Y):
        "I will not use this function, just implementation of derivative"
        result = np.zeros(Y_hat.shape)
        result[range(Y_hat.shape[0]),0,Y] = -1 * (1/Y_hat[range(Y_hat.shape[0]),0,Y])
        return result

### 2.4 Neural Network

In [9]:
class NN:
    def __init__(self, X_t, Y_t, loss, learning_rate, batch_size):
        self.X_t = X_t
        self.Y_t = Y_t
        
        self.loss_func = loss
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        
        self.layers = []
        self.inputs = []

        
    def add(self, layer):
        self.layers.append(layer)
    
    def prop(self, X):
        result = X
        self.inputs = [X]
        for layer in self.layers:
            result = layer.prop(result)
            self.inputs.append(result)
            
        return result
    
    def backprop(self, Y):
        dZ = self.layers[-1].derivative(self.inputs[-1], Y) # softmax+cost 
        for i,layer in enumerate(reversed(self.layers),3):
            dW = np.multiply(np.dot(self.inputs[-i].T,layer.activation_derivative()), dZ) 
            db = np.sum(dZ, axis=1, keepdims = True)
            dZ = np.dot(layer.derivative(self.inputs[-i]), dZ)
            layer.update(self.learning_rate, dW, db)
            
    
    def fit(self):
        pass
    
    def predict(self, X):
        for layer in layers:
            X = layer(X)

In [10]:
nn = NN(X_t, Y_t, Cross_entropy(), 0.001, 64)
nn.add(Layer_dense(784,64,ReLU))
nn.add(Layer_dense(64,10,ReLU))
nn.add(Softmax())
nn.prop(X_t[:64])
print("done")

done


In [11]:
nn.backprop(Y_t[:64])

AttributeError: 'Softmax' object has no attribute 'backprop'

### Trivial

In [None]:
layer0 = Layer_dense(28*28,64,ReLU)
layer1 = Layer_dense(64,10,ReLU)

batch_x = X_t[0:64]

output0 = layer0.prop(batch_x)
output1 = layer1.prop(output0)
output2 = Softmax().prop(output1)
output3 = Cross_entropy().prop(output2, Y_t[0:64])

#             if isinstance(layer, Layer_dense):
#                 print("nn-bakcprop-layer", i, self.outputs[-i].shape)
#                 layer.update(0.001, delta, self.outputs[-i])
#                 delta = np.array(layer.backprop(self.outputs[-i]) * delta)
#                 continue
#             print("nn-backprop", i, layer.backprop(self.outputs[-i]).shape)
#             print("nn-backprop-delta",i , delta.shape)
#             delta = np.array([prev_d*next_d for next_d, prev_d in zip(layer.backprop(self.outputs[-i]), delta)])