In [90]:
import numpy as np
import pandas as pd
import PIL.Image as Image

# Process data

In [91]:
IMAGE_SIZE = (28, 28)
EPOCH = 1000
LEARNING_RATE = 0.00001

# Import data
data = pd.read_csv(r"../digit-recognizer/train.csv")
data_test = pd.read_csv(r"../digit-recognizer/test.csv")    

# Get labels and image array from data
labels : np.ndarray = data.values[:, 0]
images : np.ndarray = data.values[:, 1:].astype('uint8')

images_test : np.ndarray = data_test.values.astype('uint8')

# Reference for backpropagation (from 3b1b neural network video)

![title](image.png)

# <center> SOFTMAX MODEL </center>

In [92]:
class ModelSoftmax:
    def __init__(self, input, hidden, output, lr):
        # Initialize weight and bias
        self.w_input = np.random.normal(size=((input, output)))
        # self.b_input = np.zeros(output)
        # self.w_hidden = np.random.rand(hidden, output)
        # self.b_hidden = np.random.rand(output)
        
        self.lr = lr

    def softmax(self, x : np.ndarray):
        new_x : np.ndarray = np.zeros(10)
        x = np.subtract(x, np.max(x))
        for i in range(10):
            new_x[i] = np.exp(x[i]) / np.sum(np.exp(x), axis=0)
            # print(np.exp(x[i]))
        # print(new_x)
        return new_x 

    def forward(self, x):
        x = np.dot(x, self.w_input)
        x = self.softmax(x)
        
        return x

    def train(self, input: np.ndarray, pred: np.ndarray, label: np.ndarray, debug=False):
        # ============================================
        # Softmax with Cross Entropy Loss
        
        # cost = -sum(label * ln(pred)) = -ln(pred)
        # label array only has one 1 value (the correct label), so technically, -ln(pred) is also correct
        # pred = softmax(out)
        # out = w * input
        # d_cost = -1 / (pred)
        # d_pred = pred * (1 - pred) # From StatQuest
        # d_cost * d_pred = pred[i] - 1
        # self.w_input[j, i] -= step

        d_cost = -1 / pred[np.argmax(label)] # d_cost
        d_pred = np.zeros(len(pred))
        for i, p in enumerate(pred): # d_pred (derivative of softmax)
            # d_softmax/d_y1 = (e**y1 * (e**y2 + e**y3 + ...)) / (e**y1 + e**y2 + e**y3 + ...)**2
            d_pred[i] = np.exp(p) * np.sum(np.exp(pred[pred != p])) / np.sum(np.exp(pred))**2
            
        step = np.outer(input, d_pred)  * d_cost * self.lr
        self.w_input += step

        # Debug
        if (debug):
            print("Cost: ", np.log(pred[np.argmax(label)]))
            # print("Step: ", step)
            # print("d_pred: ", d_pred)
            # print("d_cost: ", d_cost)
            
        # ============================================

# <center> SIGMOID MODEL </center>

In [93]:
class ModelSigmoid:
    def __init__(self, input, hidden, output, lr):
        # Initialize weight and bias
        self.w_input = np.random.normal(size=((input, output)))
        # self.b_input = np.zeros(output)
        # self.w_hidden = np.random.rand(hidden, output)
        # self.b_hidden = np.random.rand(output)
        
        self.lr = lr

    def sigmoid(self, x, derive=False):
        if derive:
            return self.sigmoid(x) * (1-self.sigmoid(x))
        return 1 / (1 + np.exp(-x))

    def forward(self, x):
        x = np.dot(x, self.w_input)
        x = self.sigmoid(x)
        
        return x

    def train(self, input: np.ndarray, pred: np.ndarray, label: np.ndarray, debug=False):
        # ============================================
        # # sigmoid with MSELoss (Mean Squared Error)
        # cost = 1/len(pred) * np.sum(pred-label)**2
        # d_cost = 2/len(pred) * (pred-label)
        
        # pred = sigmoid(z) {a(L)}
        # d_pred = self.sigmoid(z, derive=True)     
        
        # z = weight * input
        # z = np.dot(input, self.w_input)
        
        # Weight input gradient descent
        d_cost = 2/len(pred) * (pred-label)
        d_pred = self.sigmoid(np.dot(input, self.w_input), derive=True)
        step = np.outer(input, d_pred) * d_cost * self.lr
        self.w_input -= step

        # Debug
        if (debug):
            print("Cost: ", np.sum(pred-label)**2/len(pred))
            # print("Step: ", step)
            # print("d_pred: ", d_pred)
            # print("d_cost: ", d_cost)
            
        # ============================================

# Test

In [94]:
model = ModelSoftmax(IMAGE_SIZE[0]*IMAGE_SIZE[1], 16, 10, LEARNING_RATE)
# model = ModelSigmoid(IMAGE_SIZE[0]*IMAGE_SIZE[1], 16, 10, LEARNING_RATE)

for i in range(EPOCH):
    wrong = 0
    for j, d in enumerate(images):
        # High input breaks the neural network
        # Problem such as NaN, inf, etc, because e sucks
        d = d / 255
        
        pred = model.forward(d)
        
        # 0 will also break log e, because e sucks
        # Clip pred so the value only ranges around 1e-7 and 1
        pred = np.clip(pred, 1e-7, 1e7)

        if (np.argmax(pred) != labels[j]):
            wrong += 1
            
        label = np.zeros(10)
        label[labels[j]] = 1
        model.train(d, pred, label)

        if ((j+1) % 2000 == 0):
            # print(model.w_input)
            model.train(d, pred, label, True)
            print(f"Data {j+1}: Wrong = {wrong}, Accuracy: {100-wrong/j*100}%")

    print(f"Epoch: {i+1} --> Wrong: {wrong}, Accuracy: {100-wrong / labels.size * 100}%\n")    

Cost:  3.2860245986052226
Data 2000: Wrong = 1762, Accuracy: 11.855927963981998%
Cost:  2.518142630297416
Data 4000: Wrong = 3533, Accuracy: 11.65291322830707%
Cost:  1.7205573037879134
Data 6000: Wrong = 5319, Accuracy: 11.335222537089521%
Cost:  1.490027120166312
Data 8000: Wrong = 7091, Accuracy: 11.351418927365913%
Cost:  2.6703658406692634
Data 10000: Wrong = 8860, Accuracy: 11.391139113911379%
Cost:  1.6418139012858632
Data 12000: Wrong = 10640, Accuracy: 11.325943828652385%
Cost:  0.9643193167785201
Data 14000: Wrong = 12435, Accuracy: 11.172226587613395%
Cost:  2.4605124839888584
Data 16000: Wrong = 14236, Accuracy: 11.019438714919687%
Cost:  2.0225155988529453
Data 18000: Wrong = 15999, Accuracy: 11.111728429357186%
Cost:  2.3450114262919604
Data 20000: Wrong = 17766, Accuracy: 11.165558277913902%
Cost:  3.6784308265262853
Data 22000: Wrong = 19540, Accuracy: 11.177780808218557%
Cost:  1.050102428605122
Data 24000: Wrong = 21314, Accuracy: 11.187966165256896%
Cost:  2.75368850

KeyboardInterrupt: 

In [None]:
# testing = np.array([
#     [1, 1, 1],
#     [1, 0, 1],
#     [0, 1, 1],
#     [0, 0, 1],
#     [0, 0, 0],
#     [1, 0, 0],
#     [1, 1, 0]
# ])

# label = np.array(
#     [1, 1, 1, 0, 0, 0, 1]
# )

# model = Model(3, 0, 1, LEARNING_RATE)
# for i, d in enumerate(testing):
#     res = float(model.forward(d))
    
#     print(res)
#     model.train(d, res, label[i])