In [1]:
import numpy as np
import pandas as pd

TEST_SIZE = 1000
MAX_VALUE = 255 # should be for pixels (grayscale values range from 0-255 and 87 for musical notes (notes range from 0-87))
COL = 784
RESULT_SIZE = 10

df = pd.read_csv('number_data.csv') # can be any of the 4 datasets

In [2]:
data = np.array(df)
np.random.shuffle(data) # randomize order of data
r, c = data.shape

data_dev = data[:TEST_SIZE].T
Y_dev = data_dev[0]
X_dev = data_dev[1:]
X_dev = X_dev / MAX_VALUE

data_train = data[TEST_SIZE:].T
Y_train = data_train[0]
X_train = data_train[1:]
X_train = X_train / MAX_VALUE
_,m_train = X_train.shape

In [3]:
def init_params():
    w1 = np.random.rand(RESULT_SIZE, COL) - 0.5
    c1 = np.random.rand(RESULT_SIZE, 1) - 0.5
    w2 = np.random.rand(RESULT_SIZE, RESULT_SIZE) - 0.5
    c2 = np.random.rand(RESULT_SIZE, 1) - 0.5
    return w1, c1, w2, c2

def ReLU(n): # 1st activation function (x if x>0, 0 else)
    return np.maximum(n, 0)

def softmax(n): # 2nd activation function
    return np.exp(n) / sum(np.exp(n))
    
def for_prop(w1, c1, w2, c2, X): #forward propagation
    L1 = w1.dot(X) + c1
    R1 = ReLU(L1)
    L2 = w2.dot(R1) + c2
    R2 = softmax(L2)
    return L1, R1, L2, R2

def ReLU_deriv(n):
    return n > 0

def res(Y):
    res_Y = np.zeros((Y.size, Y.max() + 1))
    res_Y[np.arange(Y.size), Y] = 1
    res_Y = res_Y.T
    return res_Y

def back_prop(L1, R1, R2, w2, X, Y):
    res_Y = res(Y)
    dL2 = R2 - res_Y
    dw2 = 1 / r * dL2.dot(R1.T)
    dc2 = 1 / r * np.sum(dL2)
    dL1 = w2.T.dot(dL2) * ReLU_deriv(L1)
    dw1 = 1 / r * dL1.dot(X.T)
    dc1 = 1 / r * np.sum(dL1)
    return dw1, dc1, dw2, dc2

def update(w1, c1, w2, c2, dw1, dc1, dw2, dc2, speed):
    w1 = w1 - speed * dw1
    c1 = c1 - speed * dc1    
    w2 = w2 - speed * dw2  
    c2 = c2 - speed * dc2    
    return w1, c1, w2, c2

In [6]:
def predict(R2):
    return np.argmax(R2, 0)

def check(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

def gradient_descent(X, Y, speed, iterations):
    w1, c1, w2, c2 = init_params()
    for i in range(iterations):
        L1, R1, L2, R2 = for_prop(w1, c1, w2, c2, X)
        dw1, dc1, dw2, dc2 = back_prop(L1, R1, R2, w2, X, Y)
        w1, c1, w2, c2 = update(w1, c1, w2, c2, dw1, dc1, dw2, dc2, speed)
        if i % 500 == 0:
            print("Iteration: ", i)
            predictions = predict(R2)
            print(check(predictions, Y))
    return w1, c1, w2, c2

def make_predictions(X, w1, c1, w2, c2):
    _, _, _, A2 = for_prop(w1, c1, w2, c2, X)
    predictions = predict(A2)
    return predictions

def test_prediction(index, w1, c1, w2, c2):
    prediction = make_predictions(X_train[:, index, None], w1, c1, w2, c2)
    label = Y_train[index]
    print("Prediction: ", prediction)
    print("Label: ", label)

In [7]:
w1, c1, w2, c2 = gradient_descent(X_train, Y_train, 0.2, 2000)

Iteration:  0
[5 3 0 ... 4 4 4] [4 7 1 ... 5 4 2]
0.11504878048780488
Iteration:  500
[4 7 1 ... 4 4 2] [4 7 1 ... 5 4 2]
0.8778292682926829
Iteration:  1000
[4 7 1 ... 4 4 2] [4 7 1 ... 5 4 2]
0.8991707317073171
Iteration:  1500
[4 7 1 ... 5 4 2] [4 7 1 ... 5 4 2]
0.9083170731707317


In [8]:
dev_predictions = make_predictions(X_dev, w1, c1, w2, c2)
check(dev_predictions, Y_dev)

[9 2 3 1 4 8 3 2 3 6 2 2 8 2 9 8 3 7 0 3 9 8 4 9 4 9 6 3 2 2 4 5 9 2 4 8 9
 8 3 2 9 5 9 0 4 5 2 3 4 6 2 1 6 8 5 4 5 6 2 0 0 3 9 6 7 9 9 5 6 7 3 9 1 0
 9 6 8 9 8 5 5 7 5 1 6 5 6 5 7 4 3 0 0 0 2 6 6 9 2 9 3 5 7 3 4 3 0 8 4 1 7
 3 5 9 1 6 5 7 5 5 9 3 7 9 8 6 8 2 8 9 4 4 3 0 7 6 8 5 3 0 5 8 6 2 3 6 3 4
 0 1 7 9 2 3 0 4 6 0 1 6 4 4 1 5 1 1 8 7 3 9 9 3 5 7 8 5 8 2 9 9 1 9 8 2 8
 1 0 7 0 7 9 8 3 1 4 0 2 4 1 0 3 4 9 8 5 9 9 6 8 4 6 1 4 3 7 1 4 1 6 3 2 0
 8 4 0 3 0 9 6 0 3 8 5 7 8 1 1 4 8 4 5 9 6 2 1 0 5 0 0 0 9 3 6 3 5 0 1 7 8
 1 4 7 1 9 7 4 8 0 8 2 8 8 6 1 2 9 2 0 7 1 3 8 1 0 4 1 1 7 7 6 6 3 8 9 4 1
 8 6 2 0 9 1 7 1 8 3 4 7 2 3 1 6 3 3 1 6 0 8 4 8 6 9 9 3 7 1 7 1 8 1 9 2 7
 7 0 6 5 0 5 8 9 2 7 5 2 3 7 4 4 0 3 9 7 9 7 9 5 4 6 0 5 8 4 6 9 9 5 4 3 9
 9 3 5 5 2 4 5 0 9 4 5 9 8 3 5 6 2 7 2 8 1 0 7 0 8 6 2 0 6 2 9 5 0 7 0 2 0
 0 3 6 7 2 4 4 0 8 2 4 3 0 8 9 1 9 1 8 8 4 0 1 9 4 2 6 4 4 4 8 4 5 0 4 4 9
 8 4 2 5 4 4 6 9 4 5 8 9 2 9 9 1 6 3 5 9 4 8 9 0 7 1 9 1 9 8 7 1 7 9 7 7 9
 3 7 1 7 2 3 6 9 5 0 6 7 

0.896