In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
train_data = pd.read_csv('MNIST_DATA/train.csv')
m,n = train_data.shape

In [4]:
test_data = pd.read_csv('MNIST_DATA/test.csv')
test_data.shape

(28000, 784)

In [5]:
train_data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
train_data = np.array(train_data).T
X_train = train_data[1:n]
Y_train = train_data[0]


In [7]:
test_data = np.array(test_data).T
X_test = test_data[1:n]
Y_test = test_data[0]

In [10]:
print(f"{X_train.shape = }")
print(f"{X_test.shape = }")
print(f"{Y_train.shape = }")
print(f"{Y_test.shape = }")

X_train.shape = (784, 42000)
X_test.shape = (783, 28000)
Y_train.shape = (42000,)
Y_test.shape = (28000,)


In [20]:
class NeuralNetwork:
    def __init__(self):
        pass
    
    def init_params(self):
        w1 = np.random.rand(10, 784) - 0.5
        b1 = np.random.rand(10, 1) - 0.5
        w2 = np.random.rand(10, 10) - 0.5
        b2 = np.random.rand(10, 1) - 0.5
        return w1, b1, w2, b2
    
    def ReLU(self, x):
        return np.maximum(0, x)
    
    def softMax(self, x):
        return np.exp(x) / np.sum(np.exp(x))
    
    def forward_prop(self, w1, b1, w2, b2, X):
        z1 = w1.dot(X) + b1
        a1 = self.ReLU(z1)
        z2 = w2.dot(a1) + b2
        a2 = self.softMax(z2)
        
        return z1,a1,z2,a2
    
    def one_hot(self,y):
        one_hot_y = np.zeros((y.size , y.max() + 1))
        one_hot_y[np.arange(y.size) , y] = 1
        one_hot_y = one_hot_y.T
        return one_hot_y
    
    def deriv_ReLU(self,x):
        return x>0
    
    def back_prop(self, z1, a1, z2, a2,w2,x, y):
        m = y.size
        one_hot_y = self.one_hot(y)
        dz2 = a2-one_hot_y
        dw2 = 1/ m * dz2.dot(a1.T)
        db2 = 1 / m * np.sum(dz2)
        
        dz1 = w2.T.dot(dz2) * self.deriv_ReLU(z1)
        dw1 = 1 / m * dz1.dot(x.T)
        db1 = 1 / m * np.sum(dz1)
        
        return dw1,db1,dw2,db2 
    
    def update_params(self, w1,b1,w2,b2,dw1,db1,dw2,db2,alpha):
        w1 -= alpha*dw1
        b1 -= alpha*db1
        w2 -= alpha*dw2
        b2 -= alpha*db2
        return w1,b1,w2,b2  
    
    def get_predictions(self, a2):
        return np.argmax(a2, 0)
    
    def get_accuracy(self, predictions, y):
        print(predictions, y)
        return np.sum(predictions == y) / y.size
    
    def gradient_decent(self,x,y,iterations,alpha):
        w1,b1,w2,b2 = self.init_params()
        for i in range(iterations):
            z1,a1,z2,a2 = self.forward_prop(w1,b1,w2,b2,x)
            dw1,db1,dw2,db2 = self.back_prop(z1,a1,z2,a2,w2,x,y)
            w1,b1,w2,b2 = self.update_params(w1,b1,w2,b2,dw1,db1,dw2,db2,alpha)
            if(i % 10 == 0):
                print("Iteration: ",i)
                predictions = self.get_predictions(a2)
                print("Accuracy: ",self.get_accuracy(predictions,y))
        return w1,b1,w2,b2
    
    def make_predictions(self,w1,b1,w2,b2,x):
        _,_,_,a2 = self.forward_prop(w1,b1,w2,b2,x)
        predictions = self.get_predictions(a2)
        return predictions
    
    def show_training_predictions(self,index,w1,b1,w2,b2,x,y):
        cur_img = x[:,index,None]
        prediction = self.make_predictions(w1,b1,w2,b2,cur_img)
        print("Prediction: ", prediction)
        print("Label: ", y[index])
        cur_img = cur_img.reshape((28,28))
        plt.imshow(cur_img, cmap='gray')
        plt.show()
        
    

In [19]:
model = NeuralNetwork()
w1,b1,w2,b2 = model.gradient_decent(X_train, Y_train, 100, 0.1)


  return np.exp(x) / np.sum(np.exp(x))
  return np.exp(x) / np.sum(np.exp(x))


Iteration:  0
[2 0 0 ... 0 0 0] [1 0 1 ... 7 6 9]
Accuracy:  0.07971428571428571
Iteration:  10
[0 0 0 ... 0 0 0] [1 0 1 ... 7 6 9]
Accuracy:  0.09838095238095237
Iteration:  20
[0 0 0 ... 0 0 0] [1 0 1 ... 7 6 9]
Accuracy:  0.09838095238095237
Iteration:  30
[0 0 0 ... 0 0 0] [1 0 1 ... 7 6 9]
Accuracy:  0.09838095238095237


KeyboardInterrupt: 