MNIST Number Identifier

Michael Pham, Fall 2022
Based on math laid out by Samson Zhang

Uses only numpy for the calculations and matplotlib for visualization (pandas is used solely to load the CSV data).

In [99]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

import data

In [None]:
# Load the dataset from 'data.csv' (path relative to the working directory)
# into a pandas DataFrame.
# NOTE(review): this rebinds the name `data`, shadowing the `import data`
# statement in the imports cell -- confirm whether that import is still needed.
data = pd.read_csv('data.csv')

In [None]:
# Preview the first rows of the loaded table as a quick sanity check.
data.head()

Splitting the data into training and test sets

In [None]:
# Convert the DataFrame to a plain array and shuffle its rows in place so the
# train/test split below is random.
data_matrix = np.array(data)
np.random.shuffle(data_matrix)
m, n = data_matrix.shape

# Number of rows to use overall; lower this to experiment on a subset.
total_n = m #adjustable
# Roughly 30% of the used rows (1 / 3.333) are held out for testing.
test_n = int(total_n / 3.333)
training_n = total_n - test_n

# Transpose so that every column is one example.
training_set = data_matrix[0:training_n].T
# Stop at total_n (not at the end of the array) so that reducing total_n
# really yields a test set of test_n examples.
test_set = data_matrix[training_n:total_n].T

# Row 0 is used as the target; the remaining rows are the inputs, divided by
# 255 (presumably 8-bit pixel intensities scaled to [0, 1] -- confirm against
# the CSV layout).
x_train = training_set[1:]/255 #input train
y_train = training_set[0] #target train
x_test = test_set[1:]/255 #input test
y_test = test_set[0] #target test

Machine learning model: a two-layer neural network (784 inputs, 10 ReLU hidden units, 10 softmax outputs)

In [None]:
class Model:
    """Two-layer fully connected classifier trained with gradient descent.

    Layout: 784 inputs -> 10 ReLU hidden units -> 10 softmax outputs.

    Data is column-major throughout: an input batch ``x`` has shape
    ``(784, m)`` (one example per column) and the labels ``y`` are a 1-D
    integer array of length ``m``.  Intermediate activations and gradients
    are stored on the instance (``z1``, ``a1``, ``z2``, ``a2``, ``dw1`` ...)
    and overwritten by every forward/backward pass.
    """

    #initialize parameters
    def __init__(self):
        """Draw all weights and biases uniformly from [-0.5, 0.5)."""
        self.w1 = np.random.rand(10, 784) - 0.5
        self.b1 = np.random.rand(10, 1) - 0.5
        self.w2 = np.random.rand(10, 10) - 0.5
        self.b2 = np.random.rand(10, 1) - 0.5

    #ReLu function
    def ReLu(self, z):
        """Return the element-wise maximum of ``z`` and 0."""
        return np.maximum(0, z)

    #softmax function
    def softmax(self, z):
        """Return the softmax of ``z`` taken along axis 0 (per column).

        The column maximum is subtracted before exponentiating.  This does
        not change the result mathematically but keeps ``np.exp`` from
        overflowing to ``inf`` (and the quotient from becoming ``nan``) when
        the logits are large.
        """
        shifted = z - np.max(z, axis=0, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=0, keepdims=True)

    #derivative of ReLu function
    def derivative_ReLu(self, z):
        """Return a boolean mask that is True where ``z`` is positive."""
        return z > 0

    #forward propagation
    def forward_propagation(self, x):
        """Run ``x`` through both layers, storing ``z1``, ``a1``, ``z2``, ``a2``.

        ``a2`` holds the class probabilities, one column per example.
        """
        self.z1 = self.w1.dot(x) + self.b1
        self.a1 = self.ReLu(self.z1)
        self.z2 = self.w2.dot(self.a1) + self.b2
        self.a2 = self.softmax(self.z2)

    #encode probabilities into 0-1
    def hot_encode(self, y):
        """Return the one-hot encoding of ``y`` with shape (classes, y.size).

        The number of classes is taken from the output layer rather than from
        ``y.max() + 1`` so the result always lines up with ``a2``, even when
        the batch happens not to contain the highest label.

        Raises:
            IndexError: if ``y`` contains a label outside the output range.
        """
        num_classes = self.w2.shape[0]
        one_hot_y = np.zeros((y.size, num_classes))
        one_hot_y[np.arange(y.size), y] = 1

        return one_hot_y.T

    #backwards propagation
    def backwards_propagation(self, x, y):
        """Compute gradients for all parameters from the last forward pass.

        Must be called after ``forward_propagation(x)`` with the same ``x``.
        Results are stored in ``dw1``, ``db1``, ``dw2`` and ``db2``.
        """
        one_hot_y = self.hot_encode(y)
        m = y.size

        self.dz2 = self.a2 - one_hot_y
        self.dw2 = (1/m) * self.dz2.dot(self.a1.T)
        # Sum over the examples only (axis 1) so each unit gets its own bias
        # gradient.  Summing the whole matrix would collapse it to a single
        # scalar, which for dz2 is always ~0 because every softmax column and
        # every one-hot column sums to 1.
        self.db2 = 1/m * np.sum(self.dz2, axis=1, keepdims=True)
        self.dz1 = (self.w2.T).dot(self.dz2) * self.derivative_ReLu(self.z1)
        self.dw1 = 1/m * self.dz1.dot(x.T)
        self.db1 = 1/m * np.sum(self.dz1, axis=1, keepdims=True)

    #update parameters
    def update_parameters(self, alpha):
        """Take one gradient-descent step with learning rate ``alpha``."""
        self.w1 = self.w1 - alpha * self.dw1
        self.b1 = self.b1 - alpha * self.db1
        self.w2 = self.w2 - alpha * self.dw2
        self.b2 = self.b2 - alpha * self.db2

    #convert probabilities into prediction
    def get_predictions(self):
        """Return the most probable class index for each column of ``a2``."""
        return np.argmax(self.a2, 0)

    #calculates how accurate predicted y is vs real y
    def get_accuracy(self, predictions, y):
        """Return the fraction of ``predictions`` that equal ``y``."""
        return np.sum(predictions == y)/y.size

    #makes prediction from inputs
    def make_predictions(self, x):
        """Run a forward pass on ``x`` and return the predicted class indices.

        Note that this overwrites the stored activations (``z1`` ... ``a2``).
        """
        self.forward_propagation(x)
        predictions = self.get_predictions()

        return predictions

    #plots training error vs test error graph
    def generalization_graph(self, x_graph, train_graph, test_graph):
        """Plot training and test error against the iteration number."""
        plt.plot(x_graph, train_graph, label = "training")
        plt.plot(x_graph, test_graph, label = "test")
        plt.legend()
        plt.show()

    #performs gradient descent
    def gradient_descent(self, x, y, x_test, y_test, iterations, alpha):
        """Train on ``(x, y)`` for ``iterations`` full-batch steps.

        After every step the training and test error are recorded; progress
        is printed every 50 iterations and the two error curves are plotted
        once training has finished.

        Args:
            x, y: training inputs (784, m) and integer labels (m,).
            x_test, y_test: held-out inputs and labels, same layout.
            iterations: number of gradient-descent steps.
            alpha: learning rate.
        """
        x_graph = []
        train_graph = []
        test_graph = []
        for i in range(iterations):
            self.forward_propagation(x)
            self.backwards_propagation(x, y)
            self.update_parameters(alpha)

            # a2 still holds the training-set output of this iteration's
            # forward pass (i.e. from before the parameter update).
            predictions = self.get_predictions()
            training_accuracy = self.get_accuracy(predictions, y)
            training_error = 1-training_accuracy

            # This forward pass overwrites the stored activations; they are
            # recomputed from x at the top of the next iteration.
            test_predictions = self.make_predictions(x_test)
            testing_accuracy = self.get_accuracy(test_predictions, y_test)
            testing_error = 1-testing_accuracy

            x_graph.append(i)
            train_graph.append(training_error)
            test_graph.append(testing_error)

            if i % 50 == 0:
                print("Iteration: ", i)
                print("training set accuracy", training_accuracy)
                print("test set accuracy", testing_accuracy)

        self.generalization_graph(x_graph, train_graph, test_graph)

In [None]:
# Build a freshly initialised network and train it on the training split,
# tracking the error on the held-out split as it goes.
model = Model()
model.gradient_descent(
    x_train,
    y_train,
    x_test,
    y_test,
    iterations=2000,
    alpha=0.25,
)