<a href="https://colab.research.google.com/github/localhersheys/neural-network-from-scratch-on-python/blob/main/mnist_nn_001.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'mnist-in-csv:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F27352%2F34877%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240602%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240602T101045Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D66d0e51cd43ffa362404acc436da188c407a78cbd79463e0b05da0f202adae70f29eb926db856bd13241e1de8ac05ba5c7caec50f32bf4b2588ac09cb411edb09566a51ed0583caf0888bae9a9cb7bbb115557adca742eab39497e773cb8f71d826d3e88d177f6c5bab35ac4ac3ad737bed8d7bbc9e39c0886317146326168afd456b16a4d0e3815c169b248dd32d281f30d8d81007038b3a826d47e1678cb7bc72416d3a110e1ccbf4519f657cc83abd2521f643fcd5fbb29d4e8f179438663cb7ac1b215ae98a8dde5bba4ec4aa253b0c7e81d12edd8d8b49bb717c4240ce8eaa6abae90d63b274b26a8a4e9b01bb34e4248bae3bb1d3979ec57d6bf286166'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
import numpy as np #for dealing with matrices
import pandas as pd #for importing the data

#importing testing and training data
data_train = pd.read_csv('/kaggle/input/mnist-in-csv/mnist_train.csv')
data_test = pd.read_csv('/kaggle/input/mnist-in-csv/mnist_test.csv')

data_train

In [None]:
#converting pandas table to numpy array
data_train = np.array( data_train ).T
data_test = np.array( data_test ).T

data_train

In [None]:
#dividing data into labels and pixels
Y_train = data_train[0]
X_train = data_train[1:]

Y_test = data_test[0]
X_test = data_test[1:]

X_train.shape

In [8]:
#normalizing the data
X_train = X_train / 255
X_test = X_test / 255

In [9]:
#shape = no. of rows , no. of columns
n, m = X_train.shape

In [10]:
#initialize parameters
def init_params():
    W1 = np.random.rand(10 , 784) - 0.5
    b1 = np.random.rand(10 , 1) - 0.5
    W2 = np.random.rand(10 , 10) - 0.5
    b2 = np.random.rand(10 , 1) - 0.5
    return W1, b1, W2, b2

#defining activation functions ReLU and softmax
def ReLU(X):
    return np.maximum(0 , X)

def softmax(X):
    return np.exp(X)/ sum(np.exp(X))

#calculating value of each layer given the weights and biases
def fwd_prop(X, W1, b1, W2, b2):
    A1 = W1.dot(X) + b1
    Z1 = ReLU(A1)
    A2 = W2.dot(Z1) + b2
    Z2 = softmax(A2)
    return A1, Z1, A2, Z2

#defining a function for one hot encoding of Y
def one_hot_encode(Y, Z2):
    one_hot_Y = np.zeros(Z2.shape).T
    one_hot_Y[np.arange(Y.size),Y] = 1
    return one_hot_Y.T

#defining function for derivative of ReLU
def deriv(X):
    return X>0

#finding the amount of change we need to introduce to the weights and biases
def bwd_prop(X, Y, Z2, Z1, A1, W2):
    m = Y.size
    dZ2 = Z2 - one_hot_encode(Y,Z2)
    dW2 = 1/m * dZ2.dot(Z1.T)
    db2 = 1/m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2)*deriv(A1)
    dW1 = 1/m *dZ1.dot(X.T)
    db1 = 1/m * np.sum(dZ1)
    return dW1, db1, dW2, db2

#updating the weights and biases
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    return W1, b1, W2, b2

In [11]:
#getting the predictions given the final layer
def get_predictions(Z2):
    return np.argmax(Z2, 0)

#get accuracy given the prediction and label
def get_accuracy(prediction , Y):
    return sum( prediction == Y ) / Y.size

In [14]:
#function for running the model given X, Y, learning rate (alpha) and iteration
def train(X, Y, alpha, iterations):
    #initializing weights and biases
    W1, b1, W2, b2 = init_params()
    for i in range(iterations+1):
        #finding values of each layer
        A1, Z1, A2, Z2 = fwd_prop(X, W1, b1, W2, b2)
        #finding the amount of change we need to introduce to the weights and biases
        dW1, db1, dW2, db2 = bwd_prop(X, Y, Z2, Z1, A1, W2)
        #updating the parameters according to the values found above
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)

        #print the accuracy if iteration number is a multiple of 100
        if i%100 == 0 :
            print("iteration :" , i)
            print("accuracy : " , get_accuracy(get_predictions(Z2) , Y))
    return W1, b1, W2, b2

In [15]:
W1, b1, W2, b2 = train(X_train , Y_train , 0.1 , 500 )

iteration : 0
accuracy :  0.09378333333333333
iteration : 100
accuracy :  0.68545
iteration : 200
accuracy :  0.7862833333333333
iteration : 300
accuracy :  0.8240333333333333
iteration : 400
accuracy :  0.84415
iteration : 500
accuracy :  0.8571


In [16]:
A1_test, Z1_test, A2_test, Z2_test = fwd_prop(X_test, W1, b1, W2, b2)
test_predictions = get_predictions(Z2_test)
get_accuracy(test_predictions, Y_test)

0.8593