In [16]:
!pip install pandas numpy matplotlib Pillow



In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

In [4]:
data = pd.read_csv("train.csv")

In [7]:
data = np.array(data)
m, n = data.shape
np.random.shuffle(data) # shuffle before splitting into dev and training sets

data_dev = data[0:1000].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n]
X_dev = X_dev / 255.

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255.
_,m_train = X_train.shape

In [8]:
def init_params():
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2

# Activation function
def ReLU(Z):
    return np.maximum(0, Z) #So this function just checks if the number is greater then 0 or less than euqal to 0. and returns the desired maximum.

def softmax(Z):
    expZ = np.exp(Z - np.max(Z, axis=0, keepdims=True))
    return expZ / np.sum(expZ, axis=0, keepdims=True) 

# Forward propagation function
def forward_prop(W1, b1, W2, b2, X):
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2

# One hot encoding logic
def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max()+1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

def deriv_ReLU(Z):
    return Z > 0

# Backward propagation function
def backward_prop(Z1, A1, Z2, A2, W2, X, Y):
    m = Y.size
    one_hot_Y = one_hot(Y)
    dZ2 = 2*(A2 - one_hot_Y)
    dW2 = (1 / m) * dZ2.dot(A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = W2.T.dot(dZ2) * deriv_ReLU(Z1)
    dW1 = (1 / m) * dZ1.dot(X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * np.reshape(db1, (10,1))
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * np.reshape(db2, (10,1))

    return W1, b1, W2, b2

In [9]:
def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    # print(predictions, Y)
    return np.sum(predictions == Y) / Y.size
    

def gradient_descent(X, Y, iterations, alpha):
    W1, b1, W2, b2 = init_params()
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = backward_prop(Z1, A1, Z2, A2, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        loss = -np.mean(Y * np.log(A2 + 1e-8))  # Cross-entropy loss
        accuracy = get_accuracy(get_predictions(A2), Y)

        if i % 100 == 0:
            print(f"Iteration {i} - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")
    return W1, b1, W2, b2

In [20]:
def preprocess_image(image_path):
    image = Image.open(image_path)
    image = image.convert('L')
    image = image.resize((28, 28))
    image_array = np.array(image)
    image_array = image_array / 255.0
    image_array = np.expand_dims(image_array, axis=0)
    
    return image_array

img_arr = preprocess_image("rsz_image_editor.png")
img_arr

array([[[0.        , 0.        , 0.        , 0.        , 0.        ,
         0.00784314, 0.0745098 , 0.05490196, 0.03137255, 0.36470588,
         0.55686275, 0.55686275, 0.64705882, 0.61176471, 0.6       ,
         0.49803922, 0.23529412, 0.01568627, 0.0745098 , 0.06666667,
         0.07058824, 0.06666667, 0.00784314, 0.        , 0.        ,
         0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         0.01568627, 0.04705882, 0.09411765, 0.6       , 0.4627451 ,
         0.14509804, 0.03921569, 0.03529412, 0.05490196, 0.13333333,
         0.27843137, 0.5372549 , 0.37254902, 0.03137255, 0.0745098 ,
         0.07058824, 0.06666667, 0.00784314, 0.        , 0.        ,
         0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         0.00392157, 0.05882353, 0.61960784, 0.30588235, 0.        ,
         0.05098039, 0.07843137, 0.0745098 , 0.07058824, 0.05490196,
         0.

In [23]:
def make_predictions(X, W1, b1, W2, b2):
    _, _, _, A2 = forward_prop(W1, b1, W2, b2, X)
    predictions = get_predictions(A2)
    return predictions

def test_prediction(index, W1, b1, W2, b2):
    current_image = X_train[:, index, None]
    prediction = make_predictions(img_arr[:, index, None], W1, b1, W2, b2)
    label = Y_train[index]
    print("Prediction: ", prediction)
    print("Label: ", label)
    
    current_image = current_image.reshape((28, 28)) * 255
    plt.gray()
    plt.imshow(current_image, interpolation='nearest')
    plt.show()

In [24]:
W1, b1, W2, b2 = gradient_descent(X_train, Y_train, 10000, 0.01)

Iteration 0 - Loss: 14.5017, Accuracy: 0.1163
Iteration 100 - Loss: 11.0290, Accuracy: 0.2665
Iteration 200 - Loss: 11.5441, Accuracy: 0.3644
Iteration 300 - Loss: 12.3170, Accuracy: 0.4182
Iteration 400 - Loss: 13.1963, Accuracy: 0.4598
Iteration 500 - Loss: 14.2461, Accuracy: 0.5020
Iteration 600 - Loss: 15.5039, Accuracy: 0.5500
Iteration 700 - Loss: 16.8683, Accuracy: 0.5904
Iteration 800 - Loss: 18.2594, Accuracy: 0.6284
Iteration 900 - Loss: 19.6204, Accuracy: 0.6572
Iteration 1000 - Loss: 20.9082, Accuracy: 0.6813
Iteration 1100 - Loss: 22.0944, Accuracy: 0.7025
Iteration 1200 - Loss: 23.1716, Accuracy: 0.7217
Iteration 1300 - Loss: 24.1525, Accuracy: 0.7370
Iteration 1400 - Loss: 25.0605, Accuracy: 0.7523
Iteration 1500 - Loss: 25.9112, Accuracy: 0.7652
Iteration 1600 - Loss: 26.7057, Accuracy: 0.7776
Iteration 1700 - Loss: 27.4375, Accuracy: 0.7888
Iteration 1800 - Loss: 28.1004, Accuracy: 0.7975
Iteration 1900 - Loss: 28.6989, Accuracy: 0.8055
Iteration 2000 - Loss: 29.2391, 

In [22]:
test_prediction(7, W1, b1, W2, b2)
test_prediction(4, W1, b1, W2, b2)
test_prediction(5, W1, b1, W2, b2)
test_prediction(8, W1, b1, W2, b2)

ValueError: shapes (10,784) and (1,1,28) not aligned: 784 (dim 1) != 1 (dim 1)