# Binary classification of MNIST digits (0 vs. 1) with a small convolutional network

In [1]:
import numpy as np
from keras import datasets
from keras import utils

# Short alias for the Keras MNIST dataset module; `mnist.load_data()` is called
# further down to obtain the (train, test) image/label arrays.
mnist = datasets.mnist



In [2]:
from dense import Dense
from convolution import Convolutional
from reshape import Reshape
from activations import Sigmoid, Tanh
from losses import binary_cross_entropy, binary_cross_entropy_prime

In [3]:
# Data cleaning to perform binary classification
def preprocess_data(x, y, limit, seed=None):
    """Build a shuffled 0-vs-1 subset of a digit dataset for binary classification.

    Parameters
    ----------
    x : np.ndarray
        Images, indexable along axis 0 in step with ``y``. Each selected
        sample is reshaped to ``(1, 28, 28)``, so it must hold 28 * 28 values.
    y : np.ndarray
        Integer ground-truth labels. Only entries equal to 0 or 1 are kept.
    limit : int
        Maximum number of samples kept per class (the first ``limit``
        occurrences of each label, in dataset order).
    seed : int or None, optional
        When given, the shuffle uses a dedicated ``np.random.default_rng(seed)``
        so the result is reproducible. When ``None`` (the default) the global
        NumPy RNG is used, exactly as before this parameter existed.

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` is float32 with shape ``(n, 1, 28, 28)`` and
        values divided by 255, and ``y`` is one-hot encoded with shape
        ``(n, 2, 1)``.
    """
    zero_index = np.where(y == 0)[0][:limit]  # positions whose true label is 0
    one_index = np.where(y == 1)[0][:limit]   # positions whose true label is 1
    all_indices = np.hstack((zero_index, one_index))

    # Shuffle so the two classes are interleaved instead of arriving in two runs.
    if seed is None:
        all_indices = np.random.permutation(all_indices)
    else:
        all_indices = np.random.default_rng(seed).permutation(all_indices)

    x, y = x[all_indices], y[all_indices]
    x = x.reshape(len(x), 1, 28, 28)
    x = x.astype("float32") / 255

    # Pin the class count: without it the width is inferred from the largest
    # label present, so a subset containing only zeros would yield one column
    # and the reshape to (n, 2, 1) below would fail.
    y = utils.to_categorical(y, num_classes=2)
    y = y.reshape(len(y), 2, 1)
    return x, y

In [4]:
# Seed NumPy's global RNG before any stochastic step so that the shuffle inside
# preprocess_data (and any later code drawing from np.random) gives the same
# result on every Restart & Run All.
SEED = 42
SAMPLES_PER_CLASS = 100  # cap on how many 0s and how many 1s are kept per split

np.random.seed(SEED)

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = preprocess_data(x_train, y_train, SAMPLES_PER_CLASS)
x_test, y_test = preprocess_data(x_test, y_test, SAMPLES_PER_CLASS)

In [5]:
# Neural network
# Every layer size below is derived from these constants, so the Reshape/Dense
# dimensions cannot silently drift from the convolution settings.
# NOTE(review): the Convolutional arguments are presumably
# (input_shape, kernel_size, depth) - confirm against convolution.py.
INPUT_SHAPE = (1, 28, 28)  # same per-sample shape that preprocess_data produces
KERNEL_SIZE = 3
CONV_DEPTH = 5
HIDDEN_UNITS = 100
NUM_CLASSES = 2            # one output per one-hot entry (digit 0, digit 1)

# 28 - 3 + 1 = 26: the spatial size the original Reshape expected from the
# convolution output.
CONV_OUT_SIZE = INPUT_SHAPE[1] - KERNEL_SIZE + 1
CONV_OUT_SHAPE = (CONV_DEPTH, CONV_OUT_SIZE, CONV_OUT_SIZE)
FLAT_SIZE = CONV_DEPTH * CONV_OUT_SIZE * CONV_OUT_SIZE

network = [
    Convolutional(INPUT_SHAPE, KERNEL_SIZE, CONV_DEPTH),
    Sigmoid(),
    Reshape(CONV_OUT_SHAPE, (FLAT_SIZE, 1)),
    Dense(FLAT_SIZE, HIDDEN_UNITS),
    Sigmoid(),
    Dense(HIDDEN_UNITS, NUM_CLASSES),
    Sigmoid()
]

In [8]:
epochs = 20
learning_rate = 0.1

# Train the model one sample at a time, reporting the mean loss after each epoch.
for epoch in range(1, epochs + 1):
    total_loss = 0
    for sample, target in zip(x_train, y_train):
        # Forward pass: feed the sample through every layer in order.
        activation = sample
        for layer in network:
            activation = layer.forward(activation)

        total_loss += binary_cross_entropy(target, activation)

        # Backward pass: walk the layers last-to-first, handing each one the
        # gradient returned by the layer after it. Every layer also receives
        # the learning rate (presumably to apply its own parameter update -
        # confirm in the layer modules).
        gradient = binary_cross_entropy_prime(target, activation)
        for layer in network[::-1]:
            gradient = layer.backward(gradient, learning_rate)

    mean_loss = total_loss / len(x_train)
    print(f"{epoch} / {epochs}, error = {mean_loss}")

1 / 20, error = 0.001031576103289042
2 / 20, error = 0.0010079366852002677
3 / 20, error = 0.0009853308397071994
4 / 20, error = 0.0009636917568577172
5 / 20, error = 0.000942957102496977
6 / 20, error = 0.0009230686798343568
7 / 20, error = 0.0009039721268520125
8 / 20, error = 0.0008856166377672782
9 / 20, error = 0.0008679546989171918
10 / 20, error = 0.0008509418324966409
11 / 20, error = 0.0008345363443824907
12 / 20, error = 0.0008186990741476856
13 / 20, error = 0.0008033931461174612
14 / 20, error = 0.0007885837200137992
15 / 20, error = 0.0007742377385172811
16 / 20, error = 0.0007603236670564822
17 / 20, error = 0.0007468112183541184
18 / 20, error = 0.0007336710507224003
19 / 20, error = 0.0007208744248747529
20 / 20, error = 0.0007083927994277995


In [9]:
# test
# Run every held-out sample through the trained network, print the per-sample
# prediction next to the true label, and finish with the overall accuracy so
# the result can be read without scanning every line.
correct = 0
for x, y in zip(x_test, y_test):
    output = x
    for layer in network:
        output = layer.forward(output)
    pred_label, true_label = np.argmax(output), np.argmax(y)
    correct += int(pred_label == true_label)
    print(f"pred: {pred_label}, true: {true_label}")

total = len(x_test)
if total:  # skip the summary rather than divide by zero on an empty test split
    print(f"accuracy: {correct} / {total} = {correct / total:.2%}")

pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: