# Binary classification of MNIST digits (0 vs. 1)

In [1]:
import numpy as np
from keras import datasets
from keras import utils

# Shorthand for keras' datasets.mnist; its load_data() is called later in the notebook.
mnist = datasets.mnist



In [2]:
from dense import Dense
from convolution import Convolutional
from reshape import Reshape
from activations import Sigmoid, Tanh
from losses import binary_cross_entropy, binary_cross_entropy_prime
from network import train, predict

In [3]:
# Data cleaning to perform binary classification
def preprocess_data(x, y, limit, seed=None):
    """Build a shuffled, normalised 0-vs-1 subset of a labelled image set.

    Args:
        x: Image array indexed by sample along its first axis; every sample
            must hold 28 * 28 values because it is reshaped to (1, 28, 28).
        y: 1-D array of integer digit labels aligned with ``x``.
        limit: Maximum number of samples kept per class (the first ``limit``
            zeros and the first ``limit`` ones, in their original order).
        seed: Optional seed for the shuffle. ``None`` (the default) draws from
            NumPy's global random state, exactly as before; pass an int to
            make the ordering reproducible.

    Returns:
        Tuple ``(x, y)``: ``x`` is float32 with shape (n, 1, 28, 28) and
        values divided by 255; ``y`` is one-hot with shape (n, 2, 1).
    """
    zero_index = np.where(y == 0)[0][:limit]  # positions whose label is 0
    one_index = np.where(y == 1)[0][:limit]  # positions whose label is 1
    all_indices = np.hstack((zero_index, one_index))  # zeros first, then ones

    # Shuffle so the two classes are interleaved.
    if seed is None:
        all_indices = np.random.permutation(all_indices)
    else:
        all_indices = np.random.default_rng(seed).permutation(all_indices)

    x, y = x[all_indices], y[all_indices]
    x = x.reshape(len(x), 1, 28, 28)
    x = x.astype("float32") / 255

    # Pin the class count to 2. Letting to_categorical infer it from
    # max(y) + 1 yields a single column whenever no 1s were selected, and the
    # reshape to (n, 2, 1) below would then fail.
    y = utils.to_categorical(y, num_classes=2)
    y = y.reshape(len(y), 2, 1)
    return x, y

In [4]:
# Fetch both MNIST splits, then reduce each to at most `samples_per_class`
# zeros and ones via preprocess_data (train split first, then test split).
samples_per_class = 100
train_split, test_split = mnist.load_data()
x_train, y_train = preprocess_data(*train_split, samples_per_class)
x_test, y_test = preprocess_data(*test_split, samples_per_class)

In [5]:
# Neural network: a Convolutional layer, a Reshape to a column vector, and two
# Dense layers, each of the three followed by a Sigmoid.
conv_output_shape = (5, 26, 26)  # shape handed to Reshape after the convolution
flat_size = 5 * 26 * 26          # length of the flattened column vector
network = [
    Convolutional((1, 28, 28), 3, 5),
    Sigmoid(),
    Reshape(conv_output_shape, (flat_size, 1)),
    Dense(flat_size, 100),
    Sigmoid(),
    Dense(100, 2),
    Sigmoid(),
]

In [6]:
# Training hyperparameters
learning_rate = 0.1
epochs = 20

# All arguments are passed positionally. The trailing True presumably enables
# the per-epoch error log — TODO confirm against network.train.
train(
    network, x_train, y_train,
    binary_cross_entropy, binary_cross_entropy_prime,
    learning_rate, epochs, True,
)

1 / 20, error = 0.4769455872113758
2 / 20, error = 0.1482396323793187
3 / 20, error = 0.0772686147757989
4 / 20, error = 0.052069877378219456
5 / 20, error = 0.04728558671185413
6 / 20, error = 0.028024710941446557
7 / 20, error = 0.021312234265610016
8 / 20, error = 0.02197499551970031
9 / 20, error = 0.029368501919644813
10 / 20, error = 0.01744519124616011
11 / 20, error = 0.007699499929660772
12 / 20, error = 0.006115449699259593
13 / 20, error = 0.005354676263101114
14 / 20, error = 0.0047408163591131005
15 / 20, error = 0.0041831803719711074
16 / 20, error = 0.003744228310827038
17 / 20, error = 0.003444052498805794
18 / 20, error = 0.003230171745758417
19 / 20, error = 0.0030630879697438225
20 / 20, error = 0.0029229889345982057


In [7]:
# test
# Run every held-out sample through the network, print each prediction next to
# its label, and count the matches so the cell ends with one accuracy line
# instead of leaving the reader to scan every row.
correct = 0
for x, y in zip(x_test, y_test):
    output = predict(network, x)
    predicted_label = np.argmax(output)  # index of the largest output value
    true_label = np.argmax(y)            # index of the 1 in the one-hot label
    correct += int(predicted_label == true_label)
    print(f"pred: {predicted_label}, true: {true_label}")

print(f"accuracy: {correct} / {len(x_test)}")

pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: