In [4]:
pip install mnist

Collecting mnist
  Downloading https://files.pythonhosted.org/packages/c6/c4/5db3bfe009f8d71f1d532bbadbd0ec203764bba3a469e4703a889db8e5e0/mnist-0.2.2-py2.py3-none-any.whl
Installing collected packages: mnist
Successfully installed mnist-0.2.2


In [5]:
import mnist
import numpy as np

In [6]:
class Handwritten_digits_classifier:
    """Fully connected network for MNIST digits, trained one sample at a time.

    The network has 784 inputs, a configurable stack of hidden layers (each
    either sigmoid or ReLU) and a 10-unit softmax output layer. There are no
    bias terms. Constructing an instance loads MNIST through the ``mnist``
    package and immediately runs training, printing the accuracy on the test
    images after every epoch.

    ``self.weights`` is a dict holding the weight matrices under ``'W1'``,
    ``'W2'``, ... and, after a forward pass, the cached pre-activations
    ``'Z{i}'`` and activations ``'A{i}'`` of the most recent sample
    (``'A0'`` is the input itself).
    """

    def __init__(self, hidden_layers=[128, 64], hidden_layers_types=['sigmoid', 'relu'], epochs=10,
                 learning_rate=0.01):
        """Build the network, load MNIST and train.

        Args:
            hidden_layers: number of units in each hidden layer, input side first.
            hidden_layers_types: activation name for each hidden layer, in the
                same order as ``hidden_layers``. ``'sigmoid'`` selects the
                sigmoid; any other value selects ReLU.
            epochs: number of full passes over the training images.
            learning_rate: step size used for every weight update.

        If the two lists differ in length a message is printed and the
        constructor returns without loading data or training.
        """
        # Copy the sequences so the instance never aliases the shared default
        # lists (or a list the caller keeps mutating).
        self.hidden_layers = list(hidden_layers)
        self.hidden_layers_types = list(hidden_layers_types)
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.weights = self.initialization()

        if len(self.hidden_layers) != len(self.hidden_layers_types):
            print('Hidden layers units are not equal with Hidden layers activations')
            return

        # Scale pixel values by 1/255 and flatten every image to 784 values.
        x_train = mnist.train_images()
        x_train = (x_train / 255).astype('float32').reshape(-1, 784)

        x_val = mnist.test_images()
        x_val = (x_val / 255).astype('float32').reshape(-1, 784)

        y_train = self.to_categorical(mnist.train_labels())
        y_val = self.to_categorical(mnist.test_labels())

        self.train(x_train, y_train, x_val, y_val)

    def relu(self, x, derivative=False):
        """Return ``max(0, x)`` elementwise, or its derivative.

        The derivative is 1 where ``x >= 0`` and 0 where ``x < 0``.
        """
        if derivative:
            y = np.copy(x)
            y[y >= 0] = 1
            y[y < 0] = 0
            return y
        return np.maximum(0, x)

    def sigmoid(self, x, derivative=False):
        """Return the logistic sigmoid of ``x`` elementwise, or its derivative.

        Only ``exp(-|x|)`` is evaluated, which lies in (0, 1], so inputs of
        large magnitude neither overflow nor produce NaN. The derivative is
        computed as ``s * (1 - s)``.
        """
        decay = np.exp(-np.abs(x))
        # 1 / (1 + e^-x) for x >= 0; the algebraically equal e^x / (1 + e^x) otherwise.
        s = np.where(np.asarray(x) >= 0, 1 / (1 + decay), decay / (1 + decay))
        if derivative:
            return s * (1 - s)
        return s

    def softmax(self, x, derivative=False):
        """Return the softmax of ``x`` (normalised along axis 0), or ``s * (1 - s)``.

        The maximum of ``x`` is subtracted before exponentiating to avoid
        overflow. With ``derivative=True`` the elementwise product
        ``s * (1 - s)`` is returned (the diagonal of the softmax Jacobian),
        which is what ``backward_pass`` multiplies the output error by.
        """
        exps = np.exp(x - x.max())
        probabilities = exps / np.sum(exps, axis=0)
        if derivative:
            return probabilities * (1 - probabilities)
        return probabilities

    def to_categorical(self, y, num_classes=None, dtype='float32'):
        """One-hot encode integer class labels.

        Args:
            y: array-like of integer labels.
            num_classes: width of the encoding; defaults to ``max(y) + 1``.
            dtype: dtype of the returned array.

        Returns:
            Array of shape ``y.shape + (num_classes,)``; a trailing axis of
            length 1 in ``y`` is dropped first when ``y`` has more than one axis.
        """
        y = np.array(y, dtype='int')
        input_shape = y.shape
        if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
            input_shape = tuple(input_shape[:-1])
        y = y.ravel()
        if not num_classes:
            num_classes = np.max(y) + 1
        n = y.shape[0]
        categorical = np.zeros((n, num_classes), dtype=dtype)
        categorical[np.arange(n), y] = 1
        output_shape = input_shape + (num_classes,)
        categorical = np.reshape(categorical, output_shape)
        return categorical

    def initialization(self):
        """Create the random weight matrices for all layers.

        Layer sizes are ``[784, *hidden_layers, 10]``. ``W{i}`` has shape
        ``(units[i], units[i - 1])`` and is drawn from a standard normal
        distribution scaled by ``sqrt(1 / units[i])``.
        """
        layers_unit = [784]
        layers_unit.extend(self.hidden_layers)
        layers_unit.append(10)
        weights = {}
        for i in range(1, len(layers_unit)):
            weights[f'W{i}'] = np.random.randn(layers_unit[i], layers_unit[i - 1]) * np.sqrt(1. / layers_unit[i])
        return weights

    def _hidden_activation(self, layer):
        """Return the activation method of hidden layer ``layer`` (1-based).

        Both passes go through this single lookup so the derivative used in
        ``backward_pass`` always belongs to the activation that
        ``forward_pass`` applied to the same layer.
        """
        if self.hidden_layers_types[layer - 1] == 'sigmoid':
            return self.sigmoid
        return self.relu

    def forward_pass(self, x_train):
        """Propagate one sample through the network.

        Stores ``'A0'`` (the input) and every layer's ``'Z{i}'`` / ``'A{i}'``
        in ``self.weights`` for use by ``backward_pass``.

        Returns:
            The softmax activations of the output layer.
        """
        cache = self.weights
        output_layer = len(self.hidden_layers) + 1
        # input layer activations becomes sample
        cache['A0'] = x_train
        for i in range(1, output_layer + 1):
            cache[f'Z{i}'] = np.dot(cache[f'W{i}'], cache[f'A{i - 1}'])
            activation = self.softmax if i == output_layer else self._hidden_activation(i)
            cache[f'A{i}'] = activation(cache[f'Z{i}'])

        return cache[f'A{output_layer}']

    def backward_pass(self, y_train, output):
        """Backpropagate the error of the last forward pass and update the weights.

        Args:
            y_train: one-hot target for the sample given to ``forward_pass``.
            output: the value ``forward_pass`` returned for that sample.

        All gradients are computed from the current weights first; the
        updates ``W -= learning_rate * gradient`` are applied only afterwards.
        """
        params = self.weights
        output_layer = len(self.hidden_layers) + 1
        change_w = {}

        # Calculate last W update
        error = (output - y_train) / output.shape[0] * self.softmax(params[f'Z{output_layer}'], derivative=True)
        change_w[f'W{output_layer}'] = np.outer(error, params[f'A{output_layer - 1}'])

        # Walk back through the hidden layers, using each layer's own activation.
        for i in reversed(range(1, output_layer)):
            activation = self._hidden_activation(i)
            error = np.dot(params[f'W{i + 1}'].T, error) * activation(params[f'Z{i}'], derivative=True)
            change_w[f'W{i}'] = np.outer(error, params[f'A{i - 1}'])

        for key, value in change_w.items():
            self.weights[key] -= self.learning_rate * value

    def compute_accuracy(self, x_val, y_val):
        """Return the fraction of samples whose argmax prediction matches the argmax of the target."""
        predictions = [
            np.argmax(self.forward_pass(x)) == np.argmax(y)
            for x, y in zip(x_val, y_val)
        ]
        return np.mean(predictions)

    def train(self, x_train, y_train, x_val, y_val):
        """Run ``self.epochs`` passes of per-sample updates and print validation accuracy after each."""
        for iteration in range(self.epochs):
            for x, y in zip(x_train, y_train):
                output = self.forward_pass(x)
                self.backward_pass(y, output)

            accuracy = self.compute_accuracy(x_val, y_val)
            print(f'Epoch: {iteration + 1}, Accuracy: {accuracy * 100:.2f}%')


In [7]:
# Three hidden layers (256 -> 128 -> 64 units). The constructor loads MNIST and
# trains right away, printing the test-set accuracy after each epoch.
classifier = Handwritten_digits_classifier(
    hidden_layers=[256, 128, 64],
    hidden_layers_types=['relu', 'sigmoid', 'relu'],
)

Epoch: 1, Accuracy: 41.52%
Epoch: 2, Accuracy: 69.44%
Epoch: 3, Accuracy: 83.94%
Epoch: 4, Accuracy: 86.74%
Epoch: 5, Accuracy: 87.81%
Epoch: 6, Accuracy: 89.14%
Epoch: 7, Accuracy: 90.71%
Epoch: 8, Accuracy: 91.12%
Epoch: 9, Accuracy: 90.70%
Epoch: 10, Accuracy: 91.96%
