In [3]:
"""
Helper module to provide activation to network layers.
Four types of activations with their derivatives are available:

- Sigmoid
- Softmax
- Tanh
- ReLU
"""
import numpy as np

import os
import gzip
import cPickle
import wget
import random
import time


def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-z)))`` through
    ``np.logaddexp`` so that large-magnitude negative inputs do not overflow
    ``np.exp`` (which the naive formula does, emitting a RuntimeWarning).

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Sigmoid of ``z``, same shape as ``z``.

    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)),
    # computed without ever forming exp(-z) on its own.
    return np.exp(-np.logaddexp(0.0, np.negative(z)))


def sigmoid_prime(z):
    """Return the derivative of the sigmoid at ``z``: ``s * (1 - s)``.

    ``s`` is ``sigmoid(z)``; the result has the same shape as ``z``.
    """
    activation = sigmoid(z)
    return activation * (1 - activation)


def softmax(z):
    """Return the softmax of ``z``, normalised over ALL elements of ``z``.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but prevents ``np.exp`` from overflowing to
    ``inf`` (and the quotient from becoming ``nan``) for large inputs.

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Non-negative values with the shape of ``z`` that sum to 1.

    """
    z = np.asarray(z)
    if z.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(z)
    exponentials = np.exp(z - np.max(z))
    return exponentials / np.sum(exponentials)


def softmax_prime(z):
    """Return ``p * (1 - p)`` element-wise, where ``p = softmax(z)``.

    NOTE(review): this is only the diagonal of the softmax Jacobian; the
    cross terms between different outputs are not included.
    """
    probabilities = softmax(z)
    return probabilities * (1 - probabilities)


def tanh(z):
    """Return the hyperbolic tangent of ``z`` element-wise (wraps ``np.tanh``)."""
    activation = np.tanh(z)
    return activation


def tanh_prime(z):
    """Return the derivative of tanh at ``z``: ``1 - tanh(z) ** 2``."""
    activation = tanh(z)
    return 1 - activation * activation


def relu(z):
    """Return the rectified linear unit ``max(z, 0)`` element-wise."""
    lower_bound = 0
    return np.maximum(z, lower_bound)


def relu_prime(z):
    """Return the derivative of ReLU at ``z``: 1.0 where ``z > 0``, else 0.0.

    The original ``float(z > 0)`` only worked for scalars and raised a
    ``TypeError`` for arrays with more than one element, although every other
    activation in this module operates element-wise on arrays.

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    float or numpy.ndarray
        A plain ``float`` for scalar input (as before), otherwise a float
        array with the same shape as ``z``.

    """
    gradient = (np.asarray(z) > 0).astype(float)
    # Keep the historical return type for scalar callers.
    return float(gradient) if gradient.ndim == 0 else gradient


def _pairwise_features(pixels, drop_indices):
    """Return one image's pairwise pixel products, minus dropped entries, as a column."""
    # outer(p, p).ravel()[i * n + j] == p[i] * p[j], i.e. the same values and
    # order as multiply(repeat(p, n), tile(p, n)) without two n*n temporaries.
    products = np.outer(pixels, pixels).ravel()
    return np.delete(products, drop_indices).reshape(-1, 1)


def load_mnist():
    """Load MNIST and expand every image into pairwise pixel-product features.

    The archive ``data/mnist.pkl.gz`` (relative to the current working
    directory) is downloaded when it is missing, then unpickled into the
    training, validation and test splits. Each image ``x`` is replaced by the
    flattened products ``x[i] * x[j]`` with the positions listed in the
    module-level ``delete_indices`` removed, reshaped to a column vector.

    ``delete_indices`` must be defined at module level before this function
    is called.

    Returns
    -------
    tuple of list
        ``(training_data, validation_data, test_data)``. Each is a list of
        ``(features, label)`` pairs. Training labels are one-hot column
        vectors built by ``vectorized_result``; validation and test labels
        are passed through unchanged from the archive.

    """
    data_dir = os.path.join(os.getcwd(), 'data')
    archive_path = os.path.join(data_dir, 'mnist.pkl.gz')
    if not os.path.isdir(data_dir):
        os.mkdir(data_dir)
    # Check for the archive itself: an existing but empty ``data`` directory
    # (e.g. after an interrupted download) must still trigger the download.
    if not os.path.exists(archive_path):
        wget.download('http://deeplearning.net/data/mnist/mnist.pkl.gz', out=data_dir)

    print("load_mnist: images downloaded")
    with gzip.open(archive_path, 'rb') as data_file:
        # NOTE(security): unpickling can execute arbitrary code; only load an
        # archive obtained from a trusted source.
        training_data, validation_data, test_data = cPickle.load(data_file)
    print("load_mnist: images unpacked")

    # np.delete needs integer positions; convert once, outside the loops.
    drop_indices = np.asarray(delete_indices).astype(np.intp)

    total_training_points = len(training_data[0])
    training_inputs = []
    # Expand one image at a time so that only one full product vector is
    # alive at any moment.
    for processed, image in enumerate(training_data[0], 1):
        training_inputs.append(_pairwise_features(image, drop_indices))
        if processed % 500 == 0:
            print("Processed training point {0} of {1}.".format(processed, total_training_points))

    training_results = [vectorized_result(y) for y in training_data[1]]
    training_data = list(zip(training_inputs, training_results))

    print("load_mnist: training data ready")

    validation_inputs = [_pairwise_features(image, drop_indices) for image in validation_data[0]]
    validation_data = list(zip(validation_inputs, validation_data[1]))

    print("load_mnist: validation data ready")

    test_inputs = [_pairwise_features(image, drop_indices) for image in test_data[0]]
    test_data = list(zip(test_inputs, test_data[1]))

    print("load_mnist: test data ready")

    print("load_mnist: images split into training, validation and test sets")

    return training_data, validation_data, test_data


def vectorized_result(y):
    """Return a (10, 1) column vector that is 1.0 at index ``y`` and 0.0 elsewhere."""
    one_hot = np.zeros(10)
    one_hot[y] = 1.0
    return one_hot.reshape(10, 1)


class NeuralNetwork(object):
    """Fully connected feed-forward network with sigmoid activations.

    Training uses mini-batch stochastic gradient descent on a quadratic
    cost. Layer 0 is the input layer; index 0 of ``weights``, ``biases`` and
    the internal ``_zs`` buffers is a placeholder so that list indices line
    up with layer numbers.
    """

    def __init__(self, sizes=None, learning_rate=1.0, mini_batch_size=16,
                 epochs=10):
        """Initialize a Neural Network model.

        Parameters
        ----------
        sizes : list, optional
            A list of integers specifying number of neurons in each layer. Not
            required if a pretrained model is used.

        learning_rate : float, optional
            Learning rate for gradient descent optimization. Defaults to 1.0

        mini_batch_size : int, optional
            Size of each mini batch of training examples as used by Stochastic
            Gradient Descent. Denotes after how many examples the weights
            and biases would be updated. Default size is 16.

        epochs : int, optional
            Number of passes over the training data made by one call to
            ``fit``. Defaults to 10.

        """
        # Copy so the instance never shares a list with the caller (or with a
        # default argument).
        sizes = [] if sizes is None else list(sizes)

        # Input layer is layer 0, followed by hidden layers layer 1, 2, 3...
        self.sizes = sizes
        self.num_layers = len(sizes)

        # First term corresponds to layer 0 (input layer). No weights enter the
        # input layer and hence self.weights[0] is redundant.
        self.weights = [np.array([0])] + [np.random.randn(y, x) for y, x in
                                          zip(sizes[1:], sizes[:-1])]

        # Input layer does not have any biases. self.biases[0] is redundant.
        self.biases = [np.random.randn(y, 1) for y in sizes]

        # Input layer has no weights, biases associated. Hence z = wx + b is not
        # defined for input layer. self._zs[0] is redundant.
        self._zs = [np.zeros(bias.shape) for bias in self.biases]

        # Training examples can be treated as activations coming out of input
        # layer. Hence self._activations[0] = (training_example).
        self._activations = [np.zeros(bias.shape) for bias in self.biases]

        self.mini_batch_size = mini_batch_size
        self.epochs = epochs
        self.eta = learning_rate

    def fit(self, training_data, validation_data=None):
        """Fit (train) the Neural Network on provided training data. Fitting is
        carried out using Stochastic Gradient Descent Algorithm.

        Parameters
        ----------
        training_data : list of tuple
            A list of tuples of numpy arrays, ordered as (image, label).

        validation_data : list of tuple, optional
            Pairs of (image, integer label). If provided, the network will
            display validation accuracy (in percent) after each epoch.

        """
        # Work on a private list: shuffling must not reorder the caller's
        # data, and this also accepts any iterable (e.g. a ``zip`` object).
        training_data = list(training_data)
        if validation_data is not None:
            validation_data = list(validation_data)

        for epoch in range(1, self.epochs + 1):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k + self.mini_batch_size] for k in
                range(0, len(training_data), self.mini_batch_size)]

            for batch_index, mini_batch in enumerate(mini_batches):
                if batch_index % 100 == 0:
                    print("batch {0} of {1} / epoch {2}".format(
                        batch_index, len(mini_batches), epoch))

                # Accumulate the per-example gradients over the mini batch.
                nabla_b = [np.zeros(bias.shape) for bias in self.biases]
                nabla_w = [np.zeros(weight.shape) for weight in self.weights]
                for x, y in mini_batch:
                    self._forward_prop(x)
                    delta_nabla_b, delta_nabla_w = self._back_prop(x, y)
                    nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
                    nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

                # Average over the examples actually in this batch; the last
                # batch of an epoch may be shorter than ``mini_batch_size``.
                # ``float`` guards against integer division of an int rate.
                step = float(self.eta) / len(mini_batch)
                self.weights = [
                    w - step * dw for w, dw in zip(self.weights, nabla_w)]
                self.biases = [
                    b - step * db for b, db in zip(self.biases, nabla_b)]

            if validation_data:
                correct = self.validate(validation_data)
                accuracy = 100.0 * correct / len(validation_data)
                print("Epoch {0}, accuracy {1} %.".format(epoch, accuracy))
            else:
                print("Processed epoch {0}.".format(epoch))

    def validate(self, validation_data):
        """Validate the Neural Network on provided validation data. It uses the
        number of correctly predicted examples as validation accuracy metric.

        Parameters
        ----------
        validation_data : list of tuple
            Pairs of (image, integer label).

        Returns
        -------
        int
            Number of correctly predicted images.

        """
        return sum(int(self.predict(x) == y) for x, y in validation_data)

    def predict(self, x):
        """Predict the label of a single test example (image).

        Parameters
        ----------
        x : numpy.array

        Returns
        -------
        int
            Predicted label of example (image).

        """
        self._forward_prop(x)
        return np.argmax(self._activations[-1])

    def _forward_prop(self, x):
        """Propagate ``x`` through the network, caching every layer's
        weighted input in ``self._zs`` and output in ``self._activations``."""
        self._activations[0] = x
        for i in range(1, self.num_layers):
            self._zs[i] = (
                self.weights[i].dot(self._activations[i - 1]) + self.biases[i]
            )
            self._activations[i] = sigmoid(self._zs[i])

    def _back_prop(self, x, y):
        """Return ``(nabla_b, nabla_w)``, the per-layer gradients for one
        example.

        Reads the values cached by the preceding ``_forward_prop`` call, so
        it must be called right after ``_forward_prop(x)``. ``x`` itself is
        not used here.
        """
        nabla_b = [np.zeros(bias.shape) for bias in self.biases]
        nabla_w = [np.zeros(weight.shape) for weight in self.weights]

        # Output layer error.
        error = (self._activations[-1] - y) * sigmoid_prime(self._zs[-1])
        nabla_b[-1] = error
        nabla_w[-1] = error.dot(self._activations[-2].transpose())

        # Walk backwards through the hidden layers.
        for l in range(self.num_layers - 2, 0, -1):
            error = np.multiply(
                self.weights[l + 1].transpose().dot(error),
                sigmoid_prime(self._zs[l])
            )
            nabla_b[l] = error
            nabla_w[l] = error.dot(self._activations[l - 1].transpose())

        return nabla_b, nabla_w

    @staticmethod
    def _as_object_array(arrays):
        """Pack differently shaped arrays into a 1-D object array for saving."""
        packed = np.empty(len(arrays), dtype=object)
        for index, array in enumerate(arrays):
            packed[index] = array
        return packed

    def load(self, filename='model.npz'):
        """Prepare a neural network from a compressed binary containing weights
        and biases arrays. Size of layers are derived from dimensions of
        numpy arrays.

        Parameters
        ----------
        filename : str, optional
            Name of the ``.npz`` compressed binary in models directory.

        """
        path = os.path.join(os.curdir, 'models', filename)
        # The per-layer arrays are stored as object arrays, which NumPy only
        # loads with pickling enabled.
        # NOTE(security): unpickling can execute arbitrary code; only load
        # model files from a trusted source.
        with np.load(path, allow_pickle=True) as npz_members:
            self.weights = list(npz_members['weights'])
            self.biases = list(npz_members['biases'])

            # Other hyperparameters are set as specified in model. These were
            # cast to numpy arrays for saving in the compressed binary.
            self.mini_batch_size = int(npz_members['mini_batch_size'])
            self.epochs = int(npz_members['epochs'])
            self.eta = float(npz_members['eta'])

        # Bias vectors of each layer has same length as the number of neurons
        # in that layer. So we can build `sizes` through biases vectors.
        self.sizes = [b.shape[0] for b in self.biases]
        self.num_layers = len(self.sizes)

        # These are declared as per desired shape.
        self._zs = [np.zeros(bias.shape) for bias in self.biases]
        self._activations = [np.zeros(bias.shape) for bias in self.biases]

    def save(self, filename='model.npz'):
        """Save weights, biases and hyperparameters of neural network to a
        compressed binary. This ``.npz`` binary is saved in 'models' directory,
        which is created if it does not exist yet.

        Parameters
        ----------
        filename : str, optional
            Name of the ``.npz`` compressed binary in to be saved.

        """
        models_dir = os.path.join(os.curdir, 'models')
        if not os.path.isdir(models_dir):
            os.makedirs(models_dir)

        # Layers have different shapes, so they cannot form one regular
        # array; store them explicitly as object arrays instead of relying
        # on NumPy's implicit conversion of a ragged list.
        np.savez_compressed(
            file=os.path.join(models_dir, filename),
            weights=self._as_object_array(self.weights),
            biases=self._as_object_array(self.biases),
            mini_batch_size=self.mini_batch_size,
            epochs=self.epochs,
            eta=self.eta
        )


In [4]:
num_of_pixels = 784  # len(training_data[0][0]); hard-coded because data is not loaded yet!

# Flat positions (row-major, row * num_of_pixels + col) of every entry of the
# num_of_pixels x num_of_pixels pairwise-product matrix with col <= row, i.e.
# the diagonal and everything below it. load_mnist removes these entries.
# np.tril_indices yields the same positions in the same order as the former
# nested loop, but as an integer array and without calling np.append (which
# copies the whole array) once per index.
rows, cols = np.tril_indices(num_of_pixels)
delete_indices = num_of_pixels * rows + cols
print("load_mnist: Initialized delete indices")

load_mnist: Initialized delete indices


In [5]:
# Build the three data sets. load_mnist() reads the module-level
# `delete_indices`, so the cell that defines it has to run first.
training_data, validation_data, test_data = load_mnist()

load_mnist: images downloaded
load_mnist: images unpacked




Processed training point 500 of 50000.
Processed training point 1000 of 50000.
Processed training point 1500 of 50000.
Processed training point 2000 of 50000.
Processed training point 2500 of 50000.
Processed training point 3000 of 50000.
Processed training point 3500 of 50000.
Processed training point 4000 of 50000.
Processed training point 4500 of 50000.
Processed training point 5000 of 50000.
Processed training point 5500 of 50000.
Processed training point 6000 of 50000.
Processed training point 6500 of 50000.
Processed training point 7000 of 50000.
Processed training point 7500 of 50000.
Processed training point 8000 of 50000.
Processed training point 8500 of 50000.
Processed training point 9000 of 50000.
Processed training point 9500 of 50000.
Processed training point 10000 of 50000.
Processed training point 10500 of 50000.
Processed training point 11000 of 50000.
Processed training point 11500 of 50000.
Processed training point 12000 of 50000.
Processed training point 12500 of 50



load_mnist: validation data ready




load_mnist: test data ready
load_mnist: images split into training, validation and test sets


In [6]:
# Sanity check: number of (input, label) pairs in each split.
# Should get 50000 10000 10000

print(len(training_data))
print(len(validation_data))
print(len(test_data))

50000
10000
10000


In [7]:
# For safety in resource-heavy training, repeat fitting in next cell four times and save intermediate models below
# The 306936 inputs match the column length load_mnist() reshapes every
# example to (784 * 783 / 2 pairwise products); the output layer has 10
# units, matching the 10-entry vectors from vectorized_result().

higher_order_net = NeuralNetwork(sizes=[306936, 20, 20, 10], learning_rate=5.0, mini_batch_size=10, epochs=5)

In [18]:
# Wall-clock timing of one fit() call, i.e. `epochs` passes over the
# training data with validation accuracy printed after each pass.
start = time.time()

higher_order_net.fit(training_data, validation_data=validation_data)

end = time.time()

# Elapsed time in seconds.
print("time: ", end - start)

('batch ', 0, ' of ', 5000, ' / epoch ', 1)
('batch ', 100, ' of ', 5000, ' / epoch ', 1)
('batch ', 200, ' of ', 5000, ' / epoch ', 1)
('batch ', 300, ' of ', 5000, ' / epoch ', 1)
('batch ', 400, ' of ', 5000, ' / epoch ', 1)
('batch ', 500, ' of ', 5000, ' / epoch ', 1)
('batch ', 600, ' of ', 5000, ' / epoch ', 1)
('batch ', 700, ' of ', 5000, ' / epoch ', 1)
('batch ', 800, ' of ', 5000, ' / epoch ', 1)
('batch ', 900, ' of ', 5000, ' / epoch ', 1)
('batch ', 1000, ' of ', 5000, ' / epoch ', 1)
('batch ', 1100, ' of ', 5000, ' / epoch ', 1)
('batch ', 1200, ' of ', 5000, ' / epoch ', 1)
('batch ', 1300, ' of ', 5000, ' / epoch ', 1)
('batch ', 1400, ' of ', 5000, ' / epoch ', 1)
('batch ', 1500, ' of ', 5000, ' / epoch ', 1)
('batch ', 1600, ' of ', 5000, ' / epoch ', 1)
('batch ', 1700, ' of ', 5000, ' / epoch ', 1)
('batch ', 1800, ' of ', 5000, ' / epoch ', 1)
('batch ', 1900, ' of ', 5000, ' / epoch ', 1)
('batch ', 2000, ' of ', 5000, ' / epoch ', 1)
('batch ', 2100, ' of ', 

('batch ', 2400, ' of ', 5000, ' / epoch ', 4)
('batch ', 2500, ' of ', 5000, ' / epoch ', 4)
('batch ', 2600, ' of ', 5000, ' / epoch ', 4)
('batch ', 2700, ' of ', 5000, ' / epoch ', 4)
('batch ', 2800, ' of ', 5000, ' / epoch ', 4)
('batch ', 2900, ' of ', 5000, ' / epoch ', 4)
('batch ', 3000, ' of ', 5000, ' / epoch ', 4)
('batch ', 3100, ' of ', 5000, ' / epoch ', 4)
('batch ', 3200, ' of ', 5000, ' / epoch ', 4)
('batch ', 3300, ' of ', 5000, ' / epoch ', 4)
('batch ', 3400, ' of ', 5000, ' / epoch ', 4)
('batch ', 3500, ' of ', 5000, ' / epoch ', 4)
('batch ', 3600, ' of ', 5000, ' / epoch ', 4)
('batch ', 3700, ' of ', 5000, ' / epoch ', 4)
('batch ', 3800, ' of ', 5000, ' / epoch ', 4)
('batch ', 3900, ' of ', 5000, ' / epoch ', 4)
('batch ', 4000, ' of ', 5000, ' / epoch ', 4)
('batch ', 4100, ' of ', 5000, ' / epoch ', 4)
('batch ', 4200, ' of ', 5000, ' / epoch ', 4)
('batch ', 4300, ' of ', 5000, ' / epoch ', 4)
('batch ', 4400, ' of ', 5000, ' / epoch ', 4)
('batch ', 45

In [19]:
# Persist weights, biases and hyperparameters to the 'models' directory.
# NOTE(review): the "20epochs" in the name presumably reflects four runs of
# the 5-epoch fit cell above - confirm before relying on it.
higher_order_net.save(filename='model_all_pairwise_20epochs.npz')

In [20]:
# validate() returns the number of correct predictions; convert it to a
# percentage of the actual test-set size instead of assuming 10000 examples.
test_accuracy = 100.0 * higher_order_net.validate(test_data) / len(test_data)
print("Test accuracy {0} %.".format(test_accuracy))

Test accuracy 92.48 %.
