Neural Network
========
@author: Matthew Rowley

### Acknowledgements
The whole idea for this notebook is taken from the excellent video series by the YouTube channel "3Blue1Brown". The series begins with the video *But what **is** a Neural Network? | Deep learning, Part 1* (<https://www.youtube.com/watch?v=aircAruvnKk&t=1s>). The channel is a one-man labor of love produced by the talented Grant Sanderson.

### Description
This notebook will explore the implementation and training of a neural network. The network will take images of hand-drawn numerals as inputs, and output guesses for which numeral was drawn.

Following Grant's lead, the network will have two hidden layers, each with 16 nodes.

In [101]:
from __future__ import division
import numpy as np
import cv2
import os
import idx2numpy
import gc
import pickle

In [144]:
class NeuralNetwork(object):
    """Fully connected network: input layer, two hidden layers, output layer.

    Layer sizes follow the shapes of the weight matrices. With the default
    parameters the layout is 784 -> 16 -> 16 -> 10 (a flattened 28x28 image
    in, one output node per numeral out).

    Each layer is computed as ``normalize(dot(previous_nodes, W) - B)``; note
    that the bias is *subtracted* from the weighted sum.
    """
    def __init__(self, W1=None, W2=None, W3=None, B2=None, B3=None, B4=None, ReLU=False, variability=None):
        """Store the network parameters, drawing any that are not supplied.

        Parameters
        ----------
        W1, W2, W3 : array, optional
            Weight matrices between consecutive layers. Missing ones are drawn
            uniformly from [-0.5, 0.5) with shapes (784, 16), (16, 16), (16, 10).
        B2, B3, B4 : array, optional
            Bias vectors of the two hidden layers and the output layer. Missing
            ones are drawn uniformly from [-0.5, 0.5) with lengths 16, 16, 10.
        ReLU : bool, optional
            If true, ``normalize`` clips at zero; otherwise it applies a sigmoid.
        variability : float, optional
            Scale of the perturbation applied by ``randomize`` (default 0.1).
        """
        # The draw order (W1, W2, W3, B2, B3, B4) is kept fixed so seeded runs stay reproducible.
        if W1 is None: W1 = np.random.random([784, 16]) - 0.5
        if W2 is None: W2 = np.random.random([16, 16]) - 0.5
        if W3 is None: W3 = np.random.random([16, 10]) - 0.5
        if B2 is None: B2 = np.random.random(16) - 0.5
        if B3 is None: B3 = np.random.random(16) - 0.5
        if B4 is None: B4 = np.random.random(10) - 0.5
        if variability is None: variability = 0.1
        self.variability = variability  # How much to randomize weights and biases in the randomize function
        self.W1 = W1  # Weights
        self.W2 = W2
        self.W3 = W3
        self.B2 = B2  # Node Biases
        self.B3 = B3
        self.B4 = B4
        # Nodes (initialized as zeros); sizes are read from the weights so that
        # caller-supplied parameters of another shape stay consistent.
        self.N1 = np.zeros(np.shape(W1)[0])
        self.N2 = np.zeros(np.shape(W1)[1])
        self.N3 = np.zeros(np.shape(W2)[1])
        self.N4 = np.zeros(np.shape(W3)[1])
        self.ReLU = ReLU
        # Running cost: every identifyNumber call ADDS to this, it is never reset here.
        self.error = 0

    def identifyNumber(self, N1=None, trueValue=None):
        """Run one forward pass and add this sample's cost to ``self.error``.

        Parameters
        ----------
        N1 : 1-D array, optional
            Input node values. Defaults to uniform random values, one per input node.
        trueValue : number, optional
            Index of the correct output node. Defaults to a random valid index.
            A value that matches no output index (e.g. the 99 label used for
            noise samples) makes every output node count as "should be 0".

        Returns
        -------
        tuple
            ``(guess, N4, error)``: index of the largest output node (first one
            on ties), the output node values, and the cost accumulated over all
            calls made on this instance so far (not just this sample).
        """
        if N1 is None: N1 = np.random.random(np.shape(self.W1)[0])
        # np.random.random() returns a plain float; the original random(1) form relied
        # on the deprecated implicit size-1-array -> int conversion.
        if trueValue is None: trueValue = int(np.random.random() * np.shape(self.W3)[1])
        self.N1 = N1
        self.N2 = self.normalize(np.dot(self.N1, self.W1) - self.B2)
        self.N3 = self.normalize(np.dot(self.N2, self.W2) - self.B3)
        self.N4 = self.normalize(np.dot(self.N3, self.W3) - self.B4)
        # Both activations produce non-negative outputs, so argmax (first occurrence
        # of the maximum) matches a "strictly greater than a running max of 0" scan.
        max_i = int(np.argmax(self.N4))
        for i in range(len(self.N4)):
            if i == trueValue:
                self.error += 2.5*(1.0-self.N4[i])**2  # Weigh the correct answer more than others
            else:
                self.error += (0.0-self.N4[i])**2
        return max_i, self.N4, self.error  # this is (numeral, [values for each output node], cumulative error)

    def normalize(self, nodeVals):
        """Apply the activation function element-wise.

        Returns ``nodeVals`` clipped below at 0 when ``self.ReLU`` is true,
        otherwise the logistic sigmoid ``1 / (1 + exp(-nodeVals))``.
        """
        if self.ReLU:
            return nodeVals.clip(min=0)
        else:
            return 1.0 / (1.0 + np.exp(-nodeVals))

    def randomize(self):
        """Perturb every weight and bias by uniform noise in +/- variability/2.

        New arrays are assigned rather than modified in place, so arrays shared
        with other instances (e.g. clones built from the same weights) are untouched.
        """
        # Noise shapes follow the current parameters instead of fixed layer sizes.
        self.W1 = self.W1 + (np.random.random(np.shape(self.W1)) - 0.5)*self.variability
        self.W2 = self.W2 + (np.random.random(np.shape(self.W2)) - 0.5)*self.variability
        self.W3 = self.W3 + (np.random.random(np.shape(self.W3)) - 0.5)*self.variability
        self.B2 = self.B2 + (np.random.random(np.shape(self.B2)) - 0.5)*self.variability
        self.B3 = self.B3 + (np.random.random(np.shape(self.B3)) - 0.5)*self.variability
        self.B4 = self.B4 + (np.random.random(np.shape(self.B4)) - 0.5)*self.variability

#### Now create an instance of the neural network with random weights and random biases, and test it on a random input.

In [3]:
# With no arguments, every weight and bias is drawn at random by the constructor.
myNetwork = NeuralNetwork()

In [4]:
# Called without arguments, identifyNumber() makes up both a random input
# and a random "true" label, so this only shows that the plumbing works.
random_trial = myNetwork.identifyNumber()
print(random_trial)

(3, array([ 0.5777491 ,  0.5680225 ,  0.37728706,  0.83418981,  0.38067846,
        0.3822205 ,  0.52272178,  0.61963346,  0.41488867,  0.1699427 ]), 2.5078221853863036)


#### I've drawn a "6". Here I import it and run it through the network.

In [5]:
# Read the hand-drawn sample; the flag 0 asks OpenCV for a single-channel (grayscale) image.
test_image_path = os.path.join("Data", "Test.png")
im = cv2.imread(test_image_path, 0)

In [6]:
# Turn the 2-D image into a 1-D vector and divide by 255 before feeding it in.
# NOTE: this overwrites `im`, so re-running the cell would rescale it a second time.
im = im.flatten() / 255.0
first_guess = myNetwork.identifyNumber(trueValue=6, N1=im)
print(first_guess)

(3, array([ 0.57086273,  0.55495475,  0.38059479,  0.83603494,  0.42355018,
        0.36511283,  0.49332266,  0.61725436,  0.41802692,  0.16778318]), 5.1388127444131557)


#### Now I will set up a short script for optimizing a network using a single training sample and a roughly genetic optimization routine.

In [7]:
# Start from a population of ten independently randomized networks...
networks = [NeuralNetwork() for i in range(10)]
# ...and score each of them once on the hand-drawn 6 (the score lands in .error).
for candidate in networks:
    candidate.identifyNumber(trueValue=6, N1=im)

#### Having set up and run the network once, I am ready to "train" it to recognize a 6. Run the cell below as many times as necessary to get very good results.

In [8]:
# One generation of the "genetic" search on the single sample:
# pick the lowest-error network (first one on ties)...
errors = [network.error for network in networks]
min_val = min(errors)
min_i = errors.index(min_val)
print("Best Error, index: {}, {}".format(min_val, min_i))

# ...copy its parameters into every slot of the population...
best = networks[min_i]
W1, W2, W3 = best.W1, best.W2, best.W3
B2, B3, B4 = best.B2, best.B3, best.B4
networks[:] = [NeuralNetwork(W1=W1, W2=W2, W3=W3, B2=B2, B3=B3, B4=B4)
               for i in range(len(networks))]

# ...mutate all clones except slot 0, which keeps the unmodified winner...
for network in networks[1:]:
    network.randomize()

# ...and score the new generation (fresh instances start with error 0).
for network in networks:
    network.identifyNumber(N1=im, trueValue=6)

Best Error, index: 1.99892414858, 2


In [9]:
# Slot 0 holds the unmutated winner of the last generation. The returned error is
# cumulative: it includes the scoring pass already run on this instance.
networks[0].identifyNumber(trueValue=6, N1=im)

(5, array([ 0.49157783,  0.28965678,  0.43161903,  0.46802914,  0.37718666,
         0.56961876,  0.53066831,  0.2972307 ,  0.56209105,  0.42039042]), 3.9978482971548632)

#### Of course, here I have grossly "overfit" and there is no likelihood that the network is doing any image processing at all, but rather gaming the numbers to always give a 6

### Importing Images

#### I need many images (thousands) to adequately train the network. Thankfully, I can rely on the databases provided by Drs. LeCun and Cortes at: <http://yann.lecun.com/exdb/mnist/>

#### The databases include two sets of images (60,000 training and 10,000 testing), and two sets of labels (training and testing)

#### The image arrays must also be flattened so that the image data is a 1-D array and normalized to a maximum value of 1


In [10]:
# Read the four MNIST IDX files (images and labels, training and testing) from Data/.
unflattened_training_images = idx2numpy.convert_from_file(os.path.join("Data",'train-images.idx3-ubyte'))
training_labels = idx2numpy.convert_from_file(os.path.join("Data",'train-labels.idx1-ubyte'))
unflattened_testing_images = idx2numpy.convert_from_file(os.path.join("Data",'t10k-images.idx3-ubyte'))
testing_labels = idx2numpy.convert_from_file(os.path.join("Data",'t10k-labels.idx1-ubyte'))
print(unflattened_training_images.shape)
# Flatten every image to one row and divide by 255 in a single vectorized step.
# The row count comes from the data itself rather than a hard-coded 60000/10000,
# and the division yields a new float array, exactly as the per-image loop did.
training_images = unflattened_training_images.reshape(len(unflattened_training_images), -1) / 255.0
testing_images = unflattened_testing_images.reshape(len(unflattened_testing_images), -1) / 255.0
print(training_images.shape)
# Clean up some memory: drop the references to the raw image arrays.
unflattened_training_images = None
unflattened_testing_images = None
gc.collect()

(60000, 28, 28)
(60000, 784)


9

#### I will also append random data with a 99 label to discourage the network from overconfident guessing

In [11]:
# Pad both data sets with uniform-noise "images" labelled 99. No output node has
# index 99, so for these samples every output is pushed toward 0.
# NOTE: the cell grows the arrays in place - running it twice appends twice.
noise_for_training = np.random.rand(6000, 784)
noise_training_labels = 99.0 * np.ones(6000)
training_images = np.concatenate((training_images, noise_for_training))
training_labels = np.concatenate((training_labels, noise_training_labels))

noise_for_testing = np.random.rand(1000, 784)
noise_testing_labels = 99.0 * np.ones(1000)
testing_images = np.concatenate((testing_images, noise_for_testing))
testing_labels = np.concatenate((testing_labels, noise_testing_labels))

for augmented in (training_images, training_labels):
    print(augmented.shape)

(66000, 784)
(66000,)


## Training the network the hard way

#### Now we can train the network using my simple genetic algorithm demonstrated above, and a random sample of 500 images from the training data

#### It will converge very slowly due to the enormous number of parameters and the randomness of the walk toward a local minimum, but it was relatively easy to code

#### Another advantage to this algorithm is that it parallelizes easily, but I haven't bothered with that here


In [52]:
# Fresh population of 50 random networks for the full-data search.
networks = [NeuralNetwork() for i in range(50)]
# Draw 500 sample indices (with replacement). randint's upper bound is exclusive,
# so len(training_images) is used to keep the very last row eligible.
trainers = np.random.randint(0, len(training_images), 500)
# Score every network on the same sample; each call adds to network.error.
for network in networks:
    for index in trainers:
        network.identifyNumber(N1=training_images[index], trueValue=training_labels[index])
# Pick the network with the smallest accumulated error (first one on ties).
min_i = 0
min_val = networks[0].error
for i, network in enumerate(networks):
    if network.error < min_val:
        min_i = i
        min_val = network.error
# Report the mean error per sample, the same unit the training cell uses for
# target_error and for its own "Best Error" printout.
min_val = min_val / len(trainers)
print("Best Error, index: {}, {}".format(min_val, min_i))

In [169]:
target_error = .435   # stop once the best mean per-sample error drops to this value
sample_number = 1000  # training samples drawn per generation
load_best = True      # resume from the network saved in Data/Best.pkl
if load_best:
    loadfile = os.path.join("Data", "Best.pkl")
    if os.path.exists(loadfile):
        # The file must be opened for *reading*: mode 'wb' would truncate the saved
        # network and make pickle.load fail.
        # NOTE(security): unpickling can execute arbitrary code - only load files
        # written by this notebook.
        with open(loadfile, 'rb') as pickled_network:
            networks[min_i] = pickle.load(pickled_network)
    else:
        print("No saved network at {}; continuing with the current population".format(loadfile))
while(min_val > target_error):
    # Clone the current best network into every slot of the population.
    W1 = networks[min_i].W1
    W2 = networks[min_i].W2
    W3 = networks[min_i].W3
    B2 = networks[min_i].B2
    B3 = networks[min_i].B3
    B4 = networks[min_i].B4
    # Mutation size shrinks toward 0 as the best error approaches the target.
    variability = (1 - target_error/min_val)**2/20
    for i in range(len(networks)):
        networks[i] = NeuralNetwork(W1=W1, W2=W2, W3=W3, B2=B2, B3=B3, B4=B4, variability=variability)
    # Get new training data with each iteration. randint's upper bound is exclusive,
    # so len(training_images) keeps the last row eligible.
    trainers = np.random.randint(0, len(training_images), sample_number)
    for i, network in enumerate(networks):
        if (i != 0):
            network.randomize()  # slot 0 keeps the unmodified parent
        for index in trainers:
            network.identifyNumber(N1=training_images[index], trueValue=training_labels[index])
    # Select the lowest accumulated error of this generation (first one on ties).
    min_i = 0
    min_val = networks[0].error
    for i, network in enumerate(networks):
        if network.error < min_val:
            min_i = i
            min_val = network.error
    min_val = min_val / sample_number  # mean error per sample
    print("Best Error, index: {}, {}".format(min_val, min_i))

Best Error, index: 0.576162037037, 6
Best Error, index: 0.510000181124, 9
Best Error, index: 0.507521206212, 44
Best Error, index: 0.514562521854, 22
Best Error, index: 0.537470610846, 20
Best Error, index: 0.56191853577, 23
Best Error, index: 0.515583830139, 46
Best Error, index: 0.538422422095, 22
Best Error, index: 0.524435912644, 3
Best Error, index: 0.508049508232, 22
Best Error, index: 0.552432534258, 44
Best Error, index: 0.493848151862, 10
Best Error, index: 0.520410193126, 24
Best Error, index: 0.530732119131, 28
Best Error, index: 0.526655243057, 40
Best Error, index: 0.459274674026, 22
Best Error, index: 0.50394439781, 11
Best Error, index: 0.499146210893, 44
Best Error, index: 0.493359509607, 10
Best Error, index: 0.549261360695, 17
Best Error, index: 0.513287087681, 19
Best Error, index: 0.479831902077, 45
Best Error, index: 0.532545436753, 19
Best Error, index: 0.505454850292, 30
Best Error, index: 0.489631727699, 8
Best Error, index: 0.485085103616, 20
Best Error, index:

In [170]:
# Spot-check the best network on ten randomly chosen test samples.
# Each printed result is (guess, output values, cumulative error) followed by the true label;
# the error keeps growing because identifyNumber adds to network.error on every call.
for i in range(10):
    # randint's upper bound is exclusive: len(testing_images) keeps the last row eligible.
    index = np.random.randint(len(testing_images))
    print(networks[min_i].identifyNumber(N1=testing_images[index], trueValue=testing_labels[index]), testing_labels[index])

((4, array([  7.25546659e-04,   2.22422028e-06,   1.55013939e-02,
         7.56615117e-04,   9.94002873e-01,   1.25067835e-02,
         3.79787407e-02,   2.31631667e-03,   2.11343119e-02,
         7.05378111e-02]), 434.02348347223693), 4.0)
((9, array([  1.88596671e-04,   3.95053796e-06,   1.03189875e-04,
         1.88088215e-01,   2.52174688e-02,   1.58902202e-01,
         6.79808223e-05,   1.72841663e-01,   4.70090335e-02,
         5.27748331e-01]), 434.67438471696062), 9.0)
((2, array([  1.67861681e-02,   7.65693019e-02,   9.78399667e-01,
         2.46208152e-02,   2.80242292e-03,   7.40418922e-03,
         3.56223131e-01,   2.55965087e-04,   3.04912442e-02,
         1.85999817e-04]), 434.81018938153466), 2.0)
((6, array([  1.18968919e-03,   1.31791107e-03,   3.37182257e-04,
         6.67264618e-03,   5.39503922e-02,   2.27179323e-01,
         9.52465708e-01,   3.41666276e-05,   4.03150514e-02,
         1.31326096e-02]), 434.87220480327596), 6.0)
((4, array([  1.39873773e-02,   9.89

In [171]:
# Persist the current best network so that a later session can resume from it.
savefile = os.path.join("Data", "Best.pkl")
best_network = networks[min_i]
with open(savefile, 'wb') as output:
    pickle.dump(best_network, output)

In [173]:
# Try the trained network on another hand-drawn sample, a "1".
one_image_path = os.path.join("Data", "One2.png")
im = cv2.imread(one_image_path, 0)  # flag 0: load as grayscale
im = im.flatten() / 255.0           # 1-D vector, divided by 255
one_guess = networks[min_i].identifyNumber(trueValue=1, N1=im)
print(one_guess)

(8, array([  1.77460565e-03,   5.38313184e-06,   1.11935450e-03,
         1.17980550e-02,   3.67978335e-02,   4.91742404e-02,
         8.40655320e-03,   1.18378643e-02,   9.64306930e-02,
         1.42682652e-02]), 443.75654166911278)


In [156]:
# Manual reset of the best-network bookkeeping: point at slot 0 and set the
# error to 300, which is far above the target_error used by the training cell.
min_i, min_val = 0, 300