In [1]:
#Download the dataset from the website Mnist
import urllib
import gzip
import os
import numpy as np
def load_dataset():
    def download(filename, source="http://yann.lecun.com/exdb/mnist/"):
        print("Downloading...", filename)
        import urllib
        urllib.request.urlretrieve(source+filename, filename)

    def load_mnist_images(filename):
        if not os.path.exists(filename):
            download(filename)
        with gzip.open(filename,'rb') as f:
            #open the zip file of images
            data = np.frombuffer(f.read(),np.uint8,offset=16)
            #Each image has 28x28 pixels, its a monochrome image ie only 1 channel
            #it were full-color it would have 3/4 channels R,G,B
            data = data.reshape(-1, 1, 28, 28)
            #the first dimension is the number of images, making this -1
        return data/np.float32(256)
        #this will convert the byte value to a float32 in the range[0,1]
        
    def load_mnist_labels(filename):
        if not os.path.exists(filename):
            download(filename)
            # Read the labels which are in a binary form
        with gzip.open(filename,'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=8)
                #this gives a numpy array of intergers, the digit value corresponding to the image
        return data

        #now we can download and read the training and test data sets
    x_train = load_mnist_images('train-images-idx3-ubyte.gz')
    y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
    x_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
    y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

    return x_train, y_train, x_test, y_test
    
    


In [2]:
x_train, y_train, x_test, y_test = load_dataset()

In [3]:
import matplotlib
matplotlib.use('TkAgg') # a Default setting for matplotlib for how to render images
import matplotlib.pyplot as plt
plt.show(plt.imshow(x_train[1][0]))

In [4]:
#we are going to use 2 python packages called theano and lasagne
#Theano is a mathematical packaege that allows you to define and 
#mathematical computations. - like numpy but with high dimensional arrays
#Higher dimensional arrays are often called Tensors -  and Theano is a python package to work with them

#Lasagne is a library that uses Theano heavily and supports building of neural networks.
#it comes with functions to set up layers, define error functions train neural networks etc
import lasagne
import theano
import theano.tensor as T

def build_NN(imput_var = None):
    #we are going to create a nueral network with 2 hidden layers of 800 each.
    #the output layer will have 10 nodes - the nodes are numbered 0-9 and the outpu
    #at each node will be a value between 0-1. The node with the highest value will be the predicted output
    
    #First we have an input layer - the expacted input shape is
    #1x28x28 (for 1 image)
    #We will link this input to the input_var (which will be the array of images that we'll pass in later on)
    l_in = lasagne.layers.InputLayer(shape=(None,1,28,28), input_var = input_var)
    
    #we'll add a 20% dropout - this means that randomly 20% of the edges between the 
    #inputs and the next layer will be dropped - this is done to avoid overfitting
    l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)
    
    #Add a layer with 800 nodes. Initially this  will be dense/fully-connected
    #ie. every edge possible
    #will be drawn.
    l_hid1 = lasagne.layers.DenseLayer(l_in_drop, num_units = 800, 
                                       nonlinearity = lasagne.nonlinearities.rectify, 
                                       W = lasagne.init.GlorotUniform())
    
    #This layer has been initialized with some weights. There are some schemes to
    #initialize the weights so that training will be done faster, Glorot's scheme
    #is one of them
    
    #we will add a dropout of 50% to the hidden layer 1
    l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p = 0.5)
    
    #Add another layere, it works exectly the same way
    l_hid2 = lasagne.layers.DenseLayer(l_hid1_drop, num_units = 800, 
                                       nonlinearity = lasagne.nonlinearities.rectify, 
                                       W = lasagne.init.GlorotUniform())
    
    l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p = 0.5)
    
    # Let's now add the final output layer.
    l_out = lasagne.layers.DenseLayer(l_hid2_drop, num_units = 10, 
                                       nonlinearity = lasagne.nonlinearities.softmax)
    
    #the output layer has 10 units. softmax specifies that each of those
    #output is between 0-1 and the max of the those will be the final prediction
    
    return l_out #we return the last layer, but since all the layers are linked
#we effectivily return the whole network

#We've setup the network. now we have to tell the network how to train itself
#ie how should it find the values od all the weights it needs to find

#We'll initialize some empty arrays which will act as placeholders
#for the training/test data that will be given to the network

input_var = T.tensor4('inputs') # An empty 4 demensional array
target_var = T.ivector('targets') # An empty 1 dimensional integer array to represernt the lables

network = build_NN(input_var) #call the fucntion that initialize the neural network

#it training we are going to follow the steps blow
#a. compute an error function

prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
#catagorical cross entropy is one of the standard error fucntions with
#classification problems
loss = loss.mean()

#b. We'll tell the network how to update all its weights based on the
#value of the error fucntion
params = lasagne.layers.get_all_params(network, trainable = True) #current value of all
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate = 0.01, momentum = 0.9)

#Nestrov momemtum is one fo the options that lasagne offers for updating the weights
#in a training step. this is based on Stochastic Gradient Descent - the idea is simple
#Find the slope of the error function at the current point and move downwords
#in the direction  of that slope

#We'll use theano to compile a function that is going to represent a 
#single training step ie. compute the error, find the current weights, updates the weights
train_fn = theano.function([input_var, target_var], loss, updates=updates)
#calling this fucntion for a certain number of times will train the neural network
    


ImportError: cannot import name downsample

In [48]:
num_training_steps = 10 # Ideally you can train for a few 100 steps

for step in range(num_training_steps):
    train_err = train_fn(x_train, y_train)
    #print("Current step is "+str(step))
    print(train_err)


2.4144357001008814
2.39643881138562
2.3714091236356394
2.3470555912452604
2.321185257914009
2.2905010927613243
2.2601833636570983


KeyboardInterrupt: 

In [46]:
#To check the prediction for 1 image we'll need to set up another function
test_prediction = lasagne.layers.get_output(network)
val_fn = theano.function([input_var], test_prediction)

val_fn([x_test[1]]) # This will apply the function on 1 image, the first one in the test set
#The max value if for the digit 7

array([[0.05415224, 0.15168205, 0.04740196, 0.0617309 , 0.13617745,
        0.1505447 , 0.16307532, 0.05555481, 0.14800482, 0.03167576]])

In [47]:
#Let's check the actual value
y_test[1]

2

In [5]:
#step 5: We'll feed a test data set of 10000 images to the trained neural network
#and check it's accuracy

#we'll set up a function that will take in a images and their lables,
#feed the images to our network and compute it's accuracy

test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype = theano.config.floatX)
#checks the index of the max value in each test prediction and matches it agains the actual value
acc_fn = theano.function([input_var,target_var],test_acc)

acc_fn(x_test,y_test)
#this is pretty poor accuracy 


NameError: name 'lasagne' is not defined