In [1]:
import six.moves.cPickle as pickle
import pickle as pkl
import copy_reg
import gzip, os, sys, timeit
# Switch the working directory to the PYTHON_IMPL folder.
# NOTE(review): absolute path specific to one machine -- adjust before running elsewhere.
os.chdir("/Users/jacobsw/Desktop/IMPLEMENTATION_CAMP/CODE/BASIC_TOPICS/ML_GENERAL/PYTHON_IMPL")
# Extra module search paths, currently disabled:
# sys.path.append("/Users/jacobsw/Desktop/IMPLEMENTATION_CAMP/CODE/BASIC_TOPICS/ML_GENERAL/PYTHON_IMPL/ACCESSORIES")
# sys.path.append("/Users/jacobsw/Desktop/IMPLEMENTATION_CAMP/CODE/BASIC_TOPICS/ML_GENERAL/PYTHON_IMPL/DATA")
# Running this once is enough.

In [2]:
import numpy as np

In [3]:
import theano
import theano.tensor as T
from theano.tensor import tanh
from theano import function, shared
from theano.tensor.nnet import softmax

In [4]:
class HiddenLayer:
    """Fully connected layer computing activation(inpt . W + b).

    W has shape (nIn, nOut) and is drawn from `rng` uniformly in
    [-sqrt(6/(nIn+nOut)), +sqrt(6/(nIn+nOut))]; b has shape (nOut,) and starts
    at zero. Both are Theano shared variables stored as theano.config.floatX.

    Attributes set by the constructor: inpt, W, b, output, params ([W, b]).
    """

    def __init__(self, rng, inpt, nIn, nOut, activation=tanh):
        floatX = theano.config.floatX

        # Half-width of the symmetric uniform interval used for the weights.
        bound = np.sqrt(6./(nIn+nOut))
        initialW = np.asarray(rng.uniform(low=-bound, high=bound, size=(nIn,nOut)),
                              dtype=floatX)
        initialB = np.zeros((nOut,), dtype=floatX)

        self.inpt = inpt
        self.W = shared(initialW, name='W', borrow=True)
        self.b = shared(initialB, name='b', borrow=True)
        self.params = [self.W, self.b]

        # Symbolic output of the layer.
        preActivation = T.dot(inpt, self.W) + self.b
        self.output = activation(preActivation)

class OutputLayer:
    """Softmax classification layer with zero-initialised weights and bias.

    Attributes set by the constructor:
      inpt      -- the symbolic input that was passed in
      W, b      -- shared variables of shape (nIn, nOut) and (nOut,)
      pYgivenX  -- softmax(inpt . W + b)
      yHat      -- argmax of pYgivenX along axis 1
      params    -- [W, b]
    """

    def __init__(self, inpt, nIn, nOut):
        floatX = theano.config.floatX
        zeroW = np.zeros((nIn,nOut), dtype=floatX)
        zeroB = np.zeros((nOut,), dtype=floatX)

        self.inpt = inpt
        self.W = shared(value=zeroW, name='W', borrow=True)
        self.b = shared(value=zeroB, name='b', borrow=True)
        self.params = [self.W, self.b]

        scores = T.dot(inpt, self.W) + self.b
        self.pYgivenX = softmax(scores)
        self.yHat = T.argmax(self.pYgivenX, axis=1)

    def nll(self, y):
        """Return the mean negative log-probability assigned to the labels y."""
        logProbs = T.log(self.pYgivenX)
        rows = T.arange(y.shape[0])
        # Pick, for every row, the log-probability at that row's label index.
        return -T.mean(logProbs[rows, y])

    def errors(self, y):
        """Return the mean of (yHat != y) as a symbolic scalar.

        Asserts that y has the same number of dimensions as yHat and an
        integer dtype.
        """
        assert y.ndim == self.yHat.ndim
        assert y.dtype.startswith('int')
        mismatches = T.neq(self.yHat, y)
        return T.mean(mismatches)

In [6]:
class MLP:
    """Multi-layer perceptron: a chain of HiddenLayer objects ending in an OutputLayer.

    Parameters
    ----------
    inpt : symbolic input fed to the first hidden layer.
    sizes : sequence of int
        Layer widths [nInput, nHidden1, ..., nOutput]; at least three entries
        (i.e. at least one hidden layer) are required.
    rng : numpy RandomState or None
        Source of randomness for the hidden-layer weights. When None, a fresh
        RandomState(0) is created for this instance, so two default-constructed
        networks start from the same weights.
    hiddenActivation : callable
        Activation applied by every hidden layer.

    Attributes
    ----------
    numLayers : len(sizes)
    layers    : hidden layers followed by the output layer
    nll, errors : bound methods of the output layer
    params    : concatenation of every layer's params, in layer order
    inpt      : the symbolic input that was passed in
    """

    def __init__(self, inpt, sizes, rng=None, hiddenActivation=tanh):
        # The original `sizes >= 3` compared a list with an int, which never
        # checks the length; the number of entries is what matters.
        assert len(sizes) >= 3, "sizes needs at least an input, one hidden and an output width"
        if rng is None:
            # Created per instance rather than once in the signature so that
            # instances do not share (and advance) one hidden generator.
            rng = np.random.RandomState(0)

        self.numLayers = len(sizes)
        self.layers = []

        # Chain the hidden layers: each one consumes the previous layer's output.
        layerInput = inpt
        for nIn, nOut in zip(sizes[:-2], sizes[1:-1]):
            hidden = HiddenLayer(rng=rng, inpt=layerInput, nIn=nIn, nOut=nOut,
                                 activation=hiddenActivation)
            self.layers.append(hidden)
            layerInput = hidden.output

        self.layers.append(OutputLayer(inpt=layerInput, nIn=sizes[-2], nOut=sizes[-1]))
        self.nll, self.errors = self.layers[-1].nll, self.layers[-1].errors

        self.params = []
        for layer in self.layers:
            self.params += layer.params
        self.inpt = inpt

In [46]:
def sgd(train, sizes, lr=.01, epochs=100, batchSize=20, validation=None, test=None):
    """Train an MLP with minibatch SGD and optional patience-based early stopping.

    Parameters
    ----------
    train : pair (X, Y)
        X must provide get_value() (used to count rows); X and Y are sliced
        symbolically per minibatch. Presumably the pairs produced by
        shared_dataset -- confirm against the caller.
    sizes : sequence of int
        Layer widths, forwarded to MLP.
    lr : float
        Step size of the plain gradient-descent update.
    epochs : int
        Maximum number of passes over the training batches.
    batchSize : int
        Rows per minibatch. Rows beyond the last full batch are never visited.
    validation : pair (X, Y) or None
        When given, the error is measured every `validationFrequency`
        minibatches and drives early stopping.
    test : pair (X, Y) or None
        When given together with `validation`, the test error is measured each
        time a new best validation error is found.

    Returns
    -------
    The lowest validation error observed (a fraction, not a percentage), or
    np.inf when no validation pass ever ran.

    Progress is reported with single-argument print calls, which behave the
    same under the Python 2 print statement and the Python 3 print function.
    """

    print("... preparing data")

    assert len(train) == 2
    X_train, Y_train = train

    print("... building the model")

    index = T.iscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    rng = np.random.RandomState(0)
    # Hand the generator to the model explicitly so every call of sgd starts
    # from the same initial weights.
    classifier = MLP(inpt=x, sizes=sizes, rng=rng)
    cost = classifier.nll(y)
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [(param, param - lr*gparam) for param, gparam in zip(classifier.params, gparams)]

    def _compileBatchFunction(X, Y, outputs, updates=None):
        # Compile a function of the minibatch index over (X, Y) and count full batches.
        batchFunction = function(inputs=[index], outputs=outputs, updates=updates,
                                 givens={x: X[index*batchSize: (index+1)*batchSize],
                                         y: Y[index*batchSize: (index+1)*batchSize]})
        # Floor division keeps the count an int under both Python 2 and 3.
        nBatches = X.get_value(borrow=True).shape[0] // batchSize
        return batchFunction, nBatches

    train_model, nTrainBatches = _compileBatchFunction(X_train, Y_train, cost, updates)
    if validation is not None:
        assert len(validation) == 2
        X_dev, Y_dev = validation
        dev_model, nDevBatches = _compileBatchFunction(X_dev, Y_dev, classifier.errors(y))
    if test is not None:
        assert len(test) == 2
        X_test, Y_test = test
        test_model, nTestBatches = _compileBatchFunction(X_test, Y_test, classifier.errors(y))

    print("... training the model")

    patience = 10000                # minimum number of minibatches to look at
    patienceIncrease = 2            # patience becomes at least iteration * this on a clear improvement
    improvmentThreshold = .995      # relative improvement that counts as "clear"
    validationFrequency = min(nTrainBatches, patience // 2)
    bestValidationLoss = np.inf
    bestIter = 0
    # Stays NaN when the test set is never evaluated (e.g. no validation set),
    # so the final report cannot fail on an undefined name.
    testScore = np.nan

    epoch = 0
    doneLooping = False

    while (epoch < epochs) and (not doneLooping):
        epoch += 1
        for batchIndex in range(nTrainBatches):
            train_model(batchIndex)
            iteration = (epoch-1)*nTrainBatches + batchIndex
            if validation is not None and (iteration+1) % validationFrequency == 0:
                validationLosses = [dev_model(i) for i in range(nDevBatches)]
                thisValidationLoss = np.mean(validationLosses)
                print("Epoch %i, Batch %i/%i, Validation Error %f%%" % (epoch, batchIndex+1,
                                                                         nTrainBatches, thisValidationLoss*100.))
                if thisValidationLoss < bestValidationLoss:
                    # Only a clear improvement extends the patience ...
                    if thisValidationLoss < bestValidationLoss*improvmentThreshold:
                        patience = max(patience, iteration*patienceIncrease)
                    # ... but every improvement is recorded as the new best,
                    # whether or not a test set was supplied.
                    bestValidationLoss = thisValidationLoss
                    bestIter = iteration
                    if test is not None:
                        testLosses = [test_model(i) for i in range(nTestBatches)]
                        testScore = np.mean(testLosses)
                        print("Epoch %i, Batch %i/%i, Test Error of Best: %f%%" % (epoch, batchIndex+1,
                                                                                    nTrainBatches, testScore*100.))
                        #  with open('best_model.pkl','wb') as f:
                        #      pkl.dump(classifier, f)
                # Checked only after a validation pass, so training without a
                # validation set always runs for the full number of epochs.
                if patience <= iteration:
                    doneLooping = True
                    break
    if validation is not None:
        print("Optimization Done, Best Validation Score: {}%".format(bestValidationLoss*100))
    if test is not None:
        print("Best Test performance achieved at iter %i: %f%%" % (bestIter+1, testScore*100.))
    return bestValidationLoss

In [16]:
def shared_dataset(data, borrow=True):
    """Wrap an (X, Y) pair in Theano shared variables.

    Both arrays are converted to theano.config.floatX before being stored.
    Returns (sharedX, labels), where sharedX is the shared variable itself and
    labels is the shared Y viewed through a symbolic cast to int32.
    """
    features, labels = data
    floatX = theano.config.floatX
    featuresShared = theano.shared(np.asarray(features, dtype=floatX), borrow=borrow)
    labelsShared = theano.shared(np.asarray(labels, dtype=floatX), borrow=borrow)
    labelsAsInt = T.cast(labelsShared, 'int32')
    return featuresShared, labelsAsInt
def load_mnist(dataDir="/Users/jacobsw/Desktop/IMPLEMENTATION_CAMP/CODE/BASIC_TOPICS/ML_GENERAL/PYTHON_IMPL/DATA/",
               fileName='mnist.pkl.gz'):
    """Load a gzipped pickle holding (train, dev, test) splits and share each one.

    Parameters
    ----------
    dataDir : str
        Directory containing the data file. The default is the original
        hard-coded location, so calling load_mnist() behaves as before.
    fileName : str
        Name of the gzipped pickle inside dataDir.

    Returns
    -------
    [(X_train, Y_train), (X_dev, Y_dev), (X_test, Y_test)], each pair being the
    result of shared_dataset on the corresponding split.

    Side effect: the process working directory is changed to dataDir. This is
    kept from the original implementation because later code may rely on it.
    """
    os.chdir(dataDir)
    # SECURITY: unpickling can execute arbitrary code -- only load files from a trusted source.
    with gzip.open(fileName) as f:
        data_train, data_dev, data_test = pickle.load(f)
    X_train, Y_train = shared_dataset(data_train)
    X_dev, Y_dev = shared_dataset(data_dev)
    X_test, Y_test = shared_dataset(data_test)

    return [(X_train, Y_train), (X_dev, Y_dev), (X_test, Y_test)]

In [17]:
# Unpack the three (X, Y) pairs returned by load_mnist: training, dev and test splits.
train, dev, test = load_mnist()

In [47]:
%%time
# Train with layer widths 784-30-10 and the default sgd settings; t receives
# the value sgd returns (its best validation loss).
t = sgd(train, [784, 30, 10], validation=dev, test=test)

... preparing data
... building the model
... training the model
Epoch 1, Batch 2500/2500, Validation Error 11.470000%
Epoch 1, Batch 2500/2500, Test Error of Best: 11.570000%
Epoch 2, Batch 2500/2500, Validation Error 9.120000%
Epoch 2, Batch 2500/2500, Test Error of Best: 9.170000%
Epoch 3, Batch 2500/2500, Validation Error 8.210000%
Epoch 3, Batch 2500/2500, Test Error of Best: 8.380000%
Epoch 4, Batch 2500/2500, Validation Error 7.450000%
Epoch 4, Batch 2500/2500, Test Error of Best: 7.520000%
Epoch 5, Batch 2500/2500, Validation Error 6.930000%
Epoch 5, Batch 2500/2500, Test Error of Best: 7.080000%
Epoch 6, Batch 2500/2500, Validation Error 6.520000%
Epoch 6, Batch 2500/2500, Test Error of Best: 6.690000%
Epoch 7, Batch 2500/2500, Validation Error 6.200000%
Epoch 7, Batch 2500/2500, Test Error of Best: 6.270000%
Epoch 8, Batch 2500/2500, Validation Error 5.800000%
Epoch 8, Batch 2500/2500, Test Error of Best: 5.950000%
Epoch 9, Batch 2500/2500, Validation Error 5.530000%
Epoch 9,