# Model Functions

### A. Softmax + Tanh/Sigmoid

* Single-Hidden Architecture: 
$ \hat{y} = \arg\max(\mathrm{softmax}(w_2\cdot \tanh/\mathrm{sigmoid}(w_1\cdot x + b_1) + b_2)) $

In [None]:
# E.G. MNIST (784-30-10) ARCHITECTURE
#  argmax(  softmax( w_2  *  tanh/sigmoid( w_1  *  x  +  b_1  )  + b_2 )   )
#     |               |                     |      |      |         |      |   
#     |             30*10                784*30  784*1   30*1     10*1     |
#     |               |                     |______|______|         |      |
#     |               |                            |                |      |
#     |               |                          30*1               |      | 
#     |               |____________________________|________________|      |
#     |                                            |                       |
#     |                                          10*1                      |
#     |____________________________________________|_______________________|
#                                                  |
#                                                  1 (i.e. y_hat = argmax_i Pr(y=i|x))

### B. Regularization (L1 & L2, Recap)

* $ L_{reg} = L + \lambda\lVert\theta\rVert_p^p $, where $\lVert\theta\rVert_p = \left(\sum_{j=1}^{|\theta|} |\theta_j|^p\right)^{\frac{1}{p}}$
* L1 / L2 regularization: $p = 1$ / $p = 2$, respectively

# Model Code

In [41]:
import six.moves.cPickle as pickle
import gzip, os, sys, timeit
# Switch the process working directory to the project folder.
# The path is hard-coded and machine-specific; adjust it before running elsewhere.
# NOTE(review): presumably required so the local `accessories` module can be
# imported further down -- confirm.
os.chdir("/Users/jacobsw/Desktop/IMPLEMENTATION_CAMP/CODE/BASIC_TOPICS/ML_GENERAL/PYTHON_IMPL")
# Disabled: sys.path additions for the ACCESSORIES and DATA folders.
# sys.path.append("/Users/jacobsw/Desktop/IMPLEMENTATION_CAMP/CODE/BASIC_TOPICS/ML_GENERAL/PYTHON_IMPL/ACCESSORIES")
# sys.path.append("/Users/jacobsw/Desktop/IMPLEMENTATION_CAMP/CODE/BASIC_TOPICS/ML_GENERAL/PYTHON_IMPL/DATA")
# Running this setup once is enough.

In [42]:
import numpy as np

In [43]:
import theano
import theano.tensor as T
from theano.tensor import tanh
from theano import function, shared
from theano.tensor.nnet import softmax

In [53]:
from accessories import OutputLayer, shared_dataset, load_mnist

In [45]:
class HiddenLayer:
    """Fully-connected hidden layer computing ``activation(inpt . W + b)``.

    Parameters
    ----------
    rng : np.random.RandomState
        Random generator used to draw the initial weights.
    inpt : symbolic tensor
        Layer input, shape (nDataSize, nIn) (e.g. MNIST: 50,000 * 784).
    nIn : int
        Dimension of the input.
    nOut : int
        Number of hidden neurons.
    activation : callable
        Elementwise non-linearity applied to the affine output (default: tanh).

    Attributes
    ----------
    W, b : Theano shared variables holding the weights (nIn, nOut) and the
        biases (nOut,).
    output : symbolic expression for the layer's activations.
    params : ``[W, b]``, the trainable parameters.
    """

    def __init__(self, rng, inpt, nIn, nOut, activation=tanh):
        self.inpt = inpt

        # Weights are drawn uniformly from [-bound, bound],
        # with bound = sqrt(6 / (nIn + nOut)).
        bound = np.sqrt(6. / (nIn + nOut))
        initW = rng.uniform(low=-bound, high=bound, size=(nIn, nOut))
        self.W = shared(np.asarray(initW, dtype=theano.config.floatX),
                        name='W', borrow=True)

        # Biases start at zero.
        initB = np.zeros((nOut,), dtype=theano.config.floatX)
        self.b = shared(initB, name='b', borrow=True)

        self.params = [self.W, self.b]
        self.output = activation(T.dot(inpt, self.W) + self.b)


In [46]:
class MLP:
    """Single-hidden-layer perceptron: a tanh ``HiddenLayer`` feeding an ``OutputLayer``.

    Parameters
    ----------
    rng : random generator forwarded to the hidden layer.
    inpt : symbolic input of the network.
    nIn, nHidden, nOut : sizes of the input, hidden and output layers.

    Attributes
    ----------
    hiddenLayer, logregLayer : the two stacked layers.
    L1, L2 : symbolic L1 / squared-L2 penalties summed over both weight
        matrices (biases are not included).
    nll, errors : taken directly from the output layer
        (NOTE(review): presumably negative log-likelihood and error rate --
        confirm against ``accessories.OutputLayer``).
    params : hidden-layer parameters followed by output-layer parameters.
    """

    def __init__(self, rng, inpt, nIn, nHidden, nOut):
        self.inpt = inpt

        hidden = HiddenLayer(rng=rng, inpt=inpt, nIn=nIn, nOut=nHidden, activation=tanh)
        logreg = OutputLayer(inpt=hidden.output, nIn=nHidden, nOut=nOut)
        self.hiddenLayer = hidden
        self.logregLayer = logreg

        # Regularisation terms over the weight matrices of both layers.
        self.L1 = abs(hidden.W).sum() + abs(logreg.W).sum()
        self.L2 = (hidden.W ** 2).sum() + (logreg.W ** 2).sum()

        # Loss and error expressions are delegated to the output layer.
        self.nll = logreg.nll
        self.errors = logreg.errors

        self.params = hidden.params + logreg.params
        

In [47]:
def test_mlp(lr=.01, L1=.0, L2=.0001, epochs=1000, data=load_mnist, batchSize=20, nHidden=500):
    """Train an MLP with minibatch SGD and patience-based early stopping.

    Progress, the best validation error, the matching test error and the run
    time are printed; nothing is returned.

    Parameters
    ----------
    lr : float
        Learning rate of the plain SGD update ``param - lr * grad``.
    L1, L2 : float
        Weights of the classifier's L1 / L2 penalty terms in the cost.
    epochs : int
        Maximum number of passes over the training set.
    data : callable or sequence
        Either a zero-argument loader or the datasets themselves, indexable
        as ``[(X_train, Y_train), (X_dev, Y_dev), (X_test, Y_test)]``.  Each
        X / Y must support ``get_value`` (X) and slicing, as used below.
    batchSize : int
        Number of examples per minibatch.
    nHidden : int
        Number of hidden units.
    """
    # Honour the `data` argument (it used to be ignored in favour of a
    # hard-coded load_mnist() call); the default behaviour is unchanged.
    datasets = data() if callable(data) else data
    X_train, Y_train = datasets[0]
    X_dev, Y_dev = datasets[1]
    X_test, Y_test = datasets[2]

    # Floor division keeps the batch counts integral on Python 2 and 3 alike,
    # so they can be fed to range().
    nTrainBatches = X_train.get_value(borrow=True).shape[0] // batchSize
    nDevBatches = X_dev.get_value(borrow=True).shape[0] // batchSize
    nTestBatches = X_test.get_value(borrow=True).shape[0] // batchSize

    print("... building the model")

    index = T.iscalar()  # index of a batch.
    x = T.matrix('x')
    y = T.ivector('y')

    rng = np.random.RandomState(1234)

    classifier = MLP(rng=rng, inpt=x, nIn=28*28, nHidden=nHidden, nOut=10)
    cost = classifier.nll(y) + L1*classifier.L1 + L2*classifier.L2

    # Each compiled function takes a batch index and substitutes the matching
    # slice of the corresponding dataset for the symbolic x / y.
    test_model = function(inputs=[index], outputs=classifier.errors(y),
                          givens={x: X_test[index*batchSize: (index+1)*batchSize],
                                  y: Y_test[index*batchSize: (index+1)*batchSize]})
    dev_model = function(inputs=[index], outputs=classifier.errors(y),
                         givens={x: X_dev[index*batchSize: (index+1)*batchSize],
                                 y: Y_dev[index*batchSize: (index+1)*batchSize]})

    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [(param, param - lr*gparam)
               for param, gparam in zip(classifier.params, gparams)]

    train_model = function(inputs=[index], outputs=cost, updates=updates,
                           givens={x: X_train[index*batchSize: (index+1)*batchSize],
                                   y: Y_train[index*batchSize: (index+1)*batchSize]})

    print("... training the model")

    patience = 10000              # minimum number of iterations before stopping
    patienceIncrease = 2          # patience grows to iteration * this on a significant improvement
    improvementThreshold = .995   # relative improvement that counts as significant
    validationFrequency = min(nTrainBatches, patience // 2)
    bestValidationLoss = np.inf
    bestIter = 0
    testScore = 0.
    startTime = timeit.default_timer()

    epoch = 0
    doneLooping = False

    while (epoch < epochs) and (not doneLooping):
        epoch += 1
        for batchIndex in range(nTrainBatches):
            train_model(batchIndex)
            # Global minibatch counter (named `iteration` to avoid shadowing
            # the builtin iter()).
            iteration = (epoch-1)*nTrainBatches + batchIndex
            if (iteration+1) % validationFrequency == 0:
                thisValidationLoss = np.mean([dev_model(i) for i in range(nDevBatches)])
                print("Epoch %i, Batch %i/%i, Validation Error %f %%" % (epoch, batchIndex+1,
                                                                         nTrainBatches,
                                                                         thisValidationLoss*100.))
                if thisValidationLoss < bestValidationLoss:
                    # Only a significant improvement extends the patience ...
                    if thisValidationLoss < bestValidationLoss*improvementThreshold:
                        patience = max(patience, iteration*patienceIncrease)
                    # ... but every improvement is recorded as the new best, so
                    # the reported best score is the true minimum seen.
                    bestValidationLoss = thisValidationLoss
                    bestIter = iteration
                    testScore = np.mean([test_model(i) for i in range(nTestBatches)])
                    print("Epoch %i, Batch %i/%i, Test Error of Best: %f %%" % (epoch, batchIndex+1,
                                                                                nTrainBatches,
                                                                                testScore*100.))
                # Patience is checked at validation time only.
                if patience <= iteration:
                    doneLooping = True
                    break
    endTime = timeit.default_timer()
    print("Optimization Done, Best Validation Score: %f %% (at iter %i, best test performance %f %%)"
          % (bestValidationLoss*100, bestIter+1, testScore*100.))
    print("Run Time: %f" % (endTime-startTime))

In [52]:
# test_mlp() 
#  ...
#  Epoch 991, Batch 2500/2500, Validation Error 1.700000 %
#  Epoch 992, Batch 2500/2500, Validation Error 1.700000 %
#  Epoch 993, Batch 2500/2500, Validation Error 1.700000 %
#  Epoch 994, Batch 2500/2500, Validation Error 1.700000 %
#  Epoch 995, Batch 2500/2500, Validation Error 1.700000 %
#  Epoch 996, Batch 2500/2500, Validation Error 1.700000 %
#  Epoch 997, Batch 2500/2500, Validation Error 1.700000 %
#  Epoch 998, Batch 2500/2500, Validation Error 1.700000 %
#  Epoch 999, Batch 2500/2500, Validation Error 1.700000 %
#  Epoch 1000, Batch 2500/2500, Validation Error 1.700000 %
#  Optimization Done, Best Validation Score: 1.680000 % (at iter 2050000, best test performance 1.650000 %)
#  Run Time: 4635.659076