Import all the required headers

Helper functions

In [1]:
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
import numpy as np
from theano.tensor.nnet.conv import conv2d
from theano.tensor.signal.downsample import max_pool_2d
import pickle
import os
import sys
sys.path.append("../lib")
from load import faces

Using gpu device 0: GeForce GTX 980


In [2]:

srng = RandomStreams()

currentDir =  os.getcwd();
parampickle = currentDir + "/parameters.pickle"
costPickle = currentDir + "/costPickle.pickle"
errorPickle = currentDir + "/errorPickle.pickle"
logFile = currentDir + "/logFile.log"

def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01) , borrow=True )

def init_biases(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01) , borrow=True )

def rectify(X):
    return T.maximum(X, 0.)

def softmax(X):
    e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x'))
    return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')

def dropout(X, p=0.):
    if p > 0:
        retain_prob = 1 - p
        X = X * srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
        X = (X/retain_prob)
    return X

def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))
    return updates


Model

In [3]:
def model(X, w1, w2, w3, w4, w5, b5 , w6 ,b6 , w_o, p_drop_conv, p_drop_hidden):
    
    l1 = rectify(conv2d(X, w1, border_mode='full'))

    l2a = rectify(conv2d(l1, w2))
    l2 = max_pool_2d(l2a, (2, 2))
    l2 = dropout(l2, p_drop_conv)
    
    l3a = rectify(conv2d(l2, w3))
    l3 = max_pool_2d(l3a, (2, 2))
    
    l4a = rectify(conv2d(l3,w4))
    l4b = max_pool_2d(l4a, (2, 2))
    l4c = T.flatten(l4b, outdim=2)
    l4 = dropout(l4c, p_drop_conv)

    l5a = T.dot(l4, w5) + b5
    l5 = T.nnet.sigmoid(l5a)
    
    l6a = T.dot(l5, w6) + b6
    l6 = T.nnet.sigmoid(l6a)

    pyx = softmax(T.dot(l6, w_o))
    
    return l1, l2, l3, l4, l5, l6, pyx


Load Datasets into memory

In [4]:
trX, teX, trY, teY = faces()
trX = trX.reshape(-1, 1, 48, 48)
teX = teX.reshape(-1, 1, 48, 48)


Do the Network thing

In [5]:
#initialize All the parameters
X = T.ftensor4()
Y = T.fmatrix()
w1 = init_weights((32, 1, 11, 11))

w2 = init_weights((64, 32, 7, 7))

w3 = init_weights((128, 64, 5, 5))

w4 = init_weights((128, 128, 3, 3))

w5 = init_weights((128 * 5 * 5 , 100))
b5 = init_biases((32,100))

w6 = init_weights((100 , 100))
b6 = init_biases((32,100))

w_o = init_weights((100, 7))

In [6]:

#Train Loop
noise_l1, noise_l2, noise_l3, noise_l4, noise_l5, noise_l6, noise_py_x = model(X, w1, w2, w3, w4, w5, b5, w6, b6, w_o, 0.2, 0.5)
cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
params = [w1, w2, w3, w4, w5 ,b5 ,w6 , b6, w_o]
updates = RMSprop(cost, params, lr=0.009)
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)



In [None]:
#Predict Loop
l1, l2, l3, l4, l5, l6 , py_x = model(X, w1, w2, w3, w4, w5, b5, w6, b6, w_o, 0., 0.)
y_x = T.argmax(py_x, axis=1)
predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

Train the network

In [None]:
for i in range(1000):
    for start, end in zip(range(0 ,len(trX) ,32), range(32 ,len(trX) ,32)):
        cost = train(trX[start:end], trY[start:end])
    logline = "Interation number: " + str(i) + ", Cost value : " + str(cost)
    
    print "Interation number: " + str(i)
    print "Cost value : " + str(cost)
    
    f = open(costPickle, 'a+')
    f2 = open(logFile, 'a+')
    
    pickle.dump(logline, f)
    f2.write(logline)
    
    f.close()
    f2.close()
    
    if i%100 == 0 :
        for start, end in zip(range(0, len(teX),32), range(32, len(teX),32)):
            error = np.mean(np.argmax(teY[start:end], axis=1) == predict(teX[start:end]))
            logline = "Error = " + str(error)
    
            print error
    
            f = open(errorPickle, 'a+')
            f2 = open(logFile, 'a+')
    
            pickle.dump(logline, f)
            f2.write(logline)
    
            f.close()
            f2.close()

Interation number: 0
Cost value : 1.75372064114
0.15625
0.28125
0.1875
0.34375
0.1875
0.3125
0.15625
0.3125
0.1875
0.3125
0.28125
0.28125
0.21875
0.15625
0.15625
0.21875
0.25
0.09375
0.1875
0.28125
0.3125
0.1875
0.34375
0.40625
0.1875
0.125
0.0625
0.15625
0.15625
0.28125
0.25
0.1875
0.28125
0.34375
0.28125
0.28125
0.125
0.28125
0.25
0.34375
0.28125
0.1875
0.15625
0.4375
0.1875
0.34375
0.21875
0.1875
0.3125
0.25
0.21875
0.28125
0.1875
0.3125
0.15625
0.25
0.125
0.15625
0.25
0.15625
0.1875
0.46875
0.25
0.25
0.21875
0.375
0.46875
0.375
0.40625
0.1875
0.3125
0.375
0.34375
0.34375
0.25
0.375
0.34375
0.1875
0.1875
0.3125
0.3125
0.28125
0.3125
0.25
0.21875
0.25
0.09375
0.125
0.21875
0.21875
0.1875
0.3125
0.21875
0.21875
0.34375
0.34375
0.25
0.125
0.28125
0.1875
0.1875
0.28125
0.15625
0.21875
0.40625
0.28125
0.125
0.25
0.34375
0.125
0.28125
0.25

Save Parameters

In [None]:
f3 = open(parampickle,'a+')
pickle.dump(params, f3)