In [None]:
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
import numpy as np
import sys
import os
sys.path.append("../lib")
from load import mnist
from load import faces
import pickle
from theano.tensor.nnet.conv import conv2d
from theano.tensor.signal.downsample import max_pool_2d
import theano.misc.pkl_utils.dump as dump


In [2]:
currentDir =  os.getcwd();
parampickle = currentDir + "/parametersConvonet.pickle"
logPickle = currentDir + "/logPickleDBN.pickle"

In [3]:
srng = RandomStreams()

def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))

def rectify(X):
    return T.maximum(X, 0.)

def softmax(X):
    e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x'))
    return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')

def dropout(X, p=0.):
    if p > 0:
        retain_prob = 1 - p
        X = X *  srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
        X = (X/retain_prob)
    return X

def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))
    return updates


In [4]:
def model(X, w, w2, w3, w4, w5 , w6 ,w_o, p_drop_conv, p_drop_hidden):
    l1a = rectify(conv2d(X, w, border_mode='full'))
    l1 = max_pool_2d(l1a, (2, 2))
    l1 = dropout(l1, p_drop_conv)

    l2a = rectify(conv2d(l1, w2))
    l2 = max_pool_2d(l2a, (2, 2))
    l2 = dropout(l2, p_drop_conv)
    
    l3a = rectify(conv2d(l2, w3))
    l3 = max_pool_2d(l3a, (2, 2))
    l3 = dropout(l3, p_drop_conv)

    l4a = rectify(conv2d(l3, w4))
    l4 = T.flatten(l4a, outdim=2)
    l4 = dropout(l4, p_drop_conv)

    l5 = rectify(T.dot(l4, w5))
    l5 = dropout(l5, p_drop_hidden)
    
    l6 = rectify(T.dot(l5, w6))
    l6 = dropout(l6, p_drop_hidden)

    pyx = softmax(T.dot(l6, w_o))
    return l1, l2, l3, l4, l5, l6, pyx


In [5]:
#trX, teX, trY, teY = mnist(onehot=True)
trX, teX, trY, teY = faces(onehot=True)

trX = trX.reshape(-1, 1, 48, 48)
teX = teX.reshape(-1, 1, 48, 48)

#trX = numpy.asarray(trX, dtype=theano.config.floatX)
#teX = teX.reshape(-1, 1, 28, 28)


In [40]:

X = T.ftensor4()
Y = T.fmatrix()

w = init_weights((32, 1, 7, 7))
w2 = init_weights((64, 32, 5, 5))
w3 = init_weights((128, 64, 3, 3))
w4 = init_weights((256, 128, 3, 3))
w5 = init_weights((256 * 3 * 3, 10000)) 
w6 = init_weights((10000, 1000)) 
w_o = init_weights((1000, 7))

#Train Loop
noise_l1, noise_l2, noise_l3, noise_l4, noise_l5 , noise_l6, noise_py_x = model(X, w, w2, w3, w4, w5, w6, w_o, 0.2, 0.5)
cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
params = [w, w2, w3, w4, w5, w6, w_o]
updates = RMSprop(cost, params, lr=0.0009)
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)

#Predict Loop
l1, l2, l3, l4, l5,l6, py_x = model(X, w, w2, w3, w4, w5, w6, w_o, 0., 0.)
y_x = T.argmax(py_x, axis=1)
predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)



In [41]:
for i in range(500):
    for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
        cost = train(trX[start:end], trY[start:end])
    error = np.mean(np.argmax(teY, axis=1) == predict(teX))
    logline = "Epoch: " + str(i) + "  Error: " + str(error)
    print logline
    f = open(logPickle, 'a+')
    pickle.dump(logline , f);
    f.close()

Epoch: 0  Error: 0.249651713569
Epoch: 1  Error: 0.249651713569
Epoch: 2  Error: 0.332961827807
Epoch: 3  Error: 0.398161047646
Epoch: 4  Error: 0.41961549178
Epoch: 5  Error: 0.441905823349
Epoch: 6  Error: 0.449150181109
Epoch: 7  Error: 0.491780440234
Epoch: 8  Error: 0.485929228197
Epoch: 9  Error: 0.491780440234
Epoch: 10  Error: 0.502089718585
Epoch: 11  Error: 0.515463917526
Epoch: 12  Error: 0.530509891335
Epoch: 13  Error: 0.53747561995
Epoch: 14  Error: 0.53385344107
Epoch: 15  Error: 0.515463917526
Epoch: 16  Error: 0.539426023962
Epoch: 17  Error: 0.545834494288
Epoch: 18  Error: 0.543605461131
Epoch: 19  Error: 0.545834494288
Epoch: 20  Error: 0.542212315408
Epoch: 21  Error: 0.541655057119
Epoch: 22  Error: 0.549178044023
Epoch: 23  Error: 0.555307885205
Epoch: 24  Error: 0.537754249094
Epoch: 25  Error: 0.563388130398
Epoch: 26  Error: 0.545834494288
Epoch: 27  Error: 0.54834215659
Epoch: 28  Error: 0.549178044023
Epoch: 29  Error: 0.549178044023
Epoch: 30  Error: 0.5656

In [12]:
confusion = (np.argmax(teY, axis=1) == predict(teX))

In [11]:
print confusion

[ True  True False ...,  True False False]


In [13]:
predicted = predict(teX)
print predicted.shape

(3589,)


In [15]:
real = np.argmax(teY, axis=1)

In [16]:
confusion = np.zeros((7,7))

In [21]:
for i in range(0 , len(real)):
    j = real[i]
    k = predicted[i]
    #print "Real:  %d , predicted %d"%(j,k)
    confusion[j][k] = confusion[j][k] + 1

In [22]:
print confusion

[[ 210.    4.   51.   47.   92.   11.   52.]
 [   9.   24.    5.    4.    9.    1.    4.]
 [  45.    2.  153.   47.  139.   48.   62.]
 [  39.    0.   22.  716.   44.   19.   55.]
 [ 105.    6.   41.   79.  313.   15.   94.]
 [  28.    0.   27.   25.   21.  295.   19.]
 [  72.    0.   28.   85.  123.   12.  287.]]


0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral

In [35]:
for j in range(0,7):
    #print (confusion[j][j]/sum(confusion[j][:]))
    print (sum(confusion[j][:]) - confusion[j][j])/sum(confusion[j][:])

0.550321199143
0.571428571429
0.691532258065
0.2
0.52067381317
0.289156626506
0.527182866557


In [33]:
np.histogram(real , bins=[0,1, 2, 3 , 4 , 5 , 6 , 7])

(array([467,  56, 496, 895, 653, 415, 607]), array([0, 1, 2, 3, 4, 5, 6, 7]))

In [23]:
logline = " =======Saving Network params==== "
print logline
f = open(logPickle, 'a+')
f2 = open(parampickle , 'a+')
pickle.dump(logline , f);
pickle.dump(params , f2)
f2.close()
f.close()



In [39]:
f2 = open(parampickle , 'a+')
for p in params:
    pickle.dump(p.get_value(),f2)
    
f2.close()