In [1]:
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
import numpy as np
import sys
import os
sys.path.append("../lib")
from load import getValData
from load import faces
import pickle
from theano.tensor.nnet.conv import conv2d
from theano.tensor.signal.downsample import max_pool_2d
from six.moves import cPickle
from numpy import genfromtxt
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cm

Using gpu device 0: GeForce GTX 980


In [None]:
# Train/test split from the project-local `load.faces` helper (labels requested
# as one-hot via onehot=True).
trX, teX, trY, teY = faces(onehot=True)
# Reshape both image sets to 4D (N, 1, 48, 48), the layout fed to conv2d below.
trX, teX = (images.reshape(-1, 1, 48, 48) for images in (trX, teX))

# External CK+ set read from CSV: labels as-is, images reshaped like the
# train/test images above.
ValY = genfromtxt('../CKPlus/labels.csv', delimiter=',')
ValX = genfromtxt('../CKPlus/CKfaces.csv', delimiter=',').reshape(-1, 1, 48, 48)

In [None]:
# Module-level Theano random stream; dropout() samples its binomial masks from it.
srng = RandomStreams()

def floatX(X):
    """Return ``X`` as a numpy array with Theano's configured ``floatX`` dtype."""
    target_dtype = theano.config.floatX
    return np.asarray(X, dtype=target_dtype)

def init_weights(shape):
    """Create a Theano shared weight tensor of the given shape.

    Values are standard-normal draws from ``np.random.randn`` scaled by 0.01
    and cast with ``floatX``.

    :param shape: iterable of dimension sizes, unpacked into ``np.random.randn``
    :return: ``theano.shared`` variable holding the initial weights
    """
    initial_values = 0.01 * np.random.randn(*shape)
    return theano.shared(floatX(initial_values))

def init_bias(shape):
    """Create a zero-initialised Theano shared bias vector.

    :param shape: sequence whose first entry gives the length of the bias
                  vector; the remaining entries are ignored
    :return: ``theano.shared`` variable (created with ``borrow=True``) of
             ``shape[0]`` zeros in Theano's ``floatX`` dtype
    """
    n_units = shape[0]
    zeros = np.zeros((n_units,), dtype=theano.config.floatX)
    return theano.shared(value=zeros, borrow=True)

def rectify(X):
    """Rectified linear activation: element-wise maximum of ``X`` and zero."""
    floor = 0.
    return T.maximum(X, floor)

def softmax(X):
    """Row-wise softmax of a 2D symbolic matrix.

    The per-row maximum is subtracted before exponentiating so the largest
    exponent is zero, then each row is normalised by its sum.
    """
    row_max = X.max(axis=1).dimshuffle(0, 'x')
    exponentials = T.exp(X - row_max)
    row_totals = exponentials.sum(axis=1).dimshuffle(0, 'x')
    return exponentials / row_totals

def dropout(X, p=0.):
    """Apply inverted dropout to ``X``.

    Each element is kept with probability ``1 - p`` (mask sampled from the
    module-level ``srng`` stream) and the survivors are divided by ``1 - p``.

    :param X: symbolic tensor to mask
    :param p: drop probability as a plain Python/numpy number. Values ``<= 0``
              disable dropout and return ``X`` unchanged.
    :return: the masked and rescaled tensor, or ``X`` itself when ``p <= 0``
    :raises ValueError: if ``p >= 1``; the retain probability would be zero or
                        negative, which previously produced a division by zero
                        (NaN activations) or an invalid binomial probability.
    """
    if p >= 1:
        raise ValueError("dropout probability p must be < 1, got %r" % (p,))
    if p > 0:
        retain_prob = 1 - p
        mask = srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
        # Rescale so the expected value of each unit matches the no-dropout case.
        X = X * mask / retain_prob
    return X

def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    """Build RMSprop update pairs for ``theano.function(updates=...)``.

    For every parameter a shared accumulator (initialised to zeros of the
    parameter's shape) tracks a running average of the squared gradient; the
    gradient is divided by the square root of that average (plus ``epsilon``)
    before the learning-rate step.

    :param cost: symbolic scalar to differentiate
    :param params: list of shared variables to update
    :param lr: learning rate
    :param rho: decay factor of the squared-gradient running average
    :param epsilon: constant added inside the square root
    :return: list of ``(shared_variable, new_value)`` tuples, two per parameter
             (accumulator first, then the parameter itself)
    """
    gradients = T.grad(cost=cost, wrt=params)
    updates = []
    for param, grad in zip(params, gradients):
        running_sq = theano.shared(param.get_value() * 0.)
        running_sq_next = rho * running_sq + (1 - rho) * grad ** 2
        scaled_grad = grad / T.sqrt(running_sq_next + epsilon)
        updates.extend([
            (running_sq, running_sq_next),
            (param, param - lr * scaled_grad),
        ])
    return updates

def negative_log_likelihood(p_y_given_x,y):
    r"""Return the mean negative log-likelihood of the targets under the predictions.

    .. math::

        \ell (\theta, \mathcal{D}) =
        -\frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|-1}
            \log P(Y=y^{(i)} | x^{(i)}, \theta)

    :type p_y_given_x: theano.tensor.TensorType
    :param p_y_given_x: matrix of class probabilities with one row per example
                        and one column per class. The log is taken here, so
                        pass probabilities, not log-probabilities.

    :type y: theano.tensor.TensorType
    :param y: vector that gives for each example the correct label; it is used
              as a column index, so it must hold integer class indices

    :return: symbolic scalar, the negated mean log-probability assigned to the
             correct labels

    Note: we use the mean instead of the sum so that
          the learning rate is less dependent on the batch size
    """
    # y.shape[0] is (symbolically) the number of examples n in the minibatch,
    # so T.arange(y.shape[0]) is the row index vector [0, 1, ..., n-1].
    # Indexing the log-probability matrix LP with (rows, y) picks
    # [LP[0, y[0]], LP[1, y[1]], ..., LP[n-1, y[n-1]]]; its mean is the mean
    # log-likelihood across the minibatch, which is then negated.
    log_probs = T.log(p_y_given_x)
    return -T.mean(log_probs[T.arange(y.shape[0]), y])


In [None]:
def model1(X, w, b, w2, b2, w3, b3, w4, b4, w5, b5, w6, b6, w_o, b_o, p_drop_conv, p_drop_hidden):
    """Build the symbolic graph of the first network: 4 conv layers, 2 dense layers, softmax.

    Layer order: conv+pool, conv (no pooling), conv+pool, conv+flatten, two
    rectified dense layers, then a softmax output. Dropout follows every layer
    before the output.

    :param X: symbolic 4D input tensor passed to ``conv2d``
    :param w, w2, w3, w4: convolution filter tensors for layers 1-4
    :param w5, w6, w_o: weight matrices of the two hidden dense layers and the output layer
    :param b, b2, b3, b4, b5, b6, b_o: per-layer bias vectors (broadcast with ``dimshuffle``)
    :param p_drop_conv: dropout probability for layers 1-4 (including the flattened layer 4)
    :param p_drop_hidden: dropout probability for the dense layers 5 and 6
    :return: tuple ``(l1, l2, l3, l4, l5, l6, pyx)`` of the per-layer outputs
             after dropout, with ``pyx`` the softmax class probabilities
             (``model2`` in this file returns log-probabilities instead)
    """
    def conv_relu(inputs, filters, bias):
        # Convolution, per-channel bias broadcast over batch/rows/cols, then ReLU.
        return rectify(conv2d(inputs, filters) + bias.dimshuffle('x', 0, 'x', 'x'))

    def dense_relu(inputs, weights, bias):
        # Affine map with the bias broadcast over the batch axis, then ReLU.
        return rectify(T.dot(inputs, weights) + bias.dimshuffle('x', 0))

    l1 = dropout(max_pool_2d(conv_relu(X, w, b), (2, 2)), p_drop_conv)
    # The second conv layer is deliberately not pooled.
    l2 = dropout(conv_relu(l1, w2, b2), p_drop_conv)
    l3 = dropout(max_pool_2d(conv_relu(l2, w3, b3), (2, 2)), p_drop_conv)
    # Flatten to (batch, features) so the dense layers can consume it.
    l4 = dropout(T.flatten(conv_relu(l3, w4, b4), outdim=2), p_drop_conv)

    l5 = dropout(dense_relu(l4, w5, b5), p_drop_hidden)
    l6 = dropout(dense_relu(l5, w6, b6), p_drop_hidden)

    # Theano's built-in softmax yields class probabilities.
    pyx = T.nnet.softmax(T.dot(l6, w_o) + b_o.dimshuffle('x', 0))
    return l1, l2, l3, l4, l5, l6, pyx

In [None]:
def model2(X, w, b, w2, b2, w3, b3, w4, b4, w5, b5, w_o, b_o, p_drop_conv, p_drop_hidden):
    """Build the symbolic graph of the second network: 3 conv layers, 2 dense layers, log-softmax.

    Layer order: conv+pool, conv (no pooling), conv+pool, flatten, two rectified
    dense layers, then a log-softmax output. Dropout follows the three conv
    layers and the first dense layer; the second dense layer has none.

    :param X: symbolic 4D input tensor passed to ``conv2d``
    :param w, w2, w3: convolution filter tensors for layers 1-3
    :param w4, w5, w_o: weight matrices of the two hidden dense layers and the output layer
    :param b, b2, b3, b4, b5, b_o: per-layer bias vectors (broadcast with ``dimshuffle``)
    :param p_drop_conv: dropout probability for the conv layers
    :param p_drop_hidden: dropout probability for the first dense layer
    :return: tuple ``(l1, l2, l3, l4, l5, pyx)`` of per-layer outputs, with
             ``pyx`` the per-class LOG-probabilities (log-softmax), not probabilities
    """
    conv_bias_pattern = ('x', 0, 'x', 'x')   # broadcast bias over batch, rows, cols
    dense_bias_pattern = ('x', 0)            # broadcast bias over batch

    act1 = rectify(conv2d(X, w) + b.dimshuffle(*conv_bias_pattern))
    l1 = dropout(max_pool_2d(act1, (2, 2)), p_drop_conv)

    # The second conv layer is deliberately not pooled.
    act2 = rectify(conv2d(l1, w2) + b2.dimshuffle(*conv_bias_pattern))
    l2 = dropout(act2, p_drop_conv)

    act3 = rectify(conv2d(l2, w3) + b3.dimshuffle(*conv_bias_pattern))
    l3 = dropout(max_pool_2d(act3, (2, 2)), p_drop_conv)

    # Flatten to (batch, features) for the dense layers.
    flat = T.flatten(l3, outdim=2)
    l4 = dropout(rectify(T.dot(flat, w4) + b4.dimshuffle(*dense_bias_pattern)), p_drop_hidden)

    l5 = rectify(T.dot(l4, w5) + b5.dimshuffle(*dense_bias_pattern))

    # Log-softmax: shift by the row max, then subtract the log of the summed exponentials.
    logits = T.dot(l5, w_o) + b_o.dimshuffle(*dense_bias_pattern)
    shifted = logits - logits.max(1, keepdims=True)
    pyx = shifted - T.log(T.sum(T.exp(shifted), axis=1, keepdims=True))
    return l1, l2, l3, l4, l5, pyx

In [None]:
# Load the saved parameter lists of both trained networks.
# Pickles are binary data: open them with "rb" (text mode fails on Python 3 and
# can corrupt reads on Windows) and use `with` so the handles are closed.
# SECURITY: unpickling executes arbitrary code -- only load snapshots you trust.
with open("ModelsV1/ModelSnapshot1000.pkl", "rb") as model1Params:
    Params1 = cPickle.load(model1Params)

with open("Models5LayersV2/ModelSnapshot1999.pkl", "rb") as model2Params:
    Params2 = cPickle.load(model2Params)



In [None]:
# Symbolic 4D float32 input shared by both networks.
X = T.ftensor4()

# Build the inference graphs. model1 takes the first 14 saved parameters and
# model2 the first 12, in the order of their signatures; slice-unpacking replaces
# the hand-typed index lists. Both dropout rates are 0 so the networks are
# deterministic at prediction time.
l11, l12, l13, l14, l15, l16, py_x1 = model1(X, *Params1[:14], p_drop_conv=0., p_drop_hidden=0.)
l21, l22, l23, l24, l25, py_x2 = model2(X, *Params2[:12], p_drop_conv=0., p_drop_hidden=0.)

In [None]:
# Compile one prediction function per network, model1 first. op1dist returns
# model1's softmax probabilities, op2dist returns model2's log-probabilities.
# allow_input_downcast lets higher-precision numpy inputs feed the float32 input.
op1dist, op2dist = (
    theano.function(inputs=[X], outputs=network_output, allow_input_downcast=True)
    for network_output in (py_x1, py_x2)
)

In [None]:
# pyTe1 = op1dist(teX)
# pyTe2 = op2dist(teX)

In [None]:
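# NOTE: these expressions compute accuracy (fraction of correct predictions),
# not error; they need pyTe1/pyTe2 from the commented-out cell above.
# accuracy = np.mean(np.argmax(teY, axis=1) == np.argmax(pyTe2, axis=1))
# print accuracy

# accuracy = np.mean(np.argmax(teY, axis=1) == np.argmax(pyTe1, axis=1))
# print accuracy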

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error

In [None]:
# Run both networks over the whole training set and stack their per-class
# outputs side by side as features for a second-stage classifier.
pyTr1 = op1dist(trX)
pyTr2 = op2dist(trX)
# NOTE(review): pyTr1 holds probabilities (model1 ends in softmax) while pyTr2
# holds log-probabilities (model2 ends in log-softmax), so the stacked columns
# are on different scales -- confirm this is intended.
compound_result = np.concatenate((pyTr1, pyTr2), axis=1)
# Parenthesised print gives identical output on Python 2 and also parses on Python 3.
print(compound_result.shape)