In [1]:
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
import numpy as np
from load import mnist

Using gpu device 0: GeForce GT 750M (CNMeM is disabled)


In [2]:
# Module-level random stream; dropout() samples its binary keep/drop masks from it.
srng = RandomStreams()

def floatX(X):
    """Return X as a NumPy array whose dtype is Theano's configured float type."""
    target_dtype = theano.config.floatX
    return np.asarray(X, dtype=target_dtype)

def init_weights(shape, scale=0.01):
    """Create a Theano shared variable filled with small random Gaussian weights.

    Parameters
    ----------
    shape : sequence of int
        Dimensions of the weight array; unpacked into ``np.random.randn``.
    scale : float, optional
        Factor applied to the standard-normal draws. Defaults to 0.01.

    Returns
    -------
    A ``theano.shared`` variable wrapping the scaled draws after they have
    been cast with ``floatX``.
    """
    initial_values = np.random.randn(*shape) * scale
    return theano.shared(floatX(initial_values))

def rectify(X):
    """Rectified linear unit: element-wise maximum of X and zero."""
    floor = 0.
    return T.maximum(X, floor)

def softmax(X):
    """Softmax along axis 1 of X.

    The maximum along axis 1 is subtracted before exponentiating so the
    exponentials cannot overflow; the result is then normalised by the sum
    along the same axis.
    """
    # dimshuffle(0, 'x') turns the per-row reduction back into a column so it
    # broadcasts against X.
    row_max = X.max(axis=1).dimshuffle(0, 'x')
    exps = T.exp(X - row_max)
    row_total = exps.sum(axis=1).dimshuffle(0, 'x')
    return exps / row_total

def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    """Build the list of RMSprop update pairs for ``params``.

    For every parameter a shared accumulator keeps a running average of the
    squared gradient (decay ``rho``); the gradient is divided by the square
    root of that average plus ``epsilon`` before the step of size ``lr``.

    Returns a list of ``(shared_variable, new_expression)`` tuples: for each
    parameter, first the accumulator update, then the parameter update.
    """
    gradients = T.grad(cost=cost, wrt=params)
    update_pairs = []
    for param, grad in zip(params, gradients):
        # Accumulator starts as zeros with the same shape/dtype as the parameter value.
        mean_sq = theano.shared(param.get_value() * 0.)
        mean_sq_next = rho * mean_sq + (1 - rho) * grad ** 2
        scaled_grad = grad / T.sqrt(mean_sq_next + epsilon)
        update_pairs.extend([
            (mean_sq, mean_sq_next),
            (param, param - lr * scaled_grad),
        ])
    return update_pairs

def dropout(X, p=0.):
    """Apply dropout with drop probability ``p`` to X.

    When ``p`` is not greater than zero X is returned untouched. Otherwise X
    is multiplied by a binomial mask sampled from the module-level ``srng``
    with success probability ``1 - p`` and then divided by ``1 - p``.
    """
    if not p > 0:
        return X
    keep_prob = 1 - p
    mask = srng.binomial(X.shape, p=keep_prob, dtype=theano.config.floatX)
    X *= mask
    # Rescale so the expected value of the output matches the input.
    X /= keep_prob
    return X

In [3]:
def model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden):
    """Two-hidden-layer rectifier network with dropout and a softmax output.

    Dropout with probability ``p_drop_input`` is applied to X, and with
    probability ``p_drop_hidden`` to the output of each hidden layer.

    Returns ``(h, h2, py_x)``: the two hidden activations *after* dropout and
    the softmax output.
    """
    noisy_input = dropout(X, p_drop_input)
    h = dropout(rectify(T.dot(noisy_input, w_h)), p_drop_hidden)
    h2 = dropout(rectify(T.dot(h, w_h2)), p_drop_hidden)
    py_x = softmax(T.dot(h2, w_o))
    return h, h2, py_x

In [4]:
# Load the MNIST train/test splits via the project-local `load.mnist` helper.
# NOTE(review): onehot=True presumably yields one-hot label matrices (the
# evaluation loop takes argmax over axis 1 of teY) — confirm against load.py.
trX, teX, trY, teY = mnist(onehot=True)

# Symbolic float32 matrices for a batch of inputs and its target labels.
X = T.fmatrix()
Y = T.fmatrix()

# Weights of a 784 -> 625 -> 625 -> 10 fully connected network.
w_h = init_weights((784, 625))
w_h2 = init_weights((625, 625))
w_o = init_weights((625, 10))

# Two graphs over the same shared weights: one with dropout (0.2 on the input,
# 0.5 on each hidden layer) used for the training cost, and one with dropout
# disabled used for prediction. The hidden outputs are not used below.
noise_h, noise_h2, noise_py_x = model(X, w_h, w_h2, w_o, 0.2, 0.5)
h, h2, py_x = model(X, w_h, w_h2, w_o, 0., 0.)
# Predicted class index per row of the dropout-free output.
y_x = T.argmax(py_x, axis=1)

# Training objective: mean categorical cross-entropy of the dropout output against Y.
cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
params = [w_h, w_h2, w_o]
updates = RMSprop(cost, params, lr=0.001)

# train: returns the cost for a batch and applies the RMSprop updates.
# predict: returns the predicted class indices for a batch of inputs.
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)


In [5]:
# Train for 100 epochs in mini-batches of 128 and print the test-set accuracy
# after every epoch.
for epoch in range(100):
    # Iterate over batch start offsets only: slices are clamped at the end of
    # the array, so the final (possibly smaller) batch is trained on as well.
    for start in range(0, len(trX), 128):
        end = start + 128
        # Stored under its own name so the symbolic `cost` expression is not
        # overwritten by a numeric value.
        batch_cost = train(trX[start:end], trY[start:end])
    # Fraction of test examples whose predicted class matches the label's argmax.
    print(np.mean(np.argmax(teY, axis=1) == predict(teX)))

0.9379
0.9646
0.9735
0.9762
0.9763
0.9783
0.9791
0.9809
0.9818
0.9826
0.9824
0.982
0.9829
0.9833
0.9818
0.9825
0.9838
0.9845
0.9852
0.9856
0.9839
0.9849
0.9845
0.9846
0.9855
0.9853
0.9857
0.9861
0.9851
0.9863
0.9877
0.9862
0.9868
0.9868
0.9856
0.987
0.9856
0.9854
0.9865
0.9862
0.9853
0.9864
0.9865
0.9866
0.9866
0.9866
0.9869
0.9864
0.9863
0.9873
0.9866
0.9872
0.9873
0.9872
0.9876
0.987
0.9873
0.9873
0.9869
0.9881
0.9866
0.9878
0.9885
0.987
0.987
0.9871
0.9871
0.9877
0.9871
0.9874
0.9883
0.9883
0.9885
0.9873
0.9873
0.9879
0.9882
0.9873
0.9875
0.9875
0.9876
0.9881
0.9886
0.9875
0.9876
0.988
0.9885
0.9868
0.9878
0.9878
0.9884
0.9885
0.9886
0.988
0.9869
0.9882
0.9872
0.9883
0.9873
0.9859
