In [1]:
import theano
from theano import tensor as T
from theano import shared
import numpy as np
from numpy import random
import pickle
import gzip
import sys

In [2]:
with gzip.open('mnist.pkl.gz', 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f)
    train_x, train_y = train_set
    valid_x, valid_y = valid_set
    test_x, test_y = test_set

In [3]:
# import matplotlib.cm as cm
# import matplotlib.pyplot as plt
# plt.imshow(train_x[3].reshape((28, 28)), cmap = cm.Greys_r)
# plt.show()

In [4]:
# 50 000 instances, 784 features

In [5]:
X = T.fmatrix('X')
y = T.lvector('y')

W = shared(random.randn(train_x.shape[1], 10), name='W')
b = shared(np.zeros(10), name="b")

logit = T.dot(X, W) + b # returns a matrix, with N rows and 10 numbers per instance (per class), b is broadcast

probs = T.nnet.softmax(logit) # rowwise softmax for each instance, we get N x 10 matrix with probs for each digit per instance

# NLL is sum of only the log(prob) for the CORRECT class per instance
# we can select them by indexing the probs matrix
filtered = probs[T.arange(y.shape[0]), y] # broadcasted indexes, the result is a vector of prob. for each instance

# now we take the logs and sum them up (with a minus)
loss = -T.log(filtered).sum()

# compute the gradients
grad_W = T.grad(loss, W)
grad_b = T.grad(loss, b)

predictions = T.argmax(probs, axis=1) # map the softmax probabilities to a digit (0 to 9)
missclass_rate = T.neq(predictions, y).mean()
# now define the train function
# we minimize the LOSS, hence we follow the negative gradient
train = theano.function(inputs = [X, y],
                        # outputs = [missclass_rate],
                        updates= [(W, W - 0.13*grad_W), (b, b - 0.13*grad_b)])
# define the predict function. Note: it only needs the data points, no labels as inputs of course.
error_rate = theano.function(inputs=[X, y], outputs=[missclass_rate])
predict = theano.function(inputs=[X], outputs = [predictions])

In [6]:
BATCH_SIZE = 500
# now let's define the minibatch learning
err = 1
counter = 0
while err > 0.1:
    indices = random.choice(train_x.shape[0], BATCH_SIZE)
    minibatch_x = train_x[indices]
    minibatch_y = train_y[indices]
    train(minibatch_x, minibatch_y)
    counter+=1
    if counter == 5000:
        err = error_rate(valid_x, valid_y)
        counter = 0
        print(err)
        sys.stdout.flush()

[array(0.0941)]


[array(0.0951)]


KeyboardInterrupt: 

In [None]:
error_rate(train_x, train_y)