In [2]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
from fuel.datasets.cifar10 import CIFAR10
from fuel.transformers import ScaleAndShift, Cast, Flatten, Mapping
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme

train_batch_size = 100
validation_batch_size = 250


CIFAR10.default_transformers = (
    (ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
    (Cast, [np.float32], {'which_sources': 'features'}), 
    #(Flatten, [], {'which_sources': 'features'}),
    #(Flatten, [], {'which_sources': 'targets'}),
    #(Mapping, [lambda batch: (b.T for b in batch)], {}) 
    )

cifar_train = CIFAR10(("train",), subset=slice(None,45000))
cifar_train_stream = DataStream.default_stream(
    cifar_train,
    iteration_scheme=ShuffledScheme(cifar_train.num_examples, train_batch_size))

cifar_validation = CIFAR10(("train",), subset=slice(45000, None))
cifar_validation_stream = DataStream.default_stream(
    cifar_validation, iteration_scheme=SequentialScheme(cifar_validation.num_examples, validation_batch_size))

cifar_test = CIFAR10(("test",))
cifar_test_stream = DataStream.default_stream(
    cifar_test, iteration_scheme=SequentialScheme(cifar_test.num_examples, validation_batch_size))

print "The streams return batches containing %s" % (cifar_train_stream.sources,)

print "Each trainin batch consits of a tuple containing:"
for element in next(cifar_train_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)
    
print "Validation/test batches consits of tuples containing:"
for element in next(cifar_test_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)

cifar_labels = ["airplane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]

The streams return batches containing (u'features', u'targets')
Each trainin batch consits of a tuple containing:
 - an array of size (100, 3, 32, 32) containing float32
 - an array of size (100, 1) containing uint8
Validation/test batches consits of tuples containing:
 - an array of size (250, 3, 32, 32) containing float32
 - an array of size (250, 1) containing uint8


In [4]:
import lasagne

import theano
import theano.tensor as T

from theano import function, config, shared, sandbox
import theano.tensor as T
import numpy
import time

vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
iters = 1000

rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], T.exp(x))
print(f.maker.fgraph.toposort())
t0 = time.time()
for i in range(iters):
    r = f()
t1 = time.time()
print("Looping %d times took %f seconds" % (iters, t1 - t0))
print("Result is %s" % (r,))
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
    print('Used the cpu')
else:
    print('Used the gpu')

[GpuElemwise{exp,no_inplace}(<CudaNdarrayType(float32, vector)>), HostFromGpu(GpuElemwise{exp,no_inplace}.0)]
Looping 1000 times took 0.657853 seconds
Result is [ 1.23178029  1.61879349  1.52278066 ...,  2.20771813  2.29967761
  1.62323296]
Used the gpu


In [7]:
# create Theano variables for input and target minibatch
input_var = T.tensor4('X')
target_var = T.ivector('y')

# create a small convolutional neural network
from lasagne.nonlinearities import leaky_rectify, softmax
network = lasagne.layers.InputLayer((None, 3, 32, 32), input_var)
network = lasagne.layers.Conv2DLayer(network, 128, (5, 5),
                                     nonlinearity=leaky_rectify, pad='same')
network = lasagne.layers.Conv2DLayer(network, 128, (5, 5),
                                     nonlinearity=leaky_rectify, pad='same')
network = lasagne.layers.Conv2DLayer(network, 128, (5, 5),
                                     nonlinearity=leaky_rectify, pad='same')
# network = lasagne.layers.Conv2DLayer(network, 128, (5, 5),
#                                      nonlinearity=leaky_rectify, pad='same')
# network = lasagne.layers.Conv2DLayer(network, 128, (5, 5),
#                                      nonlinearity=leaky_rectify, pad='same')
# network = lasagne.layers.Conv2DLayer(network, 128, (5, 5),
#                                      nonlinearity=leaky_rectify, pad='same')
#network = lasagne.layers.Conv2DLayer(network, 32, (5, 5),
#                                     nonlinearity=leaky_rectify)
network = lasagne.layers.Pool2DLayer(network, (2, 2), stride=2, mode='max')
network = lasagne.layers.Conv2DLayer(network, 128, (5, 5),
                                     nonlinearity=leaky_rectify, pad='same')
network = lasagne.layers.Pool2DLayer(network, (2, 2), stride=2, mode='max')
network = lasagne.layers.Conv2DLayer(network, 128, (5, 5),
                                     nonlinearity=leaky_rectify, pad='same')
network = lasagne.layers.Conv2DLayer(network, 64, (5, 5),
                                     nonlinearity=leaky_rectify, pad='same')
network = lasagne.layers.Pool2DLayer(network, (8, 8), stride=8, mode='max')
#network = lasagne.layers.DenseLayer(lasagne.layers.dropout(network, 0.5),
#                                    128, nonlinearity=leaky_rectify,
#                                    W=lasagne.init.Orthogonal())
network = lasagne.layers.DenseLayer(lasagne.layers.dropout(network, 0.5),
                                    10, nonlinearity=softmax)

# create loss function
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean() + 1e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)

a0 = np.float32(0.02)
tau = np.float32(cifar_train.num_examples)
learning_rate = theano.shared(np.array(a0, dtype=config.floatX))
t = T.scalar()
anneal_learning_rate = theano.function([t], None, updates=[
        (learning_rate, a0 * (tau / T.max([t, tau])))])

# create parameter update expressions
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=learning_rate,
                                            momentum=0.9)
#updates = lasagne.updates.rmsprop(loss, params, learning_rate=0.1)
# compile training function that updates parameters and returns training loss
train_fn = theano.function([input_var, target_var], loss, updates=updates)


test_prediction = lasagne.layers.get_output(network, deterministic=True)

test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
test_loss = loss.mean() + 1e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)

test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)

val_fn = theano.function([input_var, target_var], [test_loss, test_acc])


In [8]:
best_val_err = np.inf
best_params = lasagne.layers.get_all_param_values(network)
best_params_epoch = 0
    
num_epochs = 5
patience_expansion = 1.5


print("Starting training...")
epoch = 0
t = 0
while epoch < num_epochs:
    epoch += 1
    train_err = 0
    train_batches = 0
    start_time = time.time()
    for input_batch, target_batch in cifar_train_stream.get_epoch_iterator():
        train_err += train_fn(input_batch, target_batch.ravel())
        train_batches += 1
        t += train_batch_size
        anneal_learning_rate(t)
        
    # And a full pass over the validation data:
    val_err = 0
    val_acc = 0
    val_batches = 0
    for input_batch, target_batch in cifar_validation_stream.get_epoch_iterator():
        err, acc = val_fn(input_batch, target_batch.ravel())
        val_err += err
        val_acc += acc
        val_batches += 1

    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch, num_epochs, time.time() - start_time))
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
    print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))

    print("Learning rate: {:.6f}".format(float(learning_rate.get_value())))
    
    # patience expansion
    if val_err < best_val_err:
        planned_num_epochs = int(np.maximum(num_epochs, epoch * patience_expansion + 1))
        if planned_num_epochs > num_epochs:
            print "After epoch %d: increased planned number of epochs to %d" % (epoch, planned_num_epochs)
        num_epochs = planned_num_epochs
        best_val_err = val_err
        best_params = lasagne.layers.get_all_param_values(network)
        best_params_epoch = epoch
        


Starting training...


KeyboardInterrupt: 

In [137]:
# After training, we compute and print the test error:
test_err = 0
test_acc = 0
test_batches = 0
for input_batch, target_batch in cifar_test_stream.get_epoch_iterator():
    err, acc = val_fn(input_batch, target_batch.ravel())
    test_err += err
    test_acc += acc
    test_batches += 1
print("Final results:")
print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))


lasagne.layers.set_all_param_values(network, best_params)

test_err = 0
test_acc = 0
test_batches = 0
for input_batch, target_batch in cifar_test_stream.get_epoch_iterator():
    err, acc = val_fn(input_batch, target_batch.ravel())
    test_err += err
    test_acc += acc
    test_batches += 1
print("Final results (using best_params from epoch {}:".format(best_params_epoch))
print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))


Final results:
  test loss:			2.503963
  test accuracy:		8.34 %


ValueError: mismatch: got 12 values to set 20 parameters