In [19]:
from __future__ import print_function
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne

from cifar10_data import load_cifar10
import lasagne_trainer

In [21]:
# set up plots

%matplotlib inline

import matplotlib.pyplot as plt
import seaborn as sns

# reload external libs during development
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# load data

In [22]:
# get data
X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Train data shape:  (49000, 3, 32, 32)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3, 32, 32)
Validation labels shape:  (1000,)
Test data shape:  (10000, 3, 32, 32)
Test labels shape:  (10000,)


# theano input_var

In [23]:
input_var = T.tensor4('inputs')

## v1: [conv-relu-pool]xN - conv - relu - [affine]xM - [softmax or SVM]
## v2: [conv-relu-pool]XN - [affine]XM - [softmax or SVM]

In [29]:
def create_v1(input_var, input_shape=(3, 32, 32),
              crp_num_filters=32, crp_filter_size=5,
              num_cr=1,
              num_fc=1, fc_num_units=64,
              output_type='softmax', num_classes=10,
              **junk):
    
    # input layer
    network = lasagne.layers.InputLayer(shape=(None,) + input_shape,
                                        input_var=input_var)
    # conv-relu-pool layers
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=crp_num_filters,
        filter_size=(crp_filter_size, crp_filter_size),
        pad='same',
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(gain='relu'))
    
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
   
    # conv-relu
#     network = lasagne.layers.Conv2DLayer(
#         network, num_filters=crp_num_filters,
#         filter_size=(crp_filter_size, crp_filter_size),
#         pad='same',
#         nonlinearity=lasagne.nonlinearities.rectify,
#         W=lasagne.init.GlorotUniform(gain='relu'))
    
    # fc-relu
    network = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(network, p=.5),
        num_units=fc_num_units,
        nonlinearity=lasagne.nonlinearities.rectify)
    
    # output layer
    network = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(network, p=.5),
        num_units=num_classes,
        nonlinearity=lasagne.nonlinearities.softmax)
    
    return network


In [30]:
# defaut params
param = dict(num_crp=1, crp_filter_size=7, crp_num_filters=32,
             num_fc=1, fc_num_units=32,
             reg=1e-3,
             learning_rate=1e-4, learning_rate_decay=0.95,
             momentum=0.9, momentum_decay=0.9,
             batch_size=100, num_epochs=20)

In [31]:
param.update(num_crp=3, crp_filter_size=3, crp_num_filters=128,
             num_fc=3, fc_num_units=256, num_epochs=100,
             learning_rate=1e-4, batch_size=128)

In [None]:
network = create_v1(input_var, **param)

In [35]:
model, loss_history, train_acc_history, val_acc_history = lasagne_trainer.train(
    network, input_var, X_train, y_train, X_val, y_val,
    learning_rate=param['learning_rate'], num_epochs=param['num_epochs'],
    batch_size=param['batch_size'])
print('%.3f' % min(loss_history), max(train_acc_history), max(val_acc_history), \
    ' '.join('%s=%s' % (k,param[k]) for k in param))

Compiling...
Training...
epoch 1 / 100 in 31.4s: loss 2.132195, train: 0.386, val 0.353, lr 1.000000e-04 mom 9.000000e-01
epoch 2 / 100 in 31.2s: loss 1.972668, train: 0.416, val 0.385, lr 9.500000e-05 mom 9.050000e-01
epoch 3 / 100 in 31.4s: loss 1.899322, train: 0.433, val 0.402, lr 9.025000e-05 mom 9.097500e-01
epoch 4 / 100 in 31.1s: loss 1.849415, train: 0.445, val 0.413, lr 8.573750e-05 mom 9.142625e-01
epoch 5 / 100 in 31.2s: loss 1.806976, train: 0.459, val 0.417, lr 8.145062e-05 mom 9.185494e-01
epoch 6 / 100 in 31.4s: loss 1.770747, train: 0.472, val 0.431, lr 7.737809e-05 mom 9.226219e-01
epoch 7 / 100 in 31.5s: loss 1.738997, train: 0.482, val 0.440, lr 7.350919e-05 mom 9.264908e-01
epoch 8 / 100 in 31.1s: loss 1.713459, train: 0.502, val 0.439, lr 6.983373e-05 mom 9.301662e-01
epoch 9 / 100 in 31.7s: loss 1.689101, train: 0.507, val 0.445, lr 6.634204e-05 mom 9.336579e-01


KeyboardInterrupt: 

In [None]:
plt.subplot(2, 1, 1)
plt.plot(np.array(loss_history).clip(max=3))
plt.xlabel('iteration')
plt.ylabel('loss')
plt.subplot(2, 1, 2)
plt.plot(train_acc_history)
plt.plot(val_acc_history)
plt.legend(['train', 'val'], loc='upper left')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()

In [None]:
import pickle
with open('v1.pickle', 'wb') as f:
    pickle.dump(model, f, -1)

param

In [None]:
y_true = []
y_pred = []

In [None]:
test_prediction = lasagne.layers.get_output(network, deterministic=True)
pred = test_prediction.argmax(-1)
f_predict = theano.function([input_var], pred)

In [None]:
for i in range(len(X_test)):
    y_true.append(int(y_test[i]))
    y_pred.append(f_predict([X_test[i]])[0])

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
CM = confusion_matrix(y_true, y_pred)

In [None]:
import seaborn as sns

fig = plt.figure(figsize=(20, 20))
sns.heatmap(CM, annot=True, fmt="d", linewidths=.5)
plt.show()