In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import matplotlib.pyplot as plt
import csv
import os
import sys
import numpy as np
import collections
import time
import scipy
import helpers 

# CNN bits
import theano
import theano.tensor as T
import lasagne

# for evaluation
sys.path.append(os.path.expanduser('~/projects/engaged_hackathon/'))
from engaged.features import evaluation
from sklearn import metrics
from sklearn.metrics import roc_curve, auc

Using gpu device 0: GeForce GTX 770


In [9]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield consecutive ``(inputs, targets)`` minibatches of ``batchsize`` items.

    Only full batches are produced: if ``len(inputs)`` is not a multiple of
    ``batchsize`` the trailing items are silently skipped.

    Parameters
    ----------
    inputs, targets : indexable sequences of equal length
        Must support both slice indexing and indexing by an integer array
        (the latter is used when ``shuffle`` is true).
    batchsize : int
        Number of items in every yielded batch.
    shuffle : bool
        When true, items are visited in a random order drawn once per call
        from numpy's global random state.
    """
    assert len(inputs) == len(targets)

    n_items = len(inputs)
    order = None
    if shuffle:
        # One permutation per call; batches are consecutive runs of it.
        order = np.arange(n_items)
        np.random.shuffle(order)

    for lo in range(0, n_items - batchsize + 1, batchsize):
        hi = lo + batchsize
        chosen = order[lo:hi] if shuffle else slice(lo, hi)
        yield inputs[chosen], targets[chosen]
        

def iterate_balanced_minibatches(inputs, targets, batchsize):
    """Yield class-balanced minibatches for a binary (0/1) target vector.

    Every yielded batch holds ``2 * batchsize`` items: ``batchsize`` items
    whose target is 1 followed by ``batchsize`` items whose target is 0.

    The number of batches is driven by the negatives: each full window of
    ``batchsize`` shuffled negatives is used once. Positives are read with the
    same window but wrap around, so they are reused when there are fewer
    positives than negatives.

    Parameters
    ----------
    inputs : array indexable by an integer index array
    targets : numpy array of 0/1 labels, same length as ``inputs``
    batchsize : int
        Number of items drawn *per class* for each batch.
    """
    assert len(inputs) == len(targets)

    pos_pool = np.where(targets == 1)[0]
    neg_pool = np.where(targets == 0)[0]

    # Shuffle positives first, then negatives (order matters for the RNG stream).
    for pool in (pos_pool, neg_pool):
        np.random.shuffle(pool)

    n_negatives = len(neg_pool)
    for start in range(0, n_negatives - batchsize + 1, batchsize):
        # Same index window for both classes; mode='wrap' makes the (usually
        # smaller) positive pool cycle back to its beginning.
        window = np.arange(start, start + batchsize)
        pos_part = pos_pool.take(window, mode='wrap')
        neg_part = neg_pool.take(window, mode='wrap')

        batch_idxs = np.concatenate((pos_part, neg_part))
        yield inputs[batch_idxs], targets[batch_idxs]




In [10]:
import scipy.io
from sklearn.cross_validation import train_test_split

# Directory holding the multilabel .mat files (trailing slash kept on purpose).
datapath = '/home/michael/projects/engaged_hackathon_data/multilabel_classification/'

# Read the train / validation / test splits plus the list of class names.
print("Loading data...")
X_train, y_train, X_val, y_val, X_test, y_test, classes = \
    helpers.load_multilabel_dataset(
        os.path.join(datapath, 'all_train.mat'),
        os.path.join(datapath, 'all_test.mat'),
        max_samples=10000)

# Labels are used as int32 everywhere downstream.
y_train, y_test, y_val = (
    labels.astype(np.int32) for labels in (y_train, y_test, y_val))

# Quick look at the feature array shapes.
print(X_train.shape)
print(X_test.shape)
print(X_val.shape)

Loading data...
['split_arrays', 'slices', 'labels', '__header__', '__globals__', '__version__', 'class_names']
['split_arrays', 'slices', 'labels', '__header__', '__globals__', '__version__', 'class_names']
(10000, 1, 128, 19)
(10000, 1, 128, 19)
(10000, 1, 128, 19)


In [46]:
def build_cnn(input_var=None, input_shape=None):
    """Build the two-class CNN used for each per-class detector.

    Layers, in order:
      * input of shape ``(batch, 1, rows, cols)`` followed by 20% dropout
      * two stages of [3x3 convolution, 50 filters] -> [(2, 1) max-pooling];
        the pooling halves the first spatial axis only
      * two fully connected layers of 1000 units, each with 50% dropout on
        its input
      * a 2-unit softmax output layer with 50% dropout on its input

    All hidden layers use the ``very_leaky_rectify`` nonlinearity.

    Parameters
    ----------
    input_var : Theano variable or None
        Symbolic input handed to the Lasagne ``InputLayer``.
    input_shape : (int, int) or None
        Size of the two trailing (spatial) axes of the input. When omitted it
        is taken from the notebook-global ``X_train`` (axes 2 and 3), which
        is what this function always did before the parameter existed.

    Returns
    -------
    The Lasagne output layer of the network.
    """
    if input_shape is None:
        # Backward-compatible default: size the network from the global data.
        input_shape = (X_train.shape[2], X_train.shape[3])
    n_rows, n_cols = input_shape

    hidden_nonlinearity = lasagne.nonlinearities.very_leaky_rectify

    network = lasagne.layers.InputLayer(
        shape=(None, 1, n_rows, n_cols), input_var=input_var)

    # Light dropout directly on the input.
    network = lasagne.layers.dropout(network, p=0.2)

    # Two identical convolution + pooling stages.
    for _ in range(2):
        network = lasagne.layers.Conv2DLayer(
            network, num_filters=50, filter_size=(3, 3),
            nonlinearity=hidden_nonlinearity,
            W=lasagne.init.GlorotUniform())
        network = lasagne.layers.MaxPool2DLayer(
            network, pool_size=(2, 1), stride=(2, 1))

    # Two fully connected hidden layers of 1000 units, 50% dropout on inputs.
    for _ in range(2):
        network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            num_units=1000,
            nonlinearity=hidden_nonlinearity)

    # Two-way softmax output (class absent / class present).
    network = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(network, p=.5),
        num_units=2,
        nonlinearity=lasagne.nonlinearities.softmax)

    return network




In [47]:
def prepare_network(learning_rate=0.0249, momentum=0.91, l2_weight=0.001):
    """Build the CNN and compile its Theano training/validation/prediction functions.

    Parameters
    ----------
    learning_rate : float
        Step size for the Nesterov-momentum SGD updates.
    momentum : float
        Momentum coefficient for the updates.
    l2_weight : float
        Multiplier on the L2 penalty over the network parameters that is
        added to the training loss (the validation loss has no penalty).

    Returns
    -------
    network
        Output layer returned by ``build_cnn``.
    train_fn
        ``train_fn(inputs, targets)`` performs one update step and returns
        the (regularised) training loss for that minibatch.
    predict_fn
        ``predict_fn(inputs)`` returns the softmax outputs of a deterministic
        (dropout disabled) forward pass.
    val_fn
        ``val_fn(inputs, targets)`` returns ``[loss, accuracy]`` of a
        deterministic forward pass.
    """
    # Symbolic inputs: a 4D batch of images and an int32 vector of labels.
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    print("Building model and compiling functions...")
    network = build_cnn(input_var)

    # Training objective: mean cross-entropy (dropout active) + L2 weight decay.
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    reg_l2 = lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)
    loss = loss + l2_weight * reg_l2

    # SGD with Nesterov momentum over all trainable parameters.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=learning_rate, momentum=momentum)

    # Validation/test expressions use a deterministic forward pass, i.e.
    # dropout layers are disabled.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()

    # Fraction of samples whose most probable class equals the target.
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Return the real accuracy as the second output. This previously returned
    # the loss twice, so callers that treated it as an accuracy were tracking
    # the loss instead.
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # Reuse the deterministic prediction graph built above.
    predict_fn = theano.function([input_var], test_prediction)

    return network, train_fn, predict_fn, val_fn

In [48]:
# Sanity check: feature and label array shapes for each split.
print("%s %s" % (X_train.shape, y_train.shape))
print("%s %s" % (X_test.shape, y_test.shape))
print("%s %s" % (X_val.shape, y_val.shape))

(10000, 1, 128, 19) (10000, 28)
(10000, 1, 128, 19) (10000, 28)
(10000, 1, 128, 19) (10000, 28)


In [1]:
try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle
sys.path.append('/home/michael/projects/engaged_hackathon/notebooks/detection')

num_epochs = 50

minibatch_size = 120  # optimise

# Trained models are written here, one pickle per class.
model_dir = './models/'
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

# Finally, launch the training loop.
y_train = np.squeeze(y_train).astype(np.int32)
y_val = np.squeeze(y_val).astype(np.int32)
y_test = np.squeeze(y_test).astype(np.int32)

# One independent binary network is trained per class (one label column each).
for class_idx, class_name in enumerate(classes):

    this_y_train = y_train[:, class_idx]
    this_y_test = y_test[:, class_idx]
    this_y_val = y_val[:, class_idx]

    # Skip classes that are positive in fewer than 1% of any split.
    if this_y_train.mean() < 0.01 or this_y_val.mean() < 0.01 or this_y_test.mean() < 0.01:
        print("Not enough of class  %s" % class_name)
        continue

    # iterate_balanced_minibatches yields one batch per full window of
    # negatives, so this is the real number of minibatches per epoch. With
    # zero batches the loss averages below would divide by zero.
    n_train_batches = int(np.sum(this_y_train == 0)) // minibatch_size
    n_val_batches = int(np.sum(this_y_val == 0)) // minibatch_size
    if n_train_batches == 0 or n_val_batches == 0:
        print("Not enough negative examples to fill a minibatch for class  %s"
              % class_name)
        continue

    network, train_fn, predict_fn, val_fn = prepare_network()

    print("Starting training..." + class_name)
    print("There will be %d minibatches per epoch" % n_train_batches)

    # No trailing newline: the progress dots of the first epoch follow on.
    sys.stdout.write("""     epoch   train loss   valid loss   train/val    valid acc     dur
       -------  -----------  -----------  -----------  -----------  -------""")

    # Model selection uses the lowest validation loss. The parameter *values*
    # are copied at that point: keeping a reference to `network` would just
    # track the live weights, i.e. always end up with the last epoch.
    best_validation_loss = np.inf
    best_param_values = None

    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for count, (inputs, targets) in enumerate(
                iterate_balanced_minibatches(X_train, this_y_train, minibatch_size)):
            if count % 100 == 0:
                sys.stdout.write(' .')
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0

        for inputs, targets in iterate_balanced_minibatches(
                X_val, this_y_val, int(minibatch_size)):
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        train_loss = train_err / train_batches
        val_loss = val_err / val_batches
        mean_val_accuracy = val_acc / val_batches

        # One table row per epoch, aligned with the header printed above.
        row = [
            str(epoch).ljust(8),
            ("%0.06f" % (train_loss)).ljust(12),
            ("%0.06f" % (val_loss)).ljust(12),
            ("%0.06f" % (train_loss / val_loss)).ljust(12),
            ("%0.06f" % (mean_val_accuracy)).ljust(10),
            ("%0.04f" % (time.time() - start_time)).ljust(10),
        ]
        sys.stdout.write("\n     " + " ".join(row))
        sys.stdout.flush()

        if val_loss < best_validation_loss:
            best_validation_loss = val_loss
            best_param_values = lasagne.layers.get_all_param_values(network)

    # Put the best weights back into the network. predict_fn reads the same
    # shared parameters, so it now predicts with the selected model.
    if best_param_values is not None:
        lasagne.layers.set_all_param_values(network, best_param_values)
    best_model = (network, predict_fn)

    # After training, we compute and print the test score:
    y_preds, y_gts = [], []
    for inputs, targets in iterate_minibatches(X_test, this_y_test, 500, shuffle=False):
        # Column 1 of the softmax output is the score for "class present".
        y_preds.append(predict_fn(inputs)[:, 1])
        y_gts.append(targets)

    norm_acc = evaluation.normalised_accuracy(
        np.hstack(y_gts), np.hstack(y_preds))

    print("Final acc: %f\n\n" % norm_acc)

    # save the network (binary mode: protocol -1 is a binary pickle format)
    with open(os.path.join(model_dir, class_name.strip() + '.pkl'), 'wb') as f:
        pickle.dump(best_model, f, -1)

    # Optionally, you could now dump the network weights to a file like this:
    # np.savez('model.npz', lasagne.layers.get_all_param_values(network))

NameError: name 'sys' is not defined

In [None]:
# now let's find per-class test accuracy
y_preds, y_gts = [], []

for batch in iterate_minibatches(X_train, y_train, int(minibatch_size)):

    inputs, targets = batch
    err, acc = val_fn(inputs, targets)
    y_preds.append( predict_fn(inputs))
    y_gts.append(targets)

In [None]:
# Join the per-minibatch pieces row-wise into one array each.
y_pred, y_gt = np.vstack(y_preds), np.vstack(y_gts)

In [None]:
import collections
from sklearn import metrics

# Accuracy per class at a 0.5 decision threshold, stored in `classes` order.
results = collections.OrderedDict()

for class_name, pred_row, gt_row in zip(classes, y_pred.T, y_gt.T):
    pred_labels = pred_row > 0.5
    # Number of ground-truth positives vs. number of predicted positives.
    print("%s %s" % (gt_row.sum(), pred_labels.sum()))
    class_accuracy = metrics.accuracy_score(gt_row, pred_labels)
    results[class_name] = class_accuracy
    print("%s %s" % (class_name.ljust(20), class_accuracy))


In [None]:
# Column-wise totals of predictions (x) against ground truth (y).
pred_totals = y_pred.sum(axis=0)
gt_totals = y_gt.sum(axis=0)

ax = plt.gca()
ax.plot(pred_totals, gt_totals, '.')
ax.set_xlabel('Y prediction')
ax.set_ylabel('Y gt')
ax.axis('equal')

In [None]:
# Side-by-side images of the first 100 rows: predictions (left), labels (right).
plt.figure(figsize=(15, 15))

plt.subplot(1, 2, 1)
pred_image = plt.imshow(y_pred[:100], interpolation='nearest')
# Fix the colour scale of the prediction image to [0, 1].
pred_image.set_clim(0, 1)
# plt.colorbar()

plt.subplot(1, 2, 2)
plt.imshow(y_gt[:100], interpolation='nearest')

In [None]:
# Mean squared difference between predictions and ground truth.
squared_error = np.square(y_pred - y_gt)
print(np.mean(squared_error))