In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import matplotlib.pyplot as plt
import csv
import os
import sys
import numpy as np
import collections
import scipy.io
import time

# CNN bits
import theano
import theano.tensor as T
import lasagne

# for evaluation
sys.path.append(os.path.expanduser('~/projects/engaged_hackathon/'))
from engaged.features import evaluation
from sklearn import metrics
from sklearn.metrics import roc_curve, auc

Using gpu device 0: GeForce GTX 770


In [2]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield successive ``(inputs, targets)`` minibatches of fixed size.

    Parameters
    ----------
    inputs, targets : indexable sequences of equal length
        Both are indexed with the same slice (or, when shuffling, the same
        integer index array), so they must support that kind of indexing.
    batchsize : int
        Number of samples in every yielded batch.
    shuffle : bool, optional
        If true, samples are visited in a random order drawn from
        ``np.random``; otherwise they are visited in their stored order.

    Yields
    ------
    tuple
        ``(inputs[batch], targets[batch]).``

    Notes
    -----
    Only full batches are produced: any trailing samples that do not fill a
    whole batch are silently skipped.
    """
    assert len(inputs) == len(targets)

    num_samples = len(inputs)
    order = None
    if shuffle:
        order = np.arange(num_samples)
        np.random.shuffle(order)

    # The upper bound guarantees that every window [lo, hi) is complete.
    for lo in range(0, num_samples - batchsize + 1, batchsize):
        hi = lo + batchsize
        selection = order[lo:hi] if shuffle else slice(lo, hi)
        yield inputs[selection], targets[selection]



In [3]:
def iterate_balanced_minibatches(inputs, targets, batchsize):
    """Yield class-balanced minibatches for a binary (0/1) problem.

    Every yielded batch holds ``batchsize`` samples whose target is 1 followed
    by ``batchsize`` samples whose target is 0, i.e. ``2 * batchsize`` samples
    in total.

    Parameters
    ----------
    inputs : numpy array
        Sample array, indexed along its first axis.
    targets : numpy array
        Label array of the same length; samples are selected where it equals
        1 (positives) and where it equals 0 (negatives).
    batchsize : int
        Number of samples drawn *per class* for each batch.

    Yields
    ------
    tuple
        ``(inputs[idxs], targets[idxs])`` with the positive indices first.

    Notes
    -----
    The number of batches is set by the negatives: each batch consumes a fresh
    window of shuffled negatives and only complete windows are used. The
    positives are read with the same window positions but wrap around, so
    they are reused when there are fewer positives than negatives.
    """
    assert len(inputs) == len(targets)

    pos_pool = np.where(targets == 1)[0]
    neg_pool = np.where(targets == 0)[0]

    # Shuffle positives first, then negatives, each in place.
    np.random.shuffle(pos_pool)
    np.random.shuffle(neg_pool)

    num_negatives = len(neg_pool)
    for offset in range(0, num_negatives - batchsize + 1, batchsize):
        # Same window of positions for both classes; mode='wrap' makes the
        # positions cycle back to the start of a pool that is too short.
        window = np.arange(offset, offset + batchsize)
        pos_pick = np.take(pos_pool, window, mode='wrap')
        neg_pick = np.take(neg_pool, window, mode='wrap')

        chosen = np.concatenate((pos_pick, neg_pick))
        yield inputs[chosen], targets[chosen]



In [11]:
from sklearn.cross_validation import train_test_split
import scipy
import lasagne_helpers

datapath = '/home/michael/projects/engaged_hackathon_data/detection/train_test_patches/'

# Reference only: an in-notebook loader that is not used any more; the data is
# loaded through lasagne_helpers.load_dataset below instead.
#
# def load_dataset(loadpath):
#     data_big = scipy.io.loadmat(loadpath)
#     X_train_val = data_big['X_train']
#     y_train_val = data_big['y_train'].ravel()
#     X_test = data_big['X_test']
#     y_test = data_big['y_test'].ravel()
#
#     train_idxs, val_idxs = train_test_split(
#         np.arange(X_train_val.shape[0]), test_size=0.3)
#
#     X_train = X_train_val[train_idxs]
#     y_train = y_train_val[train_idxs]
#     X_val = X_train_val[val_idxs]
#     y_val = y_train_val[val_idxs]
#
#     return X_train, y_train, X_val, y_val, X_test, y_test

# Load the train / validation / test splits.
print("Loading data...")
X_train, y_train, X_val, y_val, X_test, y_test = lasagne_helpers.load_dataset(
    datapath + 'unbalanced_256.mat')

# Cast every label vector to int32 (the targets variable used for training in
# this notebook is a Theano ivector).
y_train, y_val, y_test = [
    labels.astype(np.int32) for labels in (y_train, y_val, y_test)]

 Loading data...


In [12]:
# Sanity check: show the dimensions of the training array.
print(X_train.shape)

(70000, 1, 256, 19)


In [22]:
def build_custom_mlp(
        input_var=None, 
        drop_input=.2,    
        conv_depth = 2,
        num_filters1=32,
        filter_size1 = 3,
        num_filters2=32,
        filter_size2 = 5,
        pool_size = 2,  # same pool size for each max pool
#         drop_conv = 0.01,
        dense_depth=2,
        dense_width=800, 
        drop_dense_hidden=.5):
    """Build a configurable conv + dense Lasagne network with a 2-way softmax.

    Architecture: input -> optional input dropout -> ``conv_depth`` repetitions
    of (conv, conv, max-pool) -> ``dense_depth`` repetitions of
    (dense, optional dropout) -> 2-unit softmax output.

    The spatial input size is read from the notebook-level ``X_train`` array
    (``X_train.shape[2]`` and ``X_train.shape[3]``).

    Parameters
    ----------
    input_var : Theano variable or None
        Passed to the ``InputLayer``.
    drop_input : float
        Dropout probability applied to the input; a falsy value disables it.
    conv_depth : int
        Number of (conv, conv, max-pool) groups.
    num_filters1, num_filters2 : int
        Number of filters of the first / second convolution in each group.
    filter_size1, filter_size2 : int
        Accepted for interface compatibility but currently NOT used: both
        convolutions use fixed 3x3 filters.
        NOTE(review): wiring these in would change the default architecture
        (``filter_size2`` defaults to 5) -- confirm the intent first.
    pool_size : int
        Side length of the square max-pooling window.
    dense_depth : int
        Number of hidden dense layers.
    dense_width : int
        Number of units in each hidden dense layer.
    drop_dense_hidden : float
        Dropout probability applied after each hidden dense layer; a falsy
        value disables it.

    Returns
    -------
    lasagne.layers.Layer
        The softmax output layer of the network.
    """
    # Input layer and dropout (with shortcut `dropout` for `DropoutLayer`):
    im_width, im_height = X_train.shape[2], X_train.shape[3]
    network = lasagne.layers.InputLayer(shape=(None, 1, im_width, im_height),
                                        input_var=input_var)
    if drop_input:
        network = lasagne.layers.dropout(network, p=drop_input)

    # Conv groups: each one is two 3x3 convolutions followed by a max pool.
    for _ in range(conv_depth):
        network = lasagne.layers.Conv2DLayer(
            network, num_filters=num_filters1,
            filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform())

        network = lasagne.layers.Conv2DLayer(
            network, num_filters=num_filters2,
            filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform())

        network = lasagne.layers.MaxPool2DLayer(
            network, pool_size=(pool_size, pool_size))

#         if drop_conv:
#             network = lasagne.layers.dropout(network, p=drop_conv)

    # Dense layers and dropout
    nonlin = lasagne.nonlinearities.rectify

    for _ in range(dense_depth):
        network = lasagne.layers.DenseLayer(
                network, dense_width, nonlinearity=nonlin)
        # Fixed: this used to test the undefined name `drop_hidden`, which
        # raised NameError as soon as one dense layer was requested.
        if drop_dense_hidden:
            network = lasagne.layers.dropout(network, p=drop_dense_hidden)

    # Output layer. Fixed: the softmax nonlinearity was looked up but no output
    # layer was ever created. Two units, like the output layer of build_cnn.
    softmax = lasagne.nonlinearities.softmax
    network = lasagne.layers.DenseLayer(network, num_units=2,
                                        nonlinearity=softmax)
    return network




In [23]:
def build_cnn(input_var=None):
    """Build the CNN used in this notebook and return its output layer.

    Layer stack, in order:

    * input of shape ``(None, 1, X_train.shape[2], X_train.shape[3])``
      (the spatial size is read from the notebook-level ``X_train``),
    * dropout on the input with p=0.18,
    * two stages of [64-filter 3x3 convolution (leaky rectify, Glorot uniform
      weights) -> 2x2 max pooling],
    * two fully-connected layers of 800 leaky-rectify units, each fed through
      dropout with p=0.3,
    * a 2-unit softmax output layer, also fed through dropout with p=0.3.

    Parameters
    ----------
    input_var : Theano variable or None
        Passed to the ``InputLayer``.

    Returns
    -------
    lasagne.layers.Layer
        The softmax output layer.
    """
    layers = lasagne.layers
    leaky = lasagne.nonlinearities.leaky_rectify

    # Input layer sized from the training data, followed by input dropout.
    dim_a, dim_b = X_train.shape[2], X_train.shape[3]
    net = layers.InputLayer(shape=(None, 1, dim_a, dim_b),
                            input_var=input_var)
    net = layers.dropout(net, p=0.18)

    # Two identical convolution + max-pooling stages.
    for _ in range(2):
        net = layers.Conv2DLayer(
            net, num_filters=64, filter_size=(3, 3),
            nonlinearity=leaky,
            W=lasagne.init.GlorotUniform())
        net = layers.MaxPool2DLayer(net, pool_size=(2, 2))

    # Two identical hidden dense layers, each with dropout on its input.
    for _ in range(2):
        net = layers.DenseLayer(
            layers.dropout(net, p=.3),
            num_units=800,
            nonlinearity=leaky)

    # Two-class softmax output, again with dropout on its input.
    return layers.DenseLayer(
        layers.dropout(net, p=.3),
        num_units=2,
        nonlinearity=lasagne.nonlinearities.softmax)


In [24]:
# Prepare Theano variables for inputs (4-D tensor) and integer targets
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
num_epochs = 20

minibatch_size = 150  # TODO: tune this value

# Create the neural network model (always the CNN from build_cnn here)
print("Building model and compiling functions...")
network = build_cnn(input_var)

# Create a loss expression for training, i.e., a scalar objective we want
# to minimize: the mean categorical cross-entropy over the minibatch.
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
# We could add some weight decay as well here, see lasagne.regularization.

# Create update expressions for training, i.e., how to modify the
# parameters at each training step. Here, we'll use Stochastic Gradient
# Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.0249, momentum=0.91)

# Create a loss expression for validation/testing. The crucial difference
# here is that we do a deterministic forward pass through the network,
# disabling dropout layers.
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                        target_var)
test_loss = test_loss.mean()
# As a bonus, also create an expression for the classification accuracy
# (fraction of samples whose argmax class equals the target):
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                  dtype=theano.config.floatX)

# Compile a function performing a training step on a mini-batch (by giving
# the updates dictionary) and returning the corresponding training loss:
train_fn = theano.function([input_var, target_var], loss, updates=updates)

# Compile a second function computing the validation loss and accuracy:
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

# Compile a third function returning the predicted class index (argmax over
# the output units) for each sample. Note that test_prediction is rebuilt
# here with the same call as above.
test_prediction = lasagne.layers.get_output(network, deterministic=True)
predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1))

Building model and compiling functions...


In [None]:
# Finally, launch the training loop.
print("Starting training...")
# We iterate over epochs:
for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_batches = 0
    start_time = time.time()
    print "Batch: ",
    for count, batch in enumerate(iterate_balanced_minibatches(X_train, y_train, minibatch_size)):
        if count % 100 == 0:
            print count,
        inputs, targets = batch
        train_err += train_fn(inputs, targets)
        train_batches += 1

    # And a full pass over the validation data:
    val_err = 0
    val_acc = 0
    val_batches = 0
    y_preds, y_gts = [], []
    for batch in lasagne_helpers.iterate_balanced_minibatches(
                X_val, y_val, int(minibatch_size)):

        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        val_err += err
        val_acc += acc
        val_batches += 1

        y_preds.append( predict_fn(inputs))
        y_gts.append(targets)

        norm_acc = lasagne_helpers.normalised_accuracy(
            np.hstack(y_gts), np.hstack(y_preds))

    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
    print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))
    print("  normalised accuracy:\t\t{:.2f} %".format(norm_acc * 100))
    sys.stdout.flush()

# After training, we compute and print the test error:
test_err = 0
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
    inputs, targets = batch
    err, acc = val_fn(inputs, targets)
    test_err += err
    test_acc += acc
    test_batches += 1
print("Final results:")
print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))

# Optionally, you could now dump the network weights to a file like this:
# np.savez('model.npz', lasagne.layers.get_all_param_values(network))

## Testing on the balanced test set

In [None]:
# do prediction one bit at a time...
test_prediction = lasagne.layers.get_output(network, deterministic=True)
predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1))

y_preds = []
for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
    inputs, targets = batch
    y_preds.append( predict_fn(inputs))
    
y_pred = np.hstack(y_preds)
accs = 0
print y_test.shape, y_pred.shape

In [None]:

for thresh in np.linspace(0, 1, 11):
    accs= 0
    for target in [0, 1]:
        idxs = y_test == target
        accs += metrics.accuracy_score(y_test[idxs], y_pred[idxs]>thresh)
    accs /= 2
    print thresh, accs

In [None]:
# Debug check: dtype of the predictions returned by predict_fn. Relies on the
# notebook-level `inputs` left over from the most recent batch loop.
predict_fn(inputs).dtype

In [None]:
# Mean of the ground-truth labels, then mean of the predicted labels.
for labels in (y_test, y_pred):
    print(labels.mean())

In [None]:
# Save the whole trained network object with pickle.
# The file is opened in binary mode ('wb'): pickle output is a byte stream, and
# text mode ('w') can corrupt it on platforms that translate newlines.
# NOTE: only unpickle this file from a trusted location -- pickle.load can
# execute arbitrary code.
import cPickle as pickle
with open('/home/michael/projects/engaged_hackathon_data/detection/models/cnn_balanced_new_params_256.pkl', 'wb') as f:
    pickle.dump(network, f)