In [1]:
import time

import lasagne
import numpy as np
import theano
import theano.tensor as T

from features.common import load_combined_features

In [5]:
H1_SIZE = 100
H2_SIZE = 100

class NNModel(object):
    def __init__(self, input_size):
        # create Theano variables for input and target minibatch
        self.input_var = T.matrix('X')
        self.target_var = T.fvector('y')

        # Input layer
        l_in = lasagne.layers.InputLayer(shape=(None, input_size),
                                         input_var=self.input_var,
                                         name="input")

        # Apply 20% dropout to the input data:
        l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)

        # Add a fully-connected layer of H1_SIZE units, using the linear rectifier, and
        # initializing weights with Glorot's scheme (which is the default anyway):
        l_hid1 = lasagne.layers.DenseLayer(
                l_in_drop, num_units=H1_SIZE,
                nonlinearity=lasagne.nonlinearities.elu,
                W=lasagne.init.GlorotUniform())

        # We'll now add dropout of 50%:
        l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)

        # Another H2_SIZE-unit layer:
        l_hid2 = lasagne.layers.DenseLayer(
                l_hid1_drop, num_units=H2_SIZE,
                nonlinearity=lasagne.nonlinearities.elu,
                W=lasagne.init.GlorotUniform())

        # 50% dropout again:
        l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)

        # Finally, we'll add the fully-connected output layer, of 2 softmax units:
        self.l_out = lasagne.layers.DenseLayer(
                l_hid2_drop, num_units=1,
                nonlinearity=lasagne.nonlinearities.sigmoid,
                W=lasagne.init.GlorotUniform())

        # create loss function
        prediction = lasagne.layers.get_output(self.l_out).flatten()
        loss = lasagne.objectives.binary_crossentropy(prediction, self.target_var)
        loss = loss.mean()

        # create parameter update expressions
        params = lasagne.layers.get_all_params(self.l_out, trainable=True)
        #updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.1,
        #                                            momentum=0.9)
        updates = lasagne.updates.adam(loss, params)

        # Create a loss expression for validation/testing. The crucial difference
        # here is that we do a deterministic forward pass through the network,
        # disabling dropout layers.
        test_prediction = lasagne.layers.get_output(self.l_out, deterministic=True).flatten()
        test_loss = lasagne.objectives.binary_crossentropy(test_prediction, self.target_var)
        test_loss = test_loss.mean()
        
        # As a bonus, also create an expression for the classification accuracy:
        test_pred = T.round(test_prediction)
        test_acc = T.mean(T.isclose(T.round(test_prediction), self.target_var),
            dtype=theano.config.floatX)
    
        # compile training function that updates parameters and returns training loss
        self.train_fn = theano.function([self.input_var, self.target_var], loss, updates=updates,
                                        allow_input_downcast=True)

        # Compile a second function computing the validation loss and accuracy:
        self.val_fn = theano.function([self.input_var, self.target_var], [test_loss, test_acc, test_prediction, test_pred],
                                      allow_input_downcast=True)
        
    def iterate_minibatches(self, inputs, targets, batchsize, shuffle=False):
        indices = np.arange(np.shape(inputs)[0])
        if shuffle:
            np.random.shuffle(indices)
        for start_idx in range(0, np.shape(inputs)[0] - batchsize + 1, batchsize):
            excerpt = indices[start_idx:start_idx + batchsize]
            if isinstance(inputs, np.ndarray):
                i = inputs[excerpt]
            else:
                i = inputs.tocsr()[excerpt,:].toarray()
            yield i, targets.iloc[excerpt]
            
    def validate(self, features, validate_Y):
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in self.iterate_minibatches(features["validate_X"], validate_Y, 500, shuffle=False):
            inputs, targets = batch
            err, acc, pred1, pred2 = self.val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1
            print "Err: %s" % err
            print "Acc: %s" % acc
            print "Pred1: %s" % pred1
            print "Pred2: %s" % pred2
            break
            
        return val_err / val_batches, val_acc / val_batches * 100
                
    def train(self, features, num_epochs):
        # Finally, launch the training loop.
        print("Starting training...")
        train_Y = features["train_Y"].astype("float32")
        validate_Y = features["validate_Y"].astype("float32")

        # We iterate over epochs:
        for epoch in range(num_epochs):
            # In each epoch, we do a full pass over the training data:
            train_err = 0
            train_batches = 0
            start_time = time.time()
            for batch in self.iterate_minibatches(features["train_X"], train_Y, 500, shuffle=True):
                inputs, targets = batch
                train_loss = 0 + self.train_fn(inputs, targets)
                train_err += train_loss
                train_batches += 1
                #print "Train loss at {}: {:.3f}".format(
                #    train_batches, train_loss)
                
                if (train_batches % 10) == 0:
                    _, val_accuracy = self.validate(features, validate_Y)
                    print("  validation accuracy:\t\t{:.2f} %".format(val_accuracy))
                
            val_loss, val_accuracy = self.validate(features, validate_Y)

            # Then we print the results for this epoch:
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))
            print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
            print("  validation loss:\t\t{:.6f}".format(val_loss))
            print("  validation accuracy:\t\t{:.2f} %".format(val_accuracy))

In [6]:
def test_combined_features(feature_names):
    features = load_combined_features(feature_names)
    model = NNModel(np.shape(features["train_X"])[1])
    model.train(features, 1)

In [8]:
#test_combined_features(["diff-tfidf-2", "reply-tfidf-2", "reply-len", "reply-counts"])
test_combined_features(["reply-counts", "reply-len"])

Loading features reply-counts.
Loading features reply-len.
Combining features: (308216, 1) + (308216, 1)
Starting training...
Err: nan
Acc: 0.0
Pred1: [ nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  n

KeyboardInterrupt: 