In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import time
from scipy.stats import itemfreq

import theano
import theano.tensor as T
import lasagne
from sklearn.model_selection import StratifiedKFold


FILE_PATH = '/home/sam/Hhd/twitter_sentiment/'
# FILE_PATH = '/home/sam/Data/twitter_sentiment/'

Using cuDNN version 5105 on context None
Mapped name None to device cuda: GeForce GTX 1080 (0000:01:00.0)


In [2]:
# laod data
pos_array = np.load(FILE_PATH+'pos.npy')
neg_array = np.load(FILE_PATH+'neg.npy')

In [3]:
print pos_array.shape
print neg_array.shape
data = np.concatenate([pos_array, neg_array])
N, M, D = data.shape
data = data.reshape(-1, 1, M, D).astype(theano.config.floatX)
label = np.concatenate([np.ones(pos_array.shape[0]), np.ones(neg_array.shape[0])+1]) # 1 for positive, 2 for negative
label = np.int8(label)-1
print data.shape
print label.shape

(2183, 20, 100)
(2416, 20, 100)
(4599, 1, 20, 100)
(4599,)


In [4]:
# ##################### Build the CNN neural network model #######################
def build_cnn(input_var=None):
    # We create a CNN of two convolution + pooling stages
    # and a fully-connected hidden layer in front of the output layer.

    # Input layer, as usual:
    network = lasagne.layers.InputLayer(shape=(None, 1, M, D), input_var=input_var)
    # This time we do not apply input dropout, as it tends to work less well
    # for convolutional layers.

    # Convolutional layer with 32 kernels of size 3x3. Strided and padded
    # convolutions are supported as well; see the docstring.
    network = lasagne.layers.Conv2DLayer(network, num_filters=32, filter_size=(3, 3), \
                                         nonlinearity=lasagne.nonlinearities.rectify, \
                                         W=lasagne.init.GlorotUniform(), pad=1, stride=(1, 1), \
                                         untie_biases=True)
    # Max-pooling layer of factor 2 in both dimensions:
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # Another convolution with 32 3x3 kernels, and another 2x2 pooling:
    network = lasagne.layers.Conv2DLayer(network, num_filters=32, filter_size=(3, 3), \
                                         nonlinearity=lasagne.nonlinearities.rectify, pad=1, \
                                         stride=(1, 1), untie_biases=True)
    
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # A fully-connected layer of ??? units with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(lasagne.layers.dropout(network, p=0.1), num_units=600, \
                                        nonlinearity=lasagne.nonlinearities.rectify)

    # And, finally, the 10-unit output layer with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(lasagne.layers.dropout(network, p=0.1), num_units=2,  \
                                        nonlinearity=lasagne.nonlinearities.softmax)

    return network

In [5]:
# ############################## Main program ################################
def train_cnn(X_train, y_train, X_val, y_val, X_test, num_epochs=500):
    # Load the dataset
    print("Loading data...")
    #X_train, y_train, X_val, y_val, X_test = load_dataset()
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")

    network = build_cnn(input_var)
    accuracy_rate = []

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.05, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    # print test_prediction.flatten()
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)

    predict_label = T.argmax(test_prediction,axis=1)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
    test_fn = theano.function([input_var], predict_label)

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        start_time = time.time()
        # In each epoch, we do a full pass over the training data:
        train_err = train_fn(X_train, y_train)
        # And a full pass over the validation data:
        val_err, val_acc = val_fn(X_val, y_val)
        accuracy_rate.append(val_acc)
        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{}".format(train_err))
        print("  validation loss:\t\t{}".format(val_err))
        print("  validation accuracy:\t\t{} %".format(val_acc * 100))
            
        # After training, we compute and print the test error:
        test_pred = test_fn(X_test)
#         print set(test_pred)

    return test_pred

In [6]:
# stratified k-fold cross-validation
skf = StratifiedKFold(n_splits=3)
for train_index, val_index in skf.split(data, label):
    print("TRAIN:", train_index, "TEST:", val_index)
    data_train, data_val = data[train_index], data[val_index]
    label_train, label_val = label[train_index], label[val_index]
    
    freq_train = itemfreq(label_train)
    print "train freq", freq_train[:,1]
    freq_val = itemfreq(label_val)
    print "val freq", freq_val[:,1]
    
    train_cnn(data_train, label_train, data_val, label_val, data_val)

('TRAIN:', array([ 728,  729,  730, ..., 4596, 4597, 4598]), 'TEST:', array([   0,    1,    2, ..., 2986, 2987, 2988]))
train freq [1455 1610]
val freq [728 806]
Loading data...
Building model and compiling functions...
Starting training...
Epoch 1 of 500 took 0.149s
  training loss:		0.693222939968
  validation loss:		0.692399740219
  validation accuracy:		53.1290769577 %
Epoch 2 of 500 took 0.151s
  training loss:		0.692976415157
  validation loss:		0.692079126835
  validation accuracy:		52.7379393578 %
Epoch 3 of 500 took 0.146s
  training loss:		0.692907512188
  validation loss:		0.691745102406
  validation accuracy:		53.3246397972 %
Epoch 4 of 500 took 0.148s
  training loss:		0.692195653915
  validation loss:		0.691441178322
  validation accuracy:		52.9986977577 %
Epoch 5 of 500 took 0.144s
  training loss:		0.692115008831
  validation loss:		0.691190600395
  validation accuracy:		52.3468077183 %
Epoch 6 of 500 took 0.148s
  training loss:		0.691323578358
  validation loss:		0.69

GpuArrayException: out of memory
Apply node that caused the error: GpuDnnPoolGrad{mode='max'}(GpuContiguous.0, GpuContiguous.0, GpuContiguous.0, TensorConstant{(2,) of 2}, TensorConstant{(2,) of 2}, TensorConstant{(2,) of 0})
Toposort index: 233
Inputs types: [GpuArrayType<None>(float32, (False, False, False, False)), GpuArrayType<None>(float32, (False, False, False, False)), GpuArrayType<None>(float32, (False, False, False, False)), TensorType(int64, vector), TensorType(int64, vector), TensorType(int64, vector)]
Inputs shapes: [(3067, 32, 20, 100), (3067, 32, 10, 50), (3067, 32, 10, 50), (2,), (2,), (2,)]
Inputs strides: [(256000, 8000, 400, 4), (64000, 2000, 200, 4), (64000, 2000, 200, 4), (8,), (8,), (8,)]
Inputs values: ['not shown', 'not shown', 'not shown', array([2, 2]), array([2, 2]), array([0, 0])]
Outputs clients: [[GpuElemwise{Composite{((i0 * i1) + (i2 * i1 * sgn(i3)))}}[(0, 1)]<gpuarray>(GpuArrayConstant{[[[[ 0.5]]]]}, GpuDnnPoolGrad{mode='max'}.0, GpuArrayConstant{[[[[ 0.5]]]]}, GpuElemwise{Add}[(0, 0)]<gpuarray>.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.