CNN for NLP
------

The use of Convolution1D for text classification.

[Based on this code](https://github.com/fchollet/keras/blob/master/examples/imdb_cnn.py)

In [1]:
reset -fs

In [2]:
from keras.datasets import imdb

Using TensorFlow backend.


In [3]:
print('Loading data...')
max_features = 5000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

Loading data...
25000 train sequences
25000 test sequences


In [4]:
from keras.preprocessing import sequence

In [5]:
print('Pad sequences (samples x time)')
maxlen = 400
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

Pad sequences (samples x time)
x_train shape: (25000, 400)
x_test shape: (25000, 400)


In [6]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Conv1D, GlobalMaxPooling1D

In [7]:
print('Build model...')
model = Sequential()

# we start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
embedding_dims = 50
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))
model.add(Dropout(0.2))

# Add a Convolution1D, which will learn filters
# word group filters of size filter_length:
filters = 250
kernel_size = 3
model.add(Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu',
                 strides=1))

# Add max pooling:
model.add(GlobalMaxPooling1D())

# Add a vanilla hidden layer:
hidden_dims = 250
model.add(Dense(hidden_dims))
# model.add(Dropout(0.2))
model.add(Activation('relu'))

# Project onto a single unit output layer, and squash it with a sigmoid:
model.add(Dense(1))
model.add(Activation('sigmoid'))

Build model...


In [8]:
print("Training model...")
model.compile(loss='binary_crossentropy',
              optimizer='SGD',
              metrics=['accuracy'])
batch_size = 32
epochs = 2
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=True,
          validation_split=0.1)

Training model...
Train on 22500 samples, validate on 2500 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x12ca40b38>

In [9]:
score, accuracy = model.evaluate(x_test, y_test,
                                batch_size=batch_size, 
                                verbose=True)



In [10]:
print('Test score: {:.3}'.format(score))
print('Test accuracy: {:.3}'.format(accuracy))

Test score: 0.693
Test accuracy: 0.5


__TODO__: How does accuracy compare to LSTM for same datset?

__TODO__: How does training speed compare to LSTM for same datset?