# IMDB processing with ConvNets

This notebook explores the classification task for the IMDB dataset using a 1D convolutional neural network.
Using an RNN we achieved a score of 86% (without hyperparameter tuning).

In [2]:
import matplotlib.pyplot as plt
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras import models, layers, optimizers

# Load and preprocess data

We will pad (or truncate) all reviews to `max_len` words, since Keras requires that all reviews in a batch have the same length.

In [3]:
# Vocabulary size handed to imdb.load_data as num_words (10K words).
max_features = 10000
# Target sequence length: every review ends up with exactly max_len entries.
# This counts words, not characters.
max_len = 500
# NOTE(review): not used by the model.fit call visible in this notebook,
# which passes batch_size=128 directly.
batch_size = 32

(train_data, y_train), (test_data, y_test) = imdb.load_data(num_words=max_features)
print("There are {} train reviews".format(len(train_data)))
print("There are {} test reviews".format(len(test_data)))
print('Pad reviews (samples x time)')
# Bring both splits to a common length in one pass (train first, then test).
# NOTE(review): with pad_sequences defaults, padding and truncation are both
# applied at the front of each sequence - confirm against the installed Keras.
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=max_len)
    for reviews in (train_data, test_data)
)
print('input_train shape: {}'.format(x_train.shape))
print('input_test shape: {}'.format(x_test.shape))

There are 25000 train reviews
There are 25000 test reviews
Pad reviews (samples x time)
input_train shape: (25000, 500)
input_test shape: (25000, 500)


# Train

## Helper functions

In [4]:
def plot_loss(history):
    """Plot training and validation loss per epoch.

    Args:
        history: object exposing a ``history`` dict with ``'loss'`` and
            ``'val_loss'`` sequences of equal length (e.g. the value
            returned by ``model.fit``).

    Raises:
        KeyError: if either ``'loss'`` or ``'val_loss'`` is missing.
    """
    metrics = history.history
    train_loss, val_loss = metrics['loss'], metrics['val_loss']
    # Epochs are numbered from 1 on the x axis.
    epoch_axis = range(1, len(train_loss) + 1)

    # (values, matplotlib format string, legend label) for each curve.
    curves = (
        (train_loss, 'bo', 'Training loss'),
        (val_loss, 'b', 'Validation loss'),
    )
    for values, fmt, label in curves:
        plt.plot(epoch_axis, values, fmt, label=label)

    plt.title('Training and Validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
    
def plot_accuracy(history):
    """Plot training and validation accuracy per epoch.

    The accuracy series is looked up under ``'acc'`` / ``'val_acc'`` (the
    names produced when the model is compiled with ``metrics=['acc']``, as
    in this notebook) and falls back to ``'accuracy'`` / ``'val_accuracy'``
    when ``'acc'`` is absent.

    Args:
        history: object exposing a ``history`` dict of per-epoch metric
            sequences (e.g. the value returned by ``model.fit``).

    Raises:
        KeyError: if no accuracy series, or no matching validation series,
            is present in ``history.history``.
    """
    history_dict = history.history
    # Keras stores the metric under the name given at compile time, so accept
    # both spellings instead of failing on metrics=['accuracy'].
    acc_key = 'acc' if 'acc' in history_dict else 'accuracy'
    acc_values = history_dict[acc_key]
    val_acc_values = history_dict['val_' + acc_key]
    # Epochs are numbered from 1 on the x axis.
    epochs = range(1, len(acc_values) + 1)

    plt.plot(epochs, acc_values, 'bo', label='Training accuracy')
    plt.plot(epochs, val_acc_values, 'b', label='Validation accuracy')
    plt.title('Training and Validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

## Train model

In [None]:
# 1D convnet classifier: embedding -> conv -> max-pool -> conv -> global max-pool -> 1 unit.
model = models.Sequential()
model.add(layers.Embedding(max_features, 128, input_length=max_len))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.MaxPooling1D(5))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.GlobalMaxPooling1D())
# The 'binary_crossentropy' loss (default from_logits=False) and the 'acc'
# metric both expect a probability in [0, 1], so the output unit needs a
# sigmoid; a linear Dense(1) would feed them unbounded values.
model.add(layers.Dense(1, activation='sigmoid'))
# model.summary()

model.compile(optimizer=optimizers.RMSprop(lr=1e-4), loss='binary_crossentropy', metrics=['acc'])
# validation_split=0.2 holds out 20% of the training samples for validation.
# batch_size is hard-coded to 128 here rather than read from a variable.
history = model.fit(x_train, y_train, epochs=10, batch_size=128, validation_split=0.2)
# Last expression of the cell: the test-set evaluation result is displayed.
model.evaluate(x_test, y_test)

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Train on 20000 samples, validate on 5000 samples
Epoch 1/10


### Plot loss

In [None]:
# Training vs. validation loss per epoch, from the `history` returned by model.fit.
plot_loss(history)

In [None]:
# Training vs. validation accuracy per epoch, from the `history` returned by model.fit.
plot_accuracy(history)