# CNN, First model

### Imports and fetch data

In [1]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, BatchNormalization, Flatten, Conv1D, GlobalMaxPooling1D
from keras.layers.embeddings import Embedding
import pickle
import numpy as np
from keras.callbacks import TensorBoard, EarlyStopping

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [10]:
def _read_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(path, 'rb') as source:
        return pickle.load(source)


# Train / test splits (features and labels)
X_train = _read_pickle('X_train_even.pickle')
Y_train = _read_pickle('Y_train_even.pickle')
X_test = _read_pickle('X_test_even.pickle')
Y_test = _read_pickle('Y_test_even.pickle')

# Index -> word lookup, used later to turn encoded reviews back into text
ix_to_word = _read_pickle('ix_to_word.pickle')

# Join the per-review entries of each split into one 2-D array with one row per
# review and 100 columns (presumably 100 token indices per review -- TODO confirm).
X_train = np.concatenate(X_train).ravel().reshape(len(X_train), 100)
X_test = np.concatenate(X_test).ravel().reshape(len(X_test), 100)

review_length = X_test.shape[1]
# Hard-coded value handed to the model builders as the embedding vocabulary size
num_words = 46210

print('Loaded dataset with {} training samples, {} test samples'.format(len(X_train), len(X_test)))
print('The length of each review is {}.'.format(review_length))


Loaded dataset with 199483 training samples, 49871 test samples
The length of each review is 100.


In [3]:
# Leading slice of the training data, kept around for quick experiments
num_datapoints = 10000

small_X = X_train[:num_datapoints]
small_Y = Y_train[:num_datapoints]

### Define models to try

In [4]:
def one_conv_model(review_length, num_words):
    """Build an uncompiled classifier with a single 1-D convolution.

    Layer order: 200-dimensional embedding, one Conv1D (100 filters, kernel
    size 2), global max pooling, a 256-unit ReLU dense layer and one sigmoid
    output unit.

    Args:
        review_length: Passed to the embedding as ``input_length``.
        num_words: Passed to the embedding as its input (vocabulary) size.

    Returns:
        The assembled ``Sequential`` model; compiling is left to the caller.
    """
    return Sequential([
        Embedding(num_words, 200, input_length=review_length),
        Conv1D(filters=100, kernel_size=2, padding='valid', activation='relu', strides=1),
        GlobalMaxPooling1D(),
        Dense(256, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])

In [5]:
def two_conv_model(review_length, num_words):
    """Build an uncompiled classifier with two stacked 1-D convolutions.

    Layer order: 200-dimensional embedding, two Conv1D layers (100 filters,
    kernel size 2 each), global max pooling, a 256-unit ReLU dense layer and
    one sigmoid output unit.

    Args:
        review_length: Passed to the embedding as ``input_length``.
        num_words: Passed to the embedding as its input (vocabulary) size.

    Returns:
        The assembled ``Sequential`` model; compiling is left to the caller.
    """
    stack = (
        Embedding(num_words, 200, input_length=review_length),
        Conv1D(filters=100, kernel_size=2, padding='valid', activation='relu', strides=1),
        Conv1D(filters=100, kernel_size=2, padding='valid', activation='relu', strides=1),
        GlobalMaxPooling1D(),
        Dense(256, activation='relu'),
        Dense(1, activation='sigmoid'),
    )
    net = Sequential()
    for layer in stack:
        net.add(layer)
    return net

In [22]:
def two_conv_regularized_model(review_length, num_words):
    """Build an uncompiled two-convolution classifier with batch normalization.

    Layer order: 200-dimensional embedding, then twice a Conv1D (100 filters,
    kernel size 2) followed by BatchNormalization, then global max pooling,
    a 256-unit ReLU dense layer and one sigmoid output unit.

    Args:
        review_length: Passed to the embedding as ``input_length``.
        num_words: Passed to the embedding as its input (vocabulary) size.

    Returns:
        The assembled ``Sequential`` model; compiling is left to the caller.
    """
    net = Sequential()
    net.add(Embedding(num_words, 200, input_length=review_length))
    # Two identical convolution + batch-norm stages
    for _ in range(2):
        net.add(Conv1D(filters=100, kernel_size=2, padding='valid', activation='relu', strides=1))
        net.add(BatchNormalization())
    net.add(GlobalMaxPooling1D())
    net.add(Dense(256, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    return net

In [48]:
def two_conv_regularized_larger_kernel_model(review_length, num_words):
    """Build an uncompiled two-convolution classifier whose second kernel is wider.

    Layer order: 200-dimensional embedding, Conv1D (100 filters, kernel size 2)
    + BatchNormalization, Conv1D (100 filters, kernel size 4) +
    BatchNormalization, global max pooling, a 256-unit ReLU dense layer and
    one sigmoid output unit.

    Args:
        review_length: Passed to the embedding as ``input_length``.
        num_words: Passed to the embedding as its input (vocabulary) size.

    Returns:
        The assembled ``Sequential`` model; compiling is left to the caller.
    """
    net = Sequential()
    net.add(Embedding(num_words, 200, input_length=review_length))
    # One convolution + batch-norm stage per kernel width
    for width in (2, 4):
        net.add(Conv1D(filters=100, kernel_size=width, padding='valid', activation='relu', strides=1))
        net.add(BatchNormalization())
    net.add(GlobalMaxPooling1D())
    net.add(Dense(256, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    return net

In [37]:
def three_conv_regularized_model(review_length, num_words):
    """Build an uncompiled three-convolution classifier with shrinking filter counts.

    Layer order: 200-dimensional embedding, three Conv1D layers of kernel
    size 2 with 50, 30 and 20 filters (each followed by BatchNormalization),
    global max pooling, a 256-unit ReLU dense layer and one sigmoid output
    unit.

    Args:
        review_length: Passed to the embedding as ``input_length``.
        num_words: Passed to the embedding as its input (vocabulary) size.

    Returns:
        The assembled ``Sequential`` model; compiling is left to the caller.
    """
    net = Sequential()
    net.add(Embedding(num_words, 200, input_length=review_length))
    # One convolution + batch-norm stage per filter count
    for n_filters in (50, 30, 20):
        net.add(Conv1D(filters=n_filters, kernel_size=2, padding='valid', activation='relu', strides=1))
        net.add(BatchNormalization())
    net.add(GlobalMaxPooling1D())
    net.add(Dense(256, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    return net

In [40]:
def three_conv_more_filter_model(review_length, num_words):
    """Build an uncompiled three-convolution classifier with 100 filters per layer.

    Layer order: 200-dimensional embedding, three times a Conv1D (100 filters,
    kernel size 2) followed by BatchNormalization, then global max pooling,
    a 256-unit ReLU dense layer and one sigmoid output unit.

    Args:
        review_length: Passed to the embedding as ``input_length``.
        num_words: Passed to the embedding as its input (vocabulary) size.

    Returns:
        The assembled ``Sequential`` model; compiling is left to the caller.
    """
    net = Sequential()
    net.add(Embedding(num_words, 200, input_length=review_length))
    # Three identical convolution + batch-norm stages
    for _ in range(3):
        net.add(Conv1D(filters=100, kernel_size=2, padding='valid', activation='relu', strides=1))
        net.add(BatchNormalization())
    net.add(GlobalMaxPooling1D())
    net.add(Dense(256, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    return net

In [42]:
def three_conv_larger_kernel_model(review_length, num_words):
    """Build an uncompiled three-convolution classifier with growing kernel sizes.

    Layer order: 200-dimensional embedding, three Conv1D layers of 100 filters
    with kernel sizes 2, 3 and 5 (each followed by BatchNormalization), global
    max pooling, a 256-unit ReLU dense layer and one sigmoid output unit.

    Args:
        review_length: Passed to the embedding as ``input_length``.
        num_words: Passed to the embedding as its input (vocabulary) size.

    Returns:
        The assembled ``Sequential`` model; compiling is left to the caller.
    """
    net = Sequential()
    net.add(Embedding(num_words, 200, input_length=review_length))
    # One convolution + batch-norm stage per kernel width
    for width in (2, 3, 5):
        net.add(Conv1D(filters=100, kernel_size=width, padding='valid', activation='relu', strides=1))
        net.add(BatchNormalization())
    net.add(GlobalMaxPooling1D())
    net.add(Dense(256, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    return net

In [6]:
def vanilla_nn(review_length, num_words):
    """Build an uncompiled classifier without any convolution layers.

    Layer order: 200-dimensional embedding, Flatten, a 256-unit ReLU dense
    layer and one sigmoid output unit.

    Args:
        review_length: Passed to the embedding as ``input_length``.
        num_words: Passed to the embedding as its input (vocabulary) size.

    Returns:
        The assembled ``Sequential`` model; compiling is left to the caller.
    """
    return Sequential([
        Embedding(num_words, 200, input_length=review_length),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])

### Train models

In [7]:
# Single-convolution model: build, compile, then train on the full training set
# with 20% of it held out for validation. TensorBoard logs go to ./logs/one_conv_model.
# For a quick trial run, fit on small_X / small_Y instead of X_train / Y_train.
model = one_conv_model(review_length, num_words)
tb = TensorBoard(log_dir='./logs/one_conv_model')
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    Y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=256,
    verbose=1,
    callbacks=[tb],
)

Train on 159586 samples, validate on 39897 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f65ff0b9ba8>

In [14]:
# Two-convolution model: build, compile, then train on the full training set
# with 20% of it held out for validation. TensorBoard logs go to ./logs/two_conv_model.
# For a quick trial run, fit on small_X / small_Y instead of X_train / Y_train.
model = two_conv_model(review_length, num_words)
tb = TensorBoard(log_dir='./logs/two_conv_model')
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    Y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=256,
    verbose=1,
    callbacks=[tb],
)

Train on 159586 samples, validate on 39897 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f6552697d30>

In [24]:
# Two-convolution model with batch normalization: build, compile, then train on the
# full training set with 20% of it held out for validation.
# TensorBoard logs go to ./logs/two_conv_regularized_model.
# For a quick trial run, fit on small_X / small_Y instead of X_train / Y_train.
model = two_conv_regularized_model(review_length, num_words)
tb = TensorBoard(log_dir='./logs/two_conv_regularized_model')
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    Y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=256,
    verbose=1,
    callbacks=[tb],
)

Train on 159586 samples, validate on 39897 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f6551a3e668>

In [49]:
# Two-convolution model with batch normalization and a wider second kernel: build,
# compile, then train on the full training set with 20% of it held out for validation.
# TensorBoard logs go to ./logs/two_conv_regularized_larger_kernel_model.
# For a quick trial run, fit on small_X / small_Y instead of X_train / Y_train.
model = two_conv_regularized_larger_kernel_model(review_length, num_words)
tb = TensorBoard(log_dir='./logs/two_conv_regularized_larger_kernel_model')
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    Y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=256,
    verbose=1,
    callbacks=[tb],
)

Train on 159586 samples, validate on 39897 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f653b41d828>

In [38]:
# Three-convolution model with batch normalization: build, print its layer table,
# compile, then train on the full training set with 20% of it held out for validation.
# TensorBoard logs go to ./logs/three_conv_regularized_model.
# For a quick trial run, fit on small_X / small_Y instead of X_train / Y_train.
model = three_conv_regularized_model(review_length, num_words)
model.summary()
tb = TensorBoard(log_dir='./logs/three_conv_regularized_model')
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    Y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=256,
    verbose=1,
    callbacks=[tb],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_19 (Embedding)     (None, 100, 200)          9242000   
_________________________________________________________________
conv1d_32 (Conv1D)           (None, 99, 50)            20050     
_________________________________________________________________
batch_normalization_14 (Batc (None, 99, 50)            200       
_________________________________________________________________
conv1d_33 (Conv1D)           (None, 98, 30)            3030      
_________________________________________________________________
batch_normalization_15 (Batc (None, 98, 30)            120       
_________________________________________________________________
conv1d_34 (Conv1D)           (None, 97, 20)            1220      
_________________________________________________________________
batch_normalization_16 (Batc (None, 97, 20)            80        
__________

<keras.callbacks.History at 0x7f6541f0ff60>

In [41]:
# Three-convolution model with 100 filters per layer: build, compile, then train on
# the full training set with 20% of it held out for validation.
# TensorBoard logs go to ./logs/three_conv_more_filter_model.
# For a quick trial run, fit on small_X / small_Y instead of X_train / Y_train.
model = three_conv_more_filter_model(review_length, num_words)
tb = TensorBoard(log_dir='./logs/three_conv_more_filter_model')
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    Y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=256,
    verbose=1,
    callbacks=[tb],
)

Train on 159586 samples, validate on 39897 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f6541038fd0>

In [43]:
# Three-convolution model with growing kernel sizes: build, compile, then train on
# the full training set with 20% of it held out for validation.
# TensorBoard logs go to ./logs/three_conv_larger_kernel_model.
# For a quick trial run, fit on small_X / small_Y instead of X_train / Y_train.
model = three_conv_larger_kernel_model(review_length, num_words)
tb = TensorBoard(log_dir='./logs/three_conv_larger_kernel_model')
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    Y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=256,
    verbose=1,
    callbacks=[tb],
)

Train on 159586 samples, validate on 39897 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f6541241da0>

In [17]:
# Convolution-free baseline: build, compile, then train on the full training set
# with 20% of it held out for validation. TensorBoard logs go to ./logs/vanilla_nn_model.
# For a quick trial run, fit on small_X / small_Y instead of X_train / Y_train.
model = vanilla_nn(review_length, num_words)
tb = TensorBoard(log_dir='./logs/vanilla_nn_model')
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    Y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=256,
    verbose=1,
    callbacks=[tb],
)

Train on 159586 samples, validate on 39897 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f6551a18f98>

### Predict

In [None]:
# Write the model currently bound to `model` to disk.
# NOTE(review): every training cell rebinds `model`, so this saves whichever model was
# trained most recently. When the notebook is run top to bottom that is the vanilla_nn
# model, not the architecture the file name refers to -- confirm before relying on it.
model.save('two_conv_regularized_larger_kernel_model.h5')

In [2]:
# Reload the saved model from disk; this rebinds `model`, replacing whatever it held before.
model = load_model('two_conv_regularized_larger_kernel_model.h5')

In [5]:
# Show the architecture of the loaded model. summary() prints the layer table itself
# and returns None, so it is called directly; wrapping it in print() would append a
# stray "None" line to the output.
model.summary()

# Round the predicted values to hard 0/1 labels. The transpose followed by [0] turns the
# 2-D prediction array (assumed to be one column, one row per review) into a 1-D vector
# that can be compared element-wise with Y_test.
preds = np.round(model.predict(X_test)).T
preds = preds[0]

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_23 (Embedding)     (None, 100, 200)          9242000   
_________________________________________________________________
conv1d_43 (Conv1D)           (None, 99, 100)           40100     
_________________________________________________________________
batch_normalization_25 (Batc (None, 99, 100)           400       
_________________________________________________________________
conv1d_44 (Conv1D)           (None, 96, 100)           40100     
_________________________________________________________________
batch_normalization_26 (Batc (None, 96, 100)           400       
_________________________________________________________________
global_max_pooling1d_19 (Glo (None, 100)               0         
_________________________________________________________________
dense_31 (Dense)             (None, 256)               25856     
__________

In [24]:
# Positions in the test set where the rounded prediction differs from the true label.
# np.where returns a tuple of index arrays; the first (only) entry is the one needed.
misclassifications = np.where(preds!=Y_test)
misclassifications = misclassifications[0]
print('Out of {} test reviews, {} were misclassified.'.format(len(Y_test), len(misclassifications)))
print('A few examples of the misclassified reviews:')


def print_sentence(index):
    """Print the text of the `index`-th misclassified test review.

    Args:
        index: Position within `misclassifications` (not within the test set).

    Each value of the selected row of `X_test` is converted to a string and looked
    up in `ix_to_word`; the resulting words are printed joined by single spaces.

    Raises:
        IndexError: If `index` is out of range for `misclassifications`.
        KeyError: If a value of the review has no entry in `ix_to_word`.
    """
    sentence_ix = misclassifications[index]
    words = [ix_to_word[str(token)] for token in X_test[sentence_ix]]
    print(' '.join(words))


# Show the first three misclassified reviews -- or fewer when the model made fewer
# than three mistakes, instead of failing with an IndexError.
for rank in range(min(3, len(misclassifications))):
    review_ix = misclassifications[rank]
    print('{}. Predicted output: {} Actual label: {}'.format(rank + 1, preds[review_ix], Y_test[review_ix]))
    print_sentence(rank)

Out of 49871 test reviews, 5278 were misclassified.
A few examples of the misclassified reviews:
1. Predicted output: 1.0 Actual label: 0
caution is in order if they're safe they deserve 5 stars because they're a special tasting and nearly pure treat if they're not safe they should be pulled off of the market there's a lot of google chatter about this product being toxic just look up kingdom pets chicken jerky it looks to me like only one or a few dogs got sick and those one or few incidents are being cited over and over on different sites making it seem like there's an epidemic if only a few got sick it might be from an unknown something else that
2. Predicted output: 1.0 Actual label: 0
i have to say that i really like the packaging the cereal name and part of the image have raised graphics that i felt as soon as i picked up the box however the cereal falls a little short of the cool packaging i have tried both the chocolate krave as shown here and the double chocolate krave the choc