# IMDB CNN

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb

In [None]:
# Vocabulary cap: only the top `max_features` words are considered.
max_features = 10000

# Load the IMDB reviews (each review is a sequence of integer word indices)
# and unpack the two splits into data / label pairs.
train_split, test_split = imdb.load_data(num_words=max_features)
train_data, train_labels = train_split
test_data, test_labels = test_split

In [3]:
# Peek at the first training example: its first ten word indices and its label.
for caption, value in (('data ', train_data[0][:10]), ('labels ', train_labels[0])):
    print(caption, value)

data  [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65]
labels  1


In [4]:
# Report how many reviews each split contains.
n_train, n_test = len(train_data), len(test_data)
print('Train data ', n_train)
print('Test data ', n_test)

Train data  25000
Test data  25000


In [5]:
# Fixed review length in tokens: every review is truncated or zero-padded to this size.
# NOTE: pad_sequences is called below with its default truncating='pre', so for longer
# reviews it is the LAST max_seq_len tokens that are kept, not the first.
max_seq_len = 80

In [6]:
# Preprocess: bring every review to exactly max_seq_len tokens.
# Reviews longer than max_seq_len are truncated and shorter ones are zero-padded;
# with pad_sequences' defaults both happen at the front of the sequence.
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(reviews, maxlen=max_seq_len)
    for reviews in (train_data, test_data)
)

# Conv Model

In [7]:
# Size of the learned word vectors.
emb_dim = 128

# Embedding -> 1-D convolution -> global max pooling -> sigmoid output.
model = keras.Sequential([
    # Look up an emb_dim-dimensional vector for each of the max_features word indices.
    layers.Embedding(max_features, emb_dim),
    # 64 filters with a kernel spanning 3 consecutive tokens.
    layers.Conv1D(64, 3, activation='relu'),
    # Keep each filter's maximum response over the whole sequence.
    layers.GlobalMaxPooling1D(),
    # Single sigmoid unit producing the 0/1 label score.
    layers.Dense(1, activation='sigmoid'),
])

In [8]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Training options; validation_split reserves 20% of the training data for validation.
fit_options = dict(
    epochs=5,
    batch_size=128,
    validation_split=0.2,
    shuffle=True,
    verbose=1,
)

# `history` keeps the object returned by fit() for later inspection.
history = model.fit(x_train, train_labels, **fit_options)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [9]:
# Show the layer-by-layer architecture and parameter counts.
# summary() prints the table itself and returns None, so it must not be wrapped
# in print() -- doing so appends a stray "None" line to the cell output.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 128)         1280000   
_________________________________________________________________
conv1d (Conv1D)              (None, None, 64)          24640     
_________________________________________________________________
global_max_pooling1d (Global (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 65        
Total params: 1,304,705
Trainable params: 1,304,705
Non-trainable params: 0
_________________________________________________________________
None
