# IMDB LSTM

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb


In [None]:
# Restrict the vocabulary to the top 10000 words (passed to load_data as num_words).
max_features = 10000
train_split, test_split = imdb.load_data(num_words=max_features)
train_data, train_labels = train_split
test_data, test_labels = test_split

In [None]:
# Peek at one sample: the first ten entries of review 0 and its label.
first_review, first_label = train_data[0], train_labels[0]
print('data ', first_review[:10])
print('labels ', first_label)

In [None]:
# Report how many reviews each split contains.
for caption, split in (('Train data ', train_data), ('Test data ', test_data)):
    print(caption, len(split))

In [None]:
# Fixed length, in tokens, that every review is cut or padded to.
# NOTE: pad_sequences is called in the preprocessing cell without a
# `truncating` argument; its Keras default ('pre') drops tokens from the START
# of a long review, so it is the LAST 80 tokens of each review that are kept.
max_seq_len = 80 # keep at most 80 tokens per movie review

In [None]:
# Preprocess: bring every review to exactly max_seq_len tokens.
# Reviews longer than max_seq_len are truncated; shorter ones are zero-padded.
pad_to_fixed_length = keras.preprocessing.sequence.pad_sequences
x_train = pad_to_fixed_length(train_data, maxlen=max_seq_len)
x_test = pad_to_fixed_length(test_data, maxlen=max_seq_len)

In [None]:
# Display the shape of the padded training array as the cell's output;
# expected to be (len(train_data), max_seq_len) after pad_sequences.
x_train.shape

# Model

In [None]:
# Size of each word-embedding vector.
emb_dim = 128

# Classifier stack: token ids -> embeddings -> LSTM (128 units) -> one sigmoid
# unit producing the probability used for the binary label.
model = keras.Sequential([
    layers.Embedding(max_features, emb_dim),
    layers.LSTM(128),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Binary classification: cross-entropy loss on the sigmoid output, Adam
# optimizer, accuracy reported during training.
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Training options, gathered in one place. validation_split=0.2 holds out a
# fifth of the training samples for validation.
fit_options = dict(
    epochs=5,
    batch_size=128,
    validation_split=0.2,
    shuffle=True,
    verbose=1,
)

# `history` keeps the object returned by fit() for later inspection.
history = model.fit(x_train, train_labels, **fit_options)

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()