In [5]:
import os
import pickle
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [6]:
# Shuffle-buffer and batch sizes used by the input pipelines below.
BATCH_SIZE = 256
BUFFER_SIZE = 10000

# Load the 'imdb_reviews/subwords8k' TFDS config together with its metadata.
# as_supervised=True makes each element a (text, label) pair.
sub_word_imdb, sub_word_info = tfds.load(
    'imdb_reviews/subwords8k',
    as_supervised=True,
    with_info=True,
)

# Encoder attached to the 'text' feature; reused later for vocab_size and decode().
sub_word_tokenizer = sub_word_info.features['text'].encoder

train_data = sub_word_imdb['train']
test_data = sub_word_imdb['test']

# Only the training split is shuffled; both splits are batched with padding
# because reviews have different token lengths.
train_dataset = train_data.shuffle(BUFFER_SIZE).padded_batch(BATCH_SIZE)
test_dataset = test_data.padded_batch(BATCH_SIZE)



In [7]:
def create_model(tokenizer):
    """Build and compile a two-layer bidirectional-LSTM binary classifier.

    Args:
        tokenizer: Object exposing a ``vocab_size`` attribute, which is used
            as the input dimension of the embedding layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model (binary cross-entropy
        loss, Adam optimizer, accuracy metric). The model summary is printed
        as a side effect before compilation.
    """
    layers = tf.keras.layers

    # Layer widths.
    embed_units = 64
    first_lstm_units = 64
    second_lstm_units = 32
    hidden_units = 64

    layer_stack = [
        layers.Embedding(tokenizer.vocab_size, embed_units),
        # The first recurrent layer emits the full sequence so that the
        # second recurrent layer can consume it step by step.
        layers.Bidirectional(layers.LSTM(first_lstm_units, return_sequences=True)),
        layers.Bidirectional(layers.LSTM(second_lstm_units)),
        layers.Dense(hidden_units, activation='relu'),
        # Single sigmoid unit: one output in [0, 1] per review.
        layers.Dense(1, activation='sigmoid'),
    ]
    model = tf.keras.Sequential(layer_stack)

    # Show the architecture before compiling.
    model.summary()

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [8]:
# Train once and cache the result on disk; later runs reload the saved model
# instead of repeating the 10-epoch fit.
MODEL_PATH = 'model-nlp-example-1.h5'
# Relative path so the history is written next to the model file rather than
# into the filesystem root, which is normally not writable.
HISTORY_PATH = 'trainHistoryDict'

if os.path.exists(MODEL_PATH):
    # Cached run: no need to build (and summarise) a fresh, untrained model.
    model = tf.keras.models.load_model(MODEL_PATH)
else:
    model = create_model(sub_word_tokenizer)
    history = model.fit(train_dataset, epochs=10, validation_data=test_dataset)

    # Save the model first so a failure while writing the history cannot
    # lose the trained weights.
    model.save(MODEL_PATH)

    # Persist only the plain metrics dict (history.history), not the History object.
    with open(HISTORY_PATH, 'wb') as file_pi:
        pickle.dump(history.history, file_pi)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 64)          523840    
                                                                 
 bidirectional (Bidirectiona  (None, None, 128)        66048     
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 64)               41216     
 nal)                                                            
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 635,329
Trainable params: 635,329
Non-trai

KeyboardInterrupt: 

In [None]:
# Take one padded batch from the test set and classify a randomly chosen review.
for example in test_dataset.take(1):
    reviews, labels = example

    # Draw the index from the batch that was actually yielded: a batch can be
    # shorter than BATCH_SIZE (small dataset or final partial batch), and
    # indexing with the configured size would then go out of range.
    batch_len = int(reviews.shape[0])
    review_index = np.random.randint(0, batch_len)
    review, label = reviews[review_index], labels[review_index]

    # Decode the token ids back to text
    decoded_review = sub_word_tokenizer.decode(review)
    # The model expects a batch dimension, so wrap the single review in one
    prediction = model.predict(tf.expand_dims(review, 0))
    print('Review: {}'.format(decoded_review))
    # Sigmoid output above 0.5 is reported as positive sentiment
    print('Sentiment: {}'.format('Positive' if prediction[0][0] > 0.5 else 'Negative'))