In [6]:
import numpy as np

import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the dataset
max_features = 10000  # Number of words to consider as features
max_len = 300  # Cut texts after this number of words (among top max_features most common words)

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(x_train.shape, y_train.shape)
# Pad sequences to ensure uniform input size
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

# Build the model
model = Sequential()
model.add(Embedding(max_features, 128, input_length=max_len))
model.add(Bidirectional(LSTM(64)))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])


print(model.summary())

(25000,) (25000,)
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 300, 128)          1280000   
                                                                 
 bidirectional_4 (Bidirecti  (None, 128)               98816     
 onal)                                                           
                                                                 
 dense_4 (Dense)             (None, 1)                 129       
                                                                 
Total params: 1378945 (5.26 MB)
Trainable params: 1378945 (5.26 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [7]:

model.fit(x_train, y_train,
          batch_size=32,
          epochs=3,
          validation_data=(x_test, y_test))

score, acc = model.evaluate(x_test, y_test, batch_size=32)
print(f'Test score: {score}')
print(f'Test accuracy: {acc}')


Epoch 1/3
Epoch 2/3
Epoch 3/3
Test score: 0.39145371317863464
Test accuracy: 0.8446400165557861


In [35]:
sample_text = "The setting of the movie is great, but the acting is terrible, I would not recommend it"
sample_text = "The movie is great, I would recommend it. The action scenes are amazing and the acting is superb, I loved it"
sample_sequence = [imdb.get_word_index().get(word, 0) for word in sample_text.lower().split()]
sample_padded = pad_sequences([sample_sequence], maxlen=max_len)
prediction = model.predict(sample_padded)
print(f'Sentiment score: {prediction[0][0]}')
if prediction[0][0] > 0.5:
    print('Positive sentiment')
else:
    print('Negative sentiment')


Sentiment score: 0.5859974026679993
Positive sentiment
