In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Settings shared by the data pipeline and the model below
num_words = 10000  # Cap on the vocabulary size passed to the dataset loader
maxlen = 200       # Fixed number of tokens per review after padding/truncation

# Fetch the IMDb reviews (already encoded as integer sequences) and their labels
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=num_words)

# Bring both splits to the same fixed length so they can be batched together
X_train, X_test = (
    pad_sequences(reviews, maxlen=maxlen) for reviews in (X_train, X_test)
)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

# Size of each learned word vector
embedding_dim = 100

# Stack: word embeddings -> one recurrent layer -> single sigmoid output
model = Sequential([
    Embedding(input_dim=num_words, output_dim=embedding_dim, input_length=maxlen),
    SimpleRNN(64),  # 64 recurrent units
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy for the 0/1 sentiment labels; accuracy is reported during training
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Show layer output shapes and parameter counts
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 100)          1000000   
                                                                 
 simple_rnn (SimpleRNN)      (None, 64)                10560     
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 1010625 (3.86 MB)
Trainable params: 1010625 (3.86 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [3]:
# Train for 5 epochs, holding out 20% of the training data for validation
model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.2,
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7cebda733b50>

In [4]:
# Score the trained model on the held-out test split
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {accuracy}")


Test Accuracy: 0.6579599976539612


In [6]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the word -> rank dictionary that accompanies the IMDb dataset
word_index = imdb.get_word_index()

# imdb.load_data() was called with its default encoding: 0 is padding, 1 marks
# the start of a review, 2 stands for out-of-vocabulary words, and every rank
# from word_index is shifted up by 3. Text fed to the model at inference time
# must be encoded the same way, otherwise each word maps to a different
# embedding than the one it was trained with.
START_TOKEN = 1
OOV_TOKEN = 2
INDEX_OFFSET = 3


def _encode_review(review_text):
    """Convert raw review text into the integer encoding used by the training data."""
    # Lowercase and turn punctuation into spaces so that "An" / "movie," / "it!"
    # can match vocabulary entries; apostrophes are kept so contractions stay whole.
    cleaned = "".join(
        ch if ch.isalnum() or ch == "'" else " " for ch in review_text.lower()
    )
    tokens = [tok.strip("'") for tok in cleaned.split()]
    tokens = [tok for tok in tokens if tok]

    encoded = [START_TOKEN]
    for tok in tokens:
        rank = word_index.get(tok)
        index = OOV_TOKEN if rank is None else rank + INDEX_OFFSET
        # Anything outside the num_words vocabulary was replaced by the OOV id
        # during training, and would be out of range for the Embedding layer.
        encoded.append(index if index < num_words else OOV_TOKEN)
    return encoded


def predict_sentiment(review_text):
    """Classify a raw review string with the trained model.

    Args:
        review_text: Free-form review text.

    Returns:
        "Positive" if the model's sigmoid output is above 0.5, else "Negative".
    """
    # Encode with the same scheme as the training data
    token_indices = _encode_review(review_text)

    # Pad/truncate to the fixed length the model was trained on
    padded_sequence = pad_sequences([token_indices], maxlen=maxlen)

    # Single-sample batch -> prediction has shape (1, 1)
    prediction = model.predict(padded_sequence)

    return "Positive" if prediction[0][0] > 0.5 else "Negative"

# Try the classifier on a short, hand-written review
sample_review = "An amazing movie, I loved it!"
print(predict_sentiment(sample_review))


Positive
