In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences



In [2]:
# Parameters
vocab_size = 10000  
max_sequence_length = 500  
embedding_dim = 32

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

x_train = pad_sequences(x_train, maxlen=max_sequence_length, padding='post')
x_test = pad_sequences(x_test, maxlen=max_sequence_length, padding='post')

print(f'Training data shape: {x_train.shape}')
print(f'Test data shape: {x_test.shape}')


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step
Training data shape: (25000, 500)
Test data shape: (25000, 500)


In [6]:
# Define the model
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim))
model.add(SimpleRNN(32, return_sequences=False))  
model.add(Dense(1, activation='sigmoid'))  




In [7]:
# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [8]:
model.summary()

In [9]:
# Train the model with batch size 128 and validate on 20% of the training data
history = model.fit(
    x_train, y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2  # Use 20% of the training data for validation
)


Epoch 1/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 199ms/step - accuracy: 0.4988 - loss: 0.6945 - val_accuracy: 0.5046 - val_loss: 0.6934
Epoch 2/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 197ms/step - accuracy: 0.5338 - loss: 0.6857 - val_accuracy: 0.5068 - val_loss: 0.6923
Epoch 3/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 193ms/step - accuracy: 0.5354 - loss: 0.6838 - val_accuracy: 0.5168 - val_loss: 0.7209
Epoch 4/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 182ms/step - accuracy: 0.5241 - loss: 0.6771 - val_accuracy: 0.4974 - val_loss: 0.6969
Epoch 5/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 192ms/step - accuracy: 0.5494 - loss: 0.6584 - val_accuracy: 0.4990 - val_loss: 0.6998
Epoch 6/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 187ms/step - accuracy: 0.5490 - loss: 0.6508 - val_accuracy: 0.5064 - val_loss: 0.6979
Epoch 7/10

In [10]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 36ms/step - accuracy: 0.4916 - loss: 0.7138
Test Loss: 0.7135508060455322
Test Accuracy: 0.4997600018978119


In [11]:
import numpy as np

def predict_review(review, model):
    review = pad_sequences([review], maxlen=max_sequence_length, padding='post')
    prediction = model.predict(review)
    sentiment = 'Positive' if prediction[0] > 0.5 else 'Negative'
    confidence = prediction[0][0] if sentiment == 'Positive' else 1 - prediction[0][0]
    print(f"Predicted Sentiment: {sentiment}, Confidence: {confidence:.2f}")

sample_review = x_test[0] 
predict_review(sample_review, model)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 350ms/step
Predicted Sentiment: Positive, Confidence: 0.52
