In [None]:
# Importing important libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Dense, Embedding, Input, SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [None]:
# Step 1 : Load the IMDB dataset
vocab_size = 10000  # vocabulary cap handed to load_data(num_words=...)
max_len = 200  # review length (in tokens) used when padding in Step 2
train_split, test_split = imdb.load_data(num_words=vocab_size)
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Step 2 : Preprocess the data
# Run both splits through pad_sequences with the same maxlen so train and
# test reviews end up with the same length.
x_train, x_test = (
    pad_sequences(reviews, maxlen=max_len) for reviews in (x_train, x_test)
)

In [None]:
# Step 3 : Build the model
# The input shape is declared with an explicit Input layer instead of the
# deprecated Embedding(input_length=...) argument. The model is then built
# as soon as it is constructed, so model.summary() can report concrete
# output shapes and parameter counts.
model = Sequential([
    Input(shape=(max_len,)),
    Embedding(input_dim=vocab_size, output_dim=32),  # token id -> 32-d vector
    SimpleRNN(units=32),  # consumes the sequence, emits its final output
    Dense(units=1, activation='sigmoid'),  # score in (0, 1); > 0.5 is read as positive
])

In [None]:
# Compiling the model
# Plain SGD left the network at chance level in the recorded run (~0.52 test
# accuracy after 5 epochs). Adam's adaptive step sizes typically train small
# recurrent models like this one much faster, so it is used here instead.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Summary
# Show the Keras model summary as a quick sanity check of the architecture
# assembled in Step 3 before spending time on training.
model.summary()

In [None]:
# Step 4 : Training the model
# Validate on a slice held out from the training data rather than on the
# test set, so the test set stays unseen until the final evaluation and the
# reported test accuracy is not the same number as the last val_accuracy.
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.2,  # Keras holds out the last 20% of the samples
)

Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 76ms/step - accuracy: 0.4965 - loss: 0.6976 - val_accuracy: 0.5096 - val_loss: 0.6933
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 64ms/step - accuracy: 0.5207 - loss: 0.6918 - val_accuracy: 0.5014 - val_loss: 0.6960
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 61ms/step - accuracy: 0.5297 - loss: 0.6898 - val_accuracy: 0.5420 - val_loss: 0.6883
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 73ms/step - accuracy: 0.5555 - loss: 0.6857 - val_accuracy: 0.5647 - val_loss: 0.6823
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 64ms/step - accuracy: 0.5729 - loss: 0.6786 - val_accuracy: 0.5233 - val_loss: 0.6918


In [None]:
# Step 5 -> Evaluate the model
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(x_test, y_test)
test_loss, test_acc = test_metrics
print("Test Accuracy :", test_acc)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 13ms/step - accuracy: 0.5286 - loss: 0.6886
Test Accuracy : 0.5233200192451477


In [None]:
# Step 6 -> Prediction
sample_review = "This movie was fantastic! I loved every minute of it."

# imdb.load_data() (called with its defaults in Step 1) reserves id 0 for
# padding, 1 for start-of-sequence and 2 for out-of-vocabulary words, and
# shifts every real word id by 3. The raw ranks returned by get_word_index()
# therefore have to be shifted the same way, otherwise each word is looked up
# in the wrong embedding row.
START_ID, OOV_ID, INDEX_OFFSET = 1, 2, 3
word_index = imdb.get_word_index()
tokenizer = word_index  # old name kept for any later cell that still uses it


def encode_review(text):
    """Encode raw text with the same id scheme imdb.load_data() used for training.

    Args:
        text: The review as a plain string.

    Returns:
        A list of integer token ids: the start marker followed by one id per
        word, with unknown or out-of-range words replaced by the OOV id.
    """
    # Lower-case and turn everything except letters, digits and apostrophes
    # into spaces, so "fantastic!" and "it." match their vocabulary entries.
    cleaned = "".join(
        ch if ch.isalnum() or ch == "'" else " " for ch in text.lower()
    )
    token_ids = [START_ID]
    for word in cleaned.split():
        rank = word_index.get(word)
        token_id = OOV_ID if rank is None else rank + INDEX_OFFSET
        # Ids at or above vocab_size do not exist in the Embedding table
        # (input_dim=vocab_size); load_data maps those words to OOV as well.
        token_ids.append(token_id if token_id < vocab_size else OOV_ID)
    return token_ids


# Tokenize the input text
sample_review_tokens = encode_review(sample_review)
sample_review_tokens_padded = pad_sequences([sample_review_tokens], maxlen=max_len)

# Making the predictions
prediction = model.predict(sample_review_tokens_padded)
# predict() returns one row per input review; take the single score as a scalar
# instead of relying on the truthiness of a one-element array.
positive_probability = float(prediction[0, 0])
print(f"Predicted Sentiment: {'Positive' if positive_probability > 0.5 else 'Negative'}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Predicted Sentiment: Positive
