<a href="https://colab.research.google.com/github/ravi-3690/ML-WORKSHOP-PROJECTS/blob/main/RNN(imdb)(26nov).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [None]:
# Load the IMDB dataset.
# vocab_size: only the top 10,000 words are considered.
# max_length: maximum length of each sequence (applied later when padding).
vocab_size, max_length = 10_000, 500
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Preprocess data: bring every review to the same length (max_length).
# Note the naming: upper-case X_* hold the raw sequences, lower-case x_* the
# padded arrays that are fed to the model.
x_train, x_test = (
    pad_sequences(raw_split, maxlen=max_length) for raw_split in (X_train, X_test)
)

In [None]:
#define the model
model = Sequential([
    # Declare the input shape explicitly. Embedding's `input_length` argument is
    # deprecated in Keras 3 (it only triggers a warning there), and an explicit
    # Input keeps the model built up-front so model.summary() works before fit.
    tf.keras.Input(shape=(max_length,)),
    Embedding(input_dim=vocab_size, output_dim=32),  # token id -> 32-d vector
    SimpleRNN(units=32, return_sequences=False),    # RNN layer (last state only)
    Dense(1, activation='sigmoid')     # Output Layer: single sigmoid unit
])



In [None]:
# Compile the model: binary cross-entropy pairs with the single sigmoid output,
# and accuracy is tracked alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Train the model on the padded reviews; 20% of the training data is held out
# for validation after each epoch.
print("Training the model...")
model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=3,
    validation_split=0.2,
)

Training the model...
Epoch 1/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 154ms/step - accuracy: 0.6077 - loss: 0.6395 - val_accuracy: 0.8126 - val_loss: 0.4349
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 149ms/step - accuracy: 0.8457 - loss: 0.3663 - val_accuracy: 0.8478 - val_loss: 0.3609
Epoch 3/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 146ms/step - accuracy: 0.9075 - loss: 0.2378 - val_accuracy: 0.7988 - val_loss: 0.4324


<keras.src.callbacks.history.History at 0x7e31e2f5bc70>

In [None]:
# Evaluate the model on the padded test reviews.
print("\nEvaluating the model...")
# evaluate returns the loss followed by the metrics given to compile (accuracy).
loss, accuracy = model.evaluate(x=x_test, y=y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Evaluating the model...
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 30ms/step - accuracy: 0.8021 - loss: 0.4301
Test Accuracy: 80.49%


In [None]:
#Test with a review taken from the test set
#Decode IMDB word index
word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}
def decode_review(sequence):
    """Turn a sequence of IMDB token ids back into a space-joined string.

    The ids produced by ``imdb.load_data`` are shifted by 3 relative to
    ``imdb.get_word_index()`` (the loader's default ``index_from``), so the
    shift is undone before the lookup. Ids without a matching word (such as
    the reserved low ids) are rendered as '?'.
    """
    return " ".join([reverse_word_index.get(i - 3, '?') for i in sequence])

# Start from the raw review: x_test[0] is already padded, so padding it again
# would be a no-op and decoding it would mostly print padding markers.
test_review = X_test[0]
test_review_padded = pad_sequences([test_review], maxlen=max_length)
prediction = model.predict(test_review_padded)
# One input row and one sigmoid unit -> the probability sits at [0][0].
print("Sentiment Prediction:", "Positive" if prediction[0][0] > 0.5 else "Negative")

print("\nDecoded Review:", decode_review(test_review))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 192ms/step
Sentiment Prediction: Positive


In [None]:
#Step 1: Load the IMDB word index (maps each word to an integer index)
word_index = imdb.get_word_index()
# Inverted view of the same mapping: integer index -> word
reverse_word_index = dict((index, word) for word, index in word_index.items())


In [None]:
#Step 2: Preprocess the custom review
def preprocess_review(review_text):
    """Encode raw review text the same way the IMDB training data is encoded.

    Args:
        review_text: The review as a plain string.

    Returns:
        The array returned by ``pad_sequences`` for this single review,
        padded/truncated to ``max_length`` and ready for ``model.predict``.

    The encoding mirrors the defaults of ``imdb.load_data``:
      * every sequence starts with the start marker (1),
      * word indices from ``imdb.get_word_index()`` are shifted by 3,
      * unknown words and words outside the ``vocab_size`` most frequent ones
        become the out-of-vocabulary marker (2). This also keeps every id
        inside the Embedding layer's ``input_dim``.
    """
    # Defaults of imdb.load_data (start_char, oov_char, index_from).
    start_char, oov_char, index_from = 1, 2, 3

    # Lowercase and replace punctuation with spaces so that e.g. "terrible!,"
    # is looked up as "terrible". Apostrophes are kept so contractions stay
    # intact -- assumes the word index stores them that way; TODO confirm.
    cleaned_text = "".join(
        ch if ch.isalnum() or ch == "'" else " " for ch in review_text.lower()
    )
    words = cleaned_text.split()

    # Convert words to integers using the IMDB word index.
    tokenized_review = [start_char]
    for word in words:
        rank = word_index.get(word)
        token = oov_char if rank is None else rank + index_from
        if token >= vocab_size:
            # Word was not among the top `vocab_size` words seen in training.
            token = oov_char
        tokenized_review.append(token)

    # Pad the tokenized sequence
    return pad_sequences([tokenized_review], maxlen=max_length)



In [None]:
#Step 3: Test with custom input
custom_review = "This movie was terrible!, with poor acting and a boring story."
processed_review = preprocess_review(custom_review)

#Predict the sentiment
prediction = model.predict(processed_review)
print("\nCustom review:", custom_review)
print("Processed review:", processed_review)
# One input row and one sigmoid unit -> the probability sits at [0][0];
# above 0.5 is reported as positive, otherwise negative.
print("Sentiment prediction:", "Positive" if prediction[0][0] > 0.5 else "Negative")