<a href="https://colab.research.google.com/github/sanidhya2506/ML/blob/main/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1️⃣ - Import libraries
from tensorflow.keras import datasets, layers, models, preprocessing
from tensorflow.keras.preprocessing import sequence
import tensorflow as tf

# Step 2️⃣ - Load IMDB dataset
# It contains 50,000 movie reviews already tokenized (into numbers)
# train/test split = 25,000 each
max_features = 10000   # we'll only use the 10k most common words
maxlen = 200           # each review is truncated/padded to 200 words

(x_train, y_train), (x_test, y_test) = datasets.imdb.load_data(num_words=max_features)

print("Train data shape:", x_train.shape)
print("Test data shape:", x_test.shape)
print("Example review:", x_train[0][:10])
print("Label (0=neg, 1=pos):", y_train[0])


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Train data shape: (25000,)
Test data shape: (25000,)
Example review: [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65]
Label (0=neg, 1=pos): 1


In [2]:
# Step 3️⃣ - Pad sequences (make all reviews same length)
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

print("After padding:", x_train.shape)


After padding: (25000, 200)


In [3]:
# Step 4️⃣ - Build RNN model
model = models.Sequential([
    layers.Embedding(max_features, 128),   # convert word indices → 128-dim embeddings
    layers.SimpleRNN(64, return_sequences=False), # main RNN layer (64 memory units)
    layers.Dense(1, activation='sigmoid')  # output layer (1 neuron for binary sentiment)
])

model.summary()


In [4]:
# Step 5️⃣ - Compile model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [5]:
# Step 6️⃣ - Train the model
history = model.fit(x_train, y_train,
                    epochs=3,
                    batch_size=64,
                    validation_split=0.2)


Epoch 1/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 100ms/step - accuracy: 0.5875 - loss: 0.6456 - val_accuracy: 0.8040 - val_loss: 0.4519
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 109ms/step - accuracy: 0.8480 - loss: 0.3550 - val_accuracy: 0.6332 - val_loss: 0.6547
Epoch 3/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 102ms/step - accuracy: 0.8795 - loss: 0.2972 - val_accuracy: 0.7524 - val_loss: 0.6412


In [6]:
# Step 7️⃣ - Evaluate on test data
test_loss, test_acc = model.evaluate(x_test, y_test)
print("Test Accuracy:", test_acc)


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 18ms/step - accuracy: 0.7418 - loss: 0.6403
Test Accuracy: 0.7454400062561035


In [7]:
# Step 8️⃣ - Predict new review (optional)
sample = x_test[0:1]
pred = model.predict(sample)
print("Predicted sentiment (0=neg, 1=pos):", (pred[0][0] > 0.5))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 201ms/step
Predicted sentiment (0=neg, 1=pos): False


In [8]:
print("Predicted probability of being positive:", pred[0][0])


Predicted probability of being positive: 0.41463032


In [9]:
# Step 9️⃣ - Decode the review numbers back to words
word_index = datasets.imdb.get_word_index()  # dictionary: word -> index

# Reverse the dictionary to index -> word
index_word = {index + 3: word for word, index in word_index.items()}
index_word[0] = "<PAD>"
index_word[1] = "<START>"
index_word[2] = "<UNK>"
index_word[3] = "<UNUSED>"

# Decode the first sample review
decoded_review = ' '.join([index_word.get(i, "?") for i in x_test[0]])

print("Decoded review:\n")
print(decoded_review)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Decoded review:

<PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <STAR