<a href="https://colab.research.google.com/github/rahul-bellam/nlp-lab/blob/main/lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install tensorflow



In [5]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense

In [6]:

# Load Dataset
df = pd.read_csv("IMDB Dataset.csv")

# Keep 1.1K Positive & 1.1K Negative Reviews
df_positive = df[df['sentiment'] == 'positive'].sample(n=1100, random_state=1)
df_negative = df[df['sentiment'] == 'negative'].sample(n=1100, random_state=1)
df = pd.concat([df_positive, df_negative]).sample(frac=1, random_state=1)

df['sentiment'] = df['sentiment'].map({'negative': 0, 'positive': 1})

# Train-Test Split
train_data = df.iloc[:2000]
test_data = df.iloc[2000:]


In [7]:
X_train, y_train = train_data['review'].values, train_data['sentiment'].values
X_test, y_test = test_data['review'].values, test_data['sentiment'].values

In [8]:
# Tokenization & Padding
vocab_size = 10000
max_length = 500
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)


X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_seq, maxlen=max_length, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=max_length, padding='post', truncating='post')


In [9]:
# Convert labels to numpy arrays (if not already)
y_train = np.array(y_train)
y_test = np.array(y_test)


In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense


embedding_dim = 100
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim))
model.add(SimpleRNN(100))
model.add(Dense(1, activation='sigmoid'))

# Explicitly build the model so summary shows parameters
model.build(input_shape=(None, max_length))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

None


In [11]:
model.fit(X_train_padded, y_train, epochs=8, batch_size=128)


Epoch 1/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 407ms/step - accuracy: 0.4975 - loss: 0.7120
Epoch 2/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 341ms/step - accuracy: 0.5493 - loss: 0.6774
Epoch 3/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 369ms/step - accuracy: 0.5619 - loss: 0.6634
Epoch 4/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 328ms/step - accuracy: 0.5552 - loss: 0.6626
Epoch 5/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 343ms/step - accuracy: 0.5630 - loss: 0.6589
Epoch 6/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 335ms/step - accuracy: 0.5369 - loss: 0.6510
Epoch 7/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 355ms/step - accuracy: 0.5777 - loss: 0.6336
Epoch 8/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 403ms/step - accuracy: 0.5456 - loss: 0.7033


<keras.src.callbacks.history.History at 0x795fcc1a7850>

In [12]:
model.save('my_rnn_model.h5')




In [13]:
scores = model.evaluate(X_test_padded, y_test, verbose=0)
print("Test Accuracy: %.2f%%" % (scores[1]*100))

Test Accuracy: 47.50%


In [14]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

new_review = "The movie was bad!"
new_seq = tokenizer.texts_to_sequences([new_review])
new_padded = pad_sequences(new_seq, maxlen=max_length, padding='post', truncating='post')
prediction = model.predict(new_padded)
print("Predicted Sentiment:", "Positive" if prediction[0] >= 0.5 else "Negative")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step
Predicted Sentiment: Negative


In [16]:
from tensorflow.keras.layers import Embedding, LSTM, Dense
max_review_length = 500
embedding_dim = 100
model = Sequential()

model.add(Embedding(vocab_size, embedding_dim, input_length=max_review_length))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.build(input_shape=(None, max_review_length))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())




None


In [17]:
model.fit(X_train_padded, y_train, epochs=8, batch_size=128)



Epoch 1/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 1s/step - accuracy: 0.4802 - loss: 0.6944
Epoch 2/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1s/step - accuracy: 0.5344 - loss: 0.6924
Epoch 3/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 1s/step - accuracy: 0.5223 - loss: 0.6919
Epoch 4/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 1s/step - accuracy: 0.5399 - loss: 0.6898
Epoch 5/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1s/step - accuracy: 0.5156 - loss: 0.6859
Epoch 6/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.5451 - loss: 0.6678
Epoch 7/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1s/step - accuracy: 0.5358 - loss: 0.6684
Epoch 8/8
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 1s/step - accuracy: 0.5310 - loss: 0.6631


<keras.src.callbacks.history.History at 0x795fcb226110>

In [18]:
scores = model.evaluate(X_test_padded, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Accuracy: 46.00%


In [19]:
model.save("LSTM.h5")



In [20]:
from tensorflow.keras.models import Sequential, load_model
loaded_model = load_model('LSTM.h5')


new_review = "The movie was utterly bad "

new_seq = tokenizer.texts_to_sequences([new_review])

new_padded = pad_sequences(new_seq, maxlen=max_review_length, padding='post', truncating='post')


prediction = loaded_model.predict(new_padded)
print("Predicted Sentiment:", "Positive" if prediction[0] >= 0.5 else "Negative")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 282ms/step
Predicted Sentiment: Negative
