In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [13]:
df=pd.read_csv("imdb_10K_sentimnets_reviews.csv")
df.head()

Unnamed: 0,review,sentiment
0,"Okay, I know this does'nt project India in a g...",1
1,Despite John Travolta's statements in intervie...,0
2,"I am a kung fu fan, but not a Woo fan. I have ...",1
3,He seems to be a control freak. I have heard h...,0
4,"Admittedly, there are some scenes in this movi...",1


In [14]:
df.shape

(10000, 2)

In [15]:
#counting pos and neg reviews
df["sentiment"].value_counts()

Unnamed: 0_level_0,count
sentiment,Unnamed: 1_level_1
1,5037
0,4963


In [16]:
#spliting data
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

In [19]:

# Tokenize text data
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_data["review"])
X_train = pad_sequences(tokenizer.texts_to_sequences(train_data["review"]), maxlen=200)
X_test = pad_sequences(tokenizer.texts_to_sequences(test_data["review"]), maxlen=200)

In [20]:
#training
Y_train = train_data["sentiment"]
Y_test = test_data["sentiment"]

In [21]:
print(Y_train)

9254    1
1561    0
1670    1
6087    1
6669    0
       ..
5734    0
5191    0
5390    1
860     1
7270    1
Name: sentiment, Length: 8000, dtype: int64


In [22]:
# building the model
model = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=200),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation="sigmoid")
])



In [23]:
model.build(input_shape=(None, 200))

model.summary()

In [24]:
#compiling
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [25]:
model.fit(X_train, Y_train, epochs=5, batch_size=64, validation_split=0.2)

Epoch 1/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 657ms/step - accuracy: 0.5759 - loss: 0.6718 - val_accuracy: 0.7944 - val_loss: 0.4618
Epoch 2/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 647ms/step - accuracy: 0.8316 - loss: 0.4066 - val_accuracy: 0.8225 - val_loss: 0.4055
Epoch 3/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 640ms/step - accuracy: 0.8782 - loss: 0.3013 - val_accuracy: 0.8150 - val_loss: 0.4177
Epoch 4/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 626ms/step - accuracy: 0.8954 - loss: 0.2604 - val_accuracy: 0.8131 - val_loss: 0.4337
Epoch 5/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 627ms/step - accuracy: 0.9297 - loss: 0.1952 - val_accuracy: 0.8056 - val_loss: 0.4910


<keras.src.callbacks.history.History at 0x7a4ce7c583b0>

In [26]:
#evaluating the model
loss,accuracy=model.evaluate(X_test,Y_test)
print("loss",loss)
print("accuracy",accuracy)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - accuracy: 0.7939 - loss: 0.5330
loss 0.5070149302482605
accuracy 0.8029999732971191


In [27]:
# a function to predict the sentiments
def sentimentpred(review):
  #tokenizing the review text to sequences
  sequence = tokenizer.texts_to_sequences([review])
  padded_sequence = pad_sequences(sequence, maxlen=200)
  pred = model.predict(padded_sequence)
  sentiment = "positive" if pred[0][0] > 0.5 else "negative"
  return sentiment

In [28]:
new_review = "It was really long and boring, nothing special."
sentiment = sentimentpred(new_review)
print(f"The sentiment of the review is: {sentiment}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 755ms/step
The sentiment of the review is: negative


In [29]:
new_review = "AMAZING. we really enjoyed this film"
sentiment = sentimentpred(new_review)
print(f"The sentiment of the review is: {sentiment}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
The sentiment of the review is: negative


In [30]:
new_review = "what was that? was it suppose to be horror..."
sentiment = sentimentpred(new_review)
print(f"The sentiment of the review is: {sentiment}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
The sentiment of the review is: negative
