In [None]:
# 6. Sentiment analysis using an LSTM or GRU network.

In [1]:
import os
import re

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, LSTM

In [2]:
# Load dataset
# The CSV location can be overridden with the IMDB_DATASET_PATH environment
# variable; the original hard-coded path stays as the default so an existing
# setup keeps working unchanged.
DATASET_PATH = os.environ.get(
    'IMDB_DATASET_PATH',
    r'C:\Users\User\Desktop\Datasets\Data_assig_6\IMDB Dataset.csv',
)
data = pd.read_csv(DATASET_PATH)

# Fail early, with a clear message, if the columns used by the later cells are absent.
missing_columns = {'review', 'sentiment'} - set(data.columns)
if missing_columns:
    raise KeyError(f"Dataset is missing required column(s): {sorted(missing_columns)}")

In [3]:
# Preprocess the text
# Every step below is safe to re-run: cleaning already-cleaned text is a no-op,
# and labels that are already 0/1 are left untouched. (A plain .map() with the
# string->int dict turned every label into NaN when the cell was executed twice.)
SENTIMENT_LABELS = {'positive': 1, 'negative': 0}

data['review'] = (
    data['review']
    .str.replace('<.*?>', ' ', regex=True)      # HTML tags -> space, so "a<br />b" does not fuse into "ab"
    .str.replace('[^A-Za-z]', ' ', regex=True)  # Remove non-alphabetical characters
    .str.lower()                                # Convert to lowercase
)

# Map sentiments to binary values, passing through anything not in the mapping
# so that it can be reported below instead of silently becoming NaN.
data['sentiment'] = data['sentiment'].map(lambda label: SENTIMENT_LABELS.get(label, label))

unexpected_labels = data.loc[~data['sentiment'].isin([0, 1]), 'sentiment'].unique()
if len(unexpected_labels):
    raise ValueError(f"Unexpected sentiment label(s): {list(unexpected_labels)!r}")
data['sentiment'] = data['sentiment'].astype(int)


In [4]:
# Split the dataset: 20% of the rows are held out for testing, with a fixed seed
reviews, labels = data['review'], data['sentiment']
x_train, x_test, y_train, y_test = train_test_split(
    reviews,
    labels,
    test_size=0.2,
    random_state=42,
)

# Tokenize the text: the vocabulary is fitted on the training reviews only
tokenizer = Tokenizer()
tokenizer.fit_on_texts(x_train)

# Convert each review to a sequence of word indices, then pad to a fixed length of 100
max_len = 100
train_sequences = tokenizer.texts_to_sequences(x_train)
test_sequences = tokenizer.texts_to_sequences(x_test)
x_train = pad_sequences(train_sequences, maxlen=max_len)
x_test = pad_sequences(test_sequences, maxlen=max_len)


In [5]:
# Build the model: word embedding -> LSTM -> single sigmoid unit (binary output)
# One embedding row per word index, plus one extra row for index 0
# (Keras Tokenizer indices start at 1).
vocab_size = len(tokenizer.word_index) + 1

model = Sequential([
    # `input_length` is deprecated in Keras 3 and only triggers a warning there;
    # the layer takes the sequence length from the data on first use.
    Embedding(vocab_size, 32),
    LSTM(64),
    Dense(1, activation="sigmoid")
])

# Compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])




In [6]:
# Train the model for 3 passes over the training set, 128 reviews per batch
train_labels = np.array(y_train)
model.fit(
    x_train,
    train_labels,
    batch_size=128,
    epochs=3,
)


Epoch 1/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 92ms/step - accuracy: 0.7122 - loss: 0.5310
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 95ms/step - accuracy: 0.9101 - loss: 0.2315
Epoch 3/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 93ms/step - accuracy: 0.9481 - loss: 0.1451


<keras.src.callbacks.history.History at 0x2934cd09010>

In [7]:
# Evaluate the model on the held-out test split and report accuracy as a percentage
test_labels = np.array(y_test)
test_loss, test_acc = model.evaluate(x_test, test_labels)
accuracy_pct = test_acc * 100
print(f"Accuracy: {accuracy_pct:.2f}%")


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.8460 - loss: 0.3841
Accuracy: 85.03%


In [9]:
# Function to predict sentiment
def predict_sentiment(review):
    """Print the predicted sentiment ('positive' or 'negative') for one review.

    The text is cleaned with the same rules used on the training reviews: HTML
    tags and every character outside A-Z/a-z become spaces, then the text is
    lower-cased. It is then tokenized with the fitted ``tokenizer``, padded to
    100 tokens and scored by ``model``.

    Args:
        review: Raw review text.

    Returns:
        None. The cleaned review and the predicted label are printed.
    """
    # Use the training-time regexes. The previous isalpha()/isspace() filter kept
    # non-ASCII letters (e.g. 'é') that the training cleaning replaces with spaces,
    # and it did not strip HTML tags.
    review = re.sub('<.*?>', ' ', review)
    review = re.sub('[^A-Za-z]', ' ', review).lower()

    sequence = tokenizer.texts_to_sequences([review])
    if not sequence[0]:
        # No word is in the tokenizer vocabulary, so the model would only see
        # padding; report that instead of printing a meaningless label.
        print(f"Review: {review}\nPredicted Sentiment: unknown (no words recognised by the model)")
        return

    padded_sequence = pad_sequences(sequence, maxlen=100)
    # verbose=0 hides the per-call progress bar, which is noise for a single review.
    result = model.predict(padded_sequence, verbose=0)[0][0]
    sentiment = 'positive' if result >= 0.5 else 'negative'
    print(f"Review: {review}\nPredicted Sentiment: {sentiment}")


In [10]:
# Test the prediction in a loop
# Reads reviews from the user until 'exit' is typed (case-insensitive, surrounding
# whitespace ignored) and prints the predicted sentiment for each one.
while True:
    try:
        user_review = input("Enter a movie review (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the user interrupted: leave the loop cleanly
        # instead of ending the cell with a traceback.
        break
    if user_review.lower() == 'exit':
        break
    if not user_review:
        # Nothing was typed; ask again rather than scoring an empty review.
        continue
    predict_sentiment(user_review)


Enter a movie review (or type 'exit' to quit):  nice movie, but can be better


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 265ms/step
Review: nice movie  but can be better
Predicted Sentiment: positive


Enter a movie review (or type 'exit' to quit):  exit
