<a href="https://colab.research.google.com/github/pavankumar2594/sentiment/blob/main/sentiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

# PARAMETERS
max_words = 10000   # Maximum number of words to consider in the vocabulary
max_len = 200       # Maximum review length (in words)
embedding_dim = 128
batch_size = 64
epochs = 8
data = pd.read_csv("/content/IMDB Dataset.csv")

data['sentiment'] = data['sentiment'].map({'negative': 0, 'positive': 1})

# Split dataset
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Tokenize the text
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(train_data['review'])

# Convert texts to sequences of integers
X_train = tokenizer.texts_to_sequences(train_data['review'])
X_test = tokenizer.texts_to_sequences(test_data['review'])

# Pad sequences so that all inputs have the same length
X_train = pad_sequences(X_train, maxlen=max_len, padding='post', truncating='post')
X_test = pad_sequences(X_test, maxlen=max_len, padding='post', truncating='post')

# Extract labels
y_train = train_data['sentiment'].values
y_test = test_data['sentiment'].values

# Build the LSTM model
model = Sequential([
    Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
model.summary()

#  to avoid overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# Train the model
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[early_stop]
)

# Evaluate the model on test data
loss, accuracy = model.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {accuracy*100:.2f}%")
def predict_review(review_text):
    # Convert the input text into a sequence of integers using the tokenizer
    sequence = tokenizer.texts_to_sequences([review_text])
    # Pad the sequence to match the length used during training
    padded_sequence = pad_sequences(sequence, maxlen=max_len, padding='post', truncating='post')
    # Predict the sentiment (model output is probability from sigmoid, thresholded at 0.5)
    prediction = model.predict(padded_sequence)
    sentiment = "Positive" if prediction[0][0] > 0.5 else "Negative"
    return sentiment

# user input
user_input = input("\nEnter a movie review to test: ")
result_sentiment = predict_review(user_input)
print(f"The review is {result_sentiment}.")




Epoch 1/8
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m331s[0m 653ms/step - accuracy: 0.5091 - loss: 0.6938 - val_accuracy: 0.5257 - val_loss: 0.6888
Epoch 2/8
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 639ms/step - accuracy: 0.5724 - loss: 0.6735 - val_accuracy: 0.5851 - val_loss: 0.6636
Epoch 3/8
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m323s[0m 641ms/step - accuracy: 0.6023 - loss: 0.6566 - val_accuracy: 0.5924 - val_loss: 0.6488
Epoch 4/8
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 637ms/step - accuracy: 0.6801 - loss: 0.5794 - val_accuracy: 0.8023 - val_loss: 0.4561
Epoch 5/8
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 643ms/step - accuracy: 0.8482 - loss: 0.3698 - val_accuracy: 0.8529 - val_loss: 0.3615
Epoch 6/8
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 640ms/step - accuracy: 0.8908 - loss: 0.2783 - val_accuracy: 0.8585 - val_loss: 0.3314
Epoch 7/8
