In [1]:
pip install tensorflow numpy pandas scikit-learn




In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('/content/IMDB Dataset.csv', engine='python', on_bad_lines='skip')

print("Label type before conversion:", df['sentiment'].dtype)

df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})

print("Label type after conversion:", df['sentiment'].dtype)

texts = df['review'].values
labels = df['sentiment'].values

X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)


Label type before conversion: object
Label type after conversion: int64


In [13]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

vocabulary_size = 5000
max_words = 500

tokenizer = Tokenizer(num_words=vocabulary_size)
tokenizer.fit_on_texts(X_train)
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)

X_train = pad_sequences(X_train, maxlen=max_words)
X_test = pad_sequences(X_test, maxlen=max_words)


In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

model = Sequential([
    Embedding(input_dim=vocabulary_size, output_dim=128),
    LSTM(50, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=64)


Epoch 1/3
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m559s[0m 876ms/step - accuracy: 0.7413 - loss: 0.5094 - val_accuracy: 0.8518 - val_loss: 0.3571
Epoch 2/3
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m558s[0m 869ms/step - accuracy: 0.8424 - loss: 0.3717 - val_accuracy: 0.8454 - val_loss: 0.3685
Epoch 3/3
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m563s[0m 872ms/step - accuracy: 0.8604 - loss: 0.3440 - val_accuracy: 0.8724 - val_loss: 0.3159


<keras.src.callbacks.history.History at 0x7f3e863a4b20>

In [16]:
scores = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {scores[1]*100:.2f}%")


[1m318/318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 204ms/step - accuracy: 0.8693 - loss: 0.3197
Test Accuracy: 87.24%


In [17]:
def preprocess_text(text, tokenizer, max_words=500):
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_words)
    return padded


In [18]:
def predict_sentiment(text, model, tokenizer):
    # Preprocess the text
    processed_text = preprocess_text(text, tokenizer)

    # Make prediction
    prediction = model.predict(processed_text)

    # Convert prediction to label
    predicted_label = 'Positive' if prediction[0][0] > 0.5 else 'Negative'
    return predicted_label


In [43]:
sample_text = "John Garfield plays a Marine who is blinded by a grenade while fighting on Guadalcanal and who has to learn to live with his disability. He has all the stereotypical notions about blindness, and is sure he'll be a burden to everyone. The hospital staff and his fellow wounded Marines can't get through to him. Neither can his girl back home played by Eleanor Parker. He's stubborn and blinded by his own fears, self pity, and prejudices. It's a complex role that Garfield carries off memorably in a great performance that keeps one watching in spite of the ever present syrupy melodrama. The best scenes are on Guadalcanal, where he's in a machine gun nest trying to fend off the advancing Japanese soldiers in a hellish looking night time battle, and later a dream sequence in the hospital where he sees himself walking down a train platform with a white cane, dark glasses, and holding out a tin cup, all the while his girlfriend walks backward away from the camera"

sentiment = predict_sentiment(sample_text, model, tokenizer)
print(f"The sentiment of the review is: {sentiment}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
The sentiment of the review is: Positive
