In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [5]:
# 1. Load dataset
df = pd.read_csv('IMDB Dataset.csv')

In [7]:
# 2. Convert labels: positive -> 1, negative -> 0
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})

In [9]:
# 3. Extract text and labels
texts = df['review'].astype(str).tolist()
labels = df['sentiment'].astype(int).tolist()

In [11]:
# 4. Tokenize text
max_words = 10000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)

In [12]:
# 5. Convert texts to binary matrix (bag of words model)
x_data = tokenizer.texts_to_matrix(texts, mode='binary')
y_data = np.array(labels).astype('float32')

In [13]:
# 6. Split into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3, random_state=42)

In [15]:
# 7. Build model
model = Sequential()
model.add(Dense(16, activation='relu', input_shape=(max_words,)))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
# 8. Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [17]:
# 9. Train model
history = model.fit(x_train, y_train, epochs=10, batch_size=512, validation_split=0.2, verbose=1)

Epoch 1/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - accuracy: 0.7254 - loss: 0.5726 - val_accuracy: 0.8906 - val_loss: 0.2967
Epoch 2/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9135 - loss: 0.2469 - val_accuracy: 0.8951 - val_loss: 0.2662
Epoch 3/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.9379 - loss: 0.1775 - val_accuracy: 0.8929 - val_loss: 0.2738
Epoch 4/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9522 - loss: 0.1423 - val_accuracy: 0.8903 - val_loss: 0.2947
Epoch 5/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9640 - loss: 0.1174 - val_accuracy: 0.8857 - val_loss: 0.3231
Epoch 6/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9693 - loss: 0.0976 - val_accuracy: 0.8814 - val_loss: 0.3520
Epoch 7/10
[1m55/55[0m [32m━━━━

In [18]:
# 10. Evaluate
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"\nTest Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")

[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8650 - loss: 0.5079

Test Accuracy: 0.8681, Test Loss: 0.5108


In [20]:
# 11. Predict on test set
predictions = model.predict(x_test)

[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [22]:
# 12. Show sample predictions
for i in range(10):
    predicted_label = 1 if predictions[i] >= 0.5 else 0
    print(f"Review {i+1}: Predicted = {predicted_label}, Actual = {int(y_test[i])}")

Review 1: Predicted = 1, Actual = 1
Review 2: Predicted = 1, Actual = 1
Review 3: Predicted = 0, Actual = 0
Review 4: Predicted = 1, Actual = 1
Review 5: Predicted = 0, Actual = 0
Review 6: Predicted = 1, Actual = 1
Review 7: Predicted = 1, Actual = 1
Review 8: Predicted = 0, Actual = 1
Review 9: Predicted = 0, Actual = 0
Review 10: Predicted = 0, Actual = 0
