In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.model_selection import train_test_split

# Binary text classifier: tokenize the `text` column, pad every sequence to a
# fixed length, and train an Embedding -> LSTM -> sigmoid model on `label`.

df = pd.read_csv("/content/movie.csv")
texts = df['text'].astype(str).tolist()
labels = df['label'].astype(int).tolist()


vocab_size = 5000  # size of the embedding table / tokenizer word cap
maxlen = 100       # every sequence is padded or truncated to this many tokens

# Decide the train/test membership BEFORE fitting the tokenizer so that the
# vocabulary (and its frequency ranking) is learned from training rows only.
# Fitting on the full corpus would leak information from the held-out set.
row_indices = np.arange(len(texts))
train_idx, test_idx = train_test_split(row_indices, test_size=0.2, random_state=42)

tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts([texts[i] for i in train_idx])

# Words unseen during fitting (including test-only words) map to the OOV token.
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=maxlen)


X_train, X_test = padded_sequences[train_idx], padded_sequences[test_idx]
y_train = [labels[i] for i in train_idx]
y_test = [labels[i] for i in test_idx]

# `input_length` is intentionally not passed to Embedding: it is deprecated in
# Keras 3, and the sequence length is already fixed by the padded input.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=64),
    LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# X_* are already ndarrays (slices of the padded matrix); only the label lists
# need converting. 10% of the training rows are held out for validation.
model.fit(X_train, np.asarray(y_train), epochs=5, batch_size=32, validation_split=0.1)

loss, accuracy = model.evaluate(X_test, np.asarray(y_test))
print(f"Test Accuracy: {accuracy:.2f}")


Epoch 1/5




900/900 ━━━━━━━━━━━━━━━━━━━━ 69s 72ms/step - accuracy: 0.7170 - loss: 0.5338 - val_accuracy: 0.8344 - val_loss: 0.3803
Epoch 2/5
900/900 ━━━━━━━━━━━━━━━━━━━━ 63s 70ms/step - accuracy: 0.8561 - loss: 0.3392 - val_accuracy: 0.8450 - val_loss: 0.3786
Epoch 3/5
900/900 ━━━━━━━━━━━━━━━━━━━━ 62s 69ms/step - accuracy: 0.8766 - loss: 0.2983 - val_accuracy: 0.8534 - val_loss: 0.3524
Epoch 4/5
900/900 ━━━━━━━━━━━━━━━━━━━━ 81s 69ms/step - accuracy: 0.8949 - loss: 0.2619 - val_accuracy: 0.8478 - val_loss: 0.3575
Epoch 5/5
900/900 ━━━━━━━━━━━━━━━━━━━━ 63s 70ms/step - accuracy: 0.9064 - loss: 0.2311 - val_accuracy: 0.8562 - val_loss: 0.3619
250/250 ━━━━━━━━━━━━━━━━━━━━ 4s 17ms/step - accuracy: 0.8537 - loss: 0.3643
Test Accuracy: 0.86
