In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense




In [3]:
# 1) Sample Data
text_samples = [
    "i love this movie it is wonderful",
    "this film was so dull i hate it",
    "amazing movie i would watch it again",
    "absolutely boring film do not watch",
    "wonderful plot superb direction",
    "waste of money not recommended"
]
labels = np.array([1, 0, 1, 0, 1, 0])


In [4]:

# 2) Tokenizing
vocab_size = 1000
max_length = 8
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<UNK>")
tokenizer.fit_on_texts(text_samples)
sequences = tokenizer.texts_to_sequences(text_samples)
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')


In [5]:

# 3) Split Data
train_sequences = padded_sequences[:4]
train_labels = labels[:4]
test_sequences = padded_sequences[4:]
test_labels = labels[4:]


In [6]:

# 4) Build Model
embedding_dim = 16
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    LSTM(32),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()




In [7]:

# 5) Train Model
epochs = 10
history = model.fit(train_sequences, train_labels,
                    validation_data=(test_sequences, test_labels),
                    epochs=epochs, verbose=2)


Epoch 1/10
1/1 - 5s - 5s/step - accuracy: 0.5000 - loss: 0.6932 - val_accuracy: 0.5000 - val_loss: 0.6920
Epoch 2/10
1/1 - 0s - 223ms/step - accuracy: 0.7500 - loss: 0.6922 - val_accuracy: 1.0000 - val_loss: 0.6919
Epoch 3/10
1/1 - 0s - 337ms/step - accuracy: 1.0000 - loss: 0.6913 - val_accuracy: 1.0000 - val_loss: 0.6917
Epoch 4/10
1/1 - 0s - 251ms/step - accuracy: 1.0000 - loss: 0.6903 - val_accuracy: 1.0000 - val_loss: 0.6916
Epoch 5/10
1/1 - 0s - 247ms/step - accuracy: 1.0000 - loss: 0.6892 - val_accuracy: 1.0000 - val_loss: 0.6914
Epoch 6/10
1/1 - 0s - 175ms/step - accuracy: 1.0000 - loss: 0.6881 - val_accuracy: 1.0000 - val_loss: 0.6912
Epoch 7/10
1/1 - 0s - 170ms/step - accuracy: 1.0000 - loss: 0.6870 - val_accuracy: 1.0000 - val_loss: 0.6910
Epoch 8/10
1/1 - 0s - 441ms/step - accuracy: 1.0000 - loss: 0.6858 - val_accuracy: 0.5000 - val_loss: 0.6908
Epoch 9/10
1/1 - 0s - 189ms/step - accuracy: 1.0000 - loss: 0.6845 - val_accuracy: 0.5000 - val_loss: 0.6906
Epoch 10/10
1/1 - 0s -

In [8]:

# 6) Evaluate Model
loss, accuracy = model.evaluate(test_sequences, test_labels, verbose=0)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


Test Loss: 0.6903
Test Accuracy: 0.5000


In [9]:

# 7) Predict on New Sentences
new_text = ["i really love this film", "do not watch this movie it is terrible"]
new_sequences = tokenizer.texts_to_sequences(new_text)
new_padded = pad_sequences(new_sequences, maxlen=max_length, padding='post', truncating='post')

predictions = model.predict(new_padded)
for text, pred in zip(new_text, predictions):
    print(f"Sentence: {text}")
    print(f"Predicted positive probability: {pred[0]:.4f}")
    print("---")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 507ms/step
Sentence: i really love this film
Predicted positive probability: 0.5015
---
Sentence: do not watch this movie it is terrible
Predicted positive probability: 0.5018
---
