In [3]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense


vocab_size = 10000
max_length = 100
embedding_dim = 32

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Preprocess data: Padding sequences to make them of equal length
x_train = pad_sequences(x_train, maxlen=max_length)
x_test = pad_sequences(x_test, maxlen=max_length)

# creat recurrent neural netwrok model. Recurrent NNs are typically used in natural language processing 
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    LSTM(units=64, return_sequences=False),
    Dense(1, activation='sigmoid')  # Binary classification (positive/negative)
])

#compile
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


batch_size = 64
epochs = 5

model.fit(x_train, y_train, validation_split=0.2, batch_size=batch_size, epochs=epochs)

#evaluate
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

Epoch 1/5




[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 34ms/step - accuracy: 0.6797 - loss: 0.5603 - val_accuracy: 0.8366 - val_loss: 0.3672
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 31ms/step - accuracy: 0.8955 - loss: 0.2649 - val_accuracy: 0.8468 - val_loss: 0.3548
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 31ms/step - accuracy: 0.9231 - loss: 0.2131 - val_accuracy: 0.8364 - val_loss: 0.4366
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 31ms/step - accuracy: 0.9389 - loss: 0.1697 - val_accuracy: 0.8316 - val_loss: 0.4939
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 34ms/step - accuracy: 0.9608 - loss: 0.1160 - val_accuracy: 0.8350 - val_loss: 0.5378
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.8261 - loss: 0.5658
Test Loss: 0.5608222484588623
Test Accuracy: 0.826479971408844


In [47]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# input your message right here
new_message = ["INPUT MESSAGE HERE"]

#make tokenizer 
tokenizer = imdb.get_word_index()  # Use IMDB word index
reverse_word_index = {value: key for key, value in tokenizer.items()}
tokenizer = {key: value + 3 for key, value in tokenizer.items()}  # Offset for special tokens
tokenizer["<PAD>"] = 0
tokenizer["<START>"] = 1
tokenizer["<UNK>"] = 2
tokenizer["<UNUSED>"] = 3


tokenized_message = [[tokenizer.get(word, 2) for word in message.split()] for message in new_message]

# Padding to match test data
padded_message = pad_sequences(tokenized_message, maxlen=max_length)


prediction = model.predict(padded_message)
print(f"Prediction (Probability of Positive Sentiment): {prediction[0][0]}")
print("Sentiment:", "Positive" if prediction[0][0] > 0.5 else "Negative")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Prediction (Probability of Positive Sentiment): 0.20650528371334076
Sentiment: Negative
