In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd

# Toy dataset: five truncated movie reviews, each paired with a sentiment label
reviews = [
    "One of the other reviewers has mentioned that ...",
    "A wonderful little production. <br /><br />The...",
    "I thought this was a wonderful way to spend ti...",
    "Basically there's a family where a little boy ...",
    "Petter Mattei's 'Love in the Time of Money' is...",
]
sentiments = ['positive', 'positive', 'positive', 'negative', 'positive']
data = pd.DataFrame({'review': reviews, 'sentiment': sentiments})

# Map the string sentiment labels to integer codes (kept in a new column)
label_encoder = LabelEncoder()
label_encoder.fit(data['sentiment'])
data['sentiment_encoded'] = label_encoder.transform(data['sentiment'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    data['review'],
    data['sentiment_encoded'],
    test_size=0.2,
    random_state=42,
)

# Initialize a tokenizer and fit it on the training data only, so the
# vocabulary is built without looking at the test reviews.
num_words = 10000  # Keeping the top 10,000 words
# Without an OOV token the tokenizer silently drops every word it did not see
# while fitting, so test reviews and new reviews can shrink to (nearly) empty
# sequences. Reserving an explicit out-of-vocabulary token keeps those
# positions in the sequence instead.
tokenizer = keras.preprocessing.text.Tokenizer(num_words=num_words, oov_token="<OOV>")
tokenizer.fit_on_texts(x_train)

# Convert text data to sequences of integers
x_train_sequences = tokenizer.texts_to_sequences(x_train)
x_test_sequences = tokenizer.texts_to_sequences(x_test)

# Pad the sequences to a consistent length
max_len = 500
x_train_padded = keras.preprocessing.sequence.pad_sequences(x_train_sequences, maxlen=max_len)
x_test_padded = keras.preprocessing.sequence.pad_sequences(x_test_sequences, maxlen=max_len)

# Build the model
# The input shape is declared with an explicit keras.Input instead of the
# Embedding layer's `input_length` argument, which Keras 3 deprecates (it only
# triggers a warning there). The layer stack is otherwise unchanged.
model = keras.Sequential([
    keras.Input(shape=(max_len,)),  # One padded sequence of token ids per sample
    layers.Embedding(input_dim=num_words, output_dim=16),  # Embedding layer
    layers.GlobalAveragePooling1D(),  # Global average pooling
    layers.Dense(16, activation='relu'),  # Dense hidden layer
    layers.Dense(1, activation='sigmoid'),  # Output layer with sigmoid for binary classification
])

# Compile the model with Adam optimizer, binary cross-entropy loss, and accuracy metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit for 10 epochs; the held-out split is passed as validation data so that
# validation loss/accuracy are reported after every epoch.
validation_pair = (x_test_padded, y_test)
model.fit(
    x_train_padded,
    y_train,
    epochs=10,
    batch_size=512,
    validation_data=validation_pair,
    verbose=1,
)

# Score the trained model on the held-out split and report its accuracy
evaluation = model.evaluate(x_test_padded, y_test)
test_loss, test_acc = evaluation
print("Test accuracy:", test_acc)

# Example: score a brand-new review with the trained model.
# The text goes through the same tokenizer and padding as the training data.
new_review = ["I really enjoyed the movie. It was fantastic!"]  # Example text review
new_review_sequences = tokenizer.texts_to_sequences(new_review)
new_review_padded = keras.preprocessing.sequence.pad_sequences(
    sequences=new_review_sequences,
    maxlen=max_len,
)

# Run the model on the padded review
prediction = model.predict(x=new_review_padded)

# Show the raw sigmoid output (a value close to 0 means negative, close to 1 means positive)
print("Prediction:", prediction)


Epoch 1/10




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.7500 - loss: 0.6925 - val_accuracy: 1.0000 - val_loss: 0.6556
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.7500 - loss: 0.6749 - val_accuracy: 1.0000 - val_loss: 0.6178
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.7500 - loss: 0.6573 - val_accuracy: 1.0000 - val_loss: 0.5825
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.7500 - loss: 0.6417 - val_accuracy: 1.0000 - val_loss: 0.5490
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.7500 - loss: 0.6277 - val_accuracy: 1.0000 - val_loss: 0.5182
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.7500 - loss: 0.6156 - val_accuracy: 1.0000 - val_loss: 0.4882
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[