In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Toy sentiment corpus: one (text, label) record per row.
# Label convention: 1 = positive review, 0 = negative review.
data = pd.DataFrame(
    [
        ('I love this product', 1),
        ('This is terrible', 0),
        ('Amazing experience', 1),
        ('I am not happy', 0),
        ('Fantastic service', 1),
    ],
    columns=['text', 'sentiment'],
)

# Separate the model input (raw review text) from the target (sentiment label).
X, y = data['text'], data['sentiment']

# Hold out a test portion (test_size=0.2); the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Tokenization
# The word index is learned from the training texts only, so the held-out
# texts never influence the vocabulary.
#
# `oov_token` reserves an index for out-of-vocabulary words. Without it,
# `texts_to_sequences` silently drops every word that was not seen while
# fitting, so unseen test/inference sentences can collapse to (nearly) empty
# sequences and the model receives almost nothing but padding.
# `num_words` caps the indices emitted by `texts_to_sequences`; it must not
# exceed the `input_dim` of the Embedding layer that consumes these sequences.
tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')
tokenizer.fit_on_texts(X_train)

# Convert each text into a list of integer word indices.
X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

# Bring every integer sequence to one common length so the batches are
# rectangular. `max_len` is a tunable: pick it to suit the text lengths
# in your data.
max_len = 100

X_train_padded, X_test_padded = (
    pad_sequences(sequences, maxlen=max_len)
    for sequences in (X_train_sequences, X_test_sequences)
)


In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Model parameters
embedding_dim = 100  # length of each learned word vector
rnn_units = 64       # number of units in the LSTM layer

# Size the embedding table from the fitted tokenizer instead of repeating the
# vocabulary cap as a second magic number that can drift out of sync.
# When `num_words` is unset the tokenizer emits indices 1..len(word_index),
# and index 0 is the padding value, hence the +1.
vocab_size = tokenizer.num_words or len(tokenizer.word_index) + 1

# Binary sentiment classifier: embedding -> LSTM -> dropout -> sigmoid unit.
model = Sequential([
    # `input_length` is deliberately not passed: it is deprecated in Keras 3
    # and the sequence length is inferred from the data on the first call.
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    # return_sequences=False: only the final LSTM output feeds the classifier.
    LSTM(rnn_units, return_sequences=False),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification: P(positive)
])

# Binary cross-entropy matches the single sigmoid output and the 0/1 labels.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])




In [3]:
# Train the classifier on the padded training sequences. A fraction of the
# training data (validation_split=0.2) is set aside for per-epoch validation;
# the returned object is kept in `history` for later inspection.
history = model.fit(
    x=X_train_padded,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_split=0.2,
    verbose=1,
)


Epoch 1/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 3s 3s/step - accuracy: 0.0000e+00 - loss: 0.7020 - val_accuracy: 0.0000e+00 - val_loss: 0.7117
Epoch 2/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 97ms/step - accuracy: 1.0000 - loss: 0.6676 - val_accuracy: 0.0000e+00 - val_loss: 0.7323
Epoch 3/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 92ms/step - accuracy: 1.0000 - loss: 0.6497 - val_accuracy: 0.0000e+00 - val_loss: 0.7561
Epoch 4/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 99ms/step - accuracy: 1.0000 - loss: 0.6055 - val_accuracy: 0.0000e+00 - val_loss: 0.7843
Epoch 5/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 106ms/step - accuracy: 1.0000 - loss: 0.5663 - val_accuracy: 0.0000e+00 - val_loss: 0.8190


In [4]:
# Score the trained model on the held-out test set. With the single
# 'accuracy' metric compiled in, evaluate() yields the pair (loss, accuracy).
loss, accuracy = model.evaluate(
    x=X_test_padded,
    y=y_test,
)
print(f"Test Accuracy: {accuracy:.4f}")


1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 31ms/step - accuracy: 0.0000e+00 - loss: 0.8967
Test Accuracy: 0.0000


In [7]:
# Unseen sentences to score with the trained model.
new_texts = [
    "I feel bad about this!",
    "This is the good purchase ever.",
]

# Inference inputs go through the same preprocessing as the training data:
# the already-fitted tokenizer, then padding to the same max_len.
new_sequences = tokenizer.texts_to_sequences(new_texts)
new_padded = pad_sequences(new_sequences, maxlen=max_len)

# One sigmoid output per input text: the predicted probability of the
# positive class.
predictions = model.predict(new_padded)
print(predictions)


1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step
[[0.5823576 ]
 [0.59209865]]
