Without Dataset

In [3]:
# =============================
# Sentiment Analysis using LSTM
# Train on 5 statements (tiny dataset)
# Test on 1 new statement
# =============================

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# -------------------------
# 1) Training Sentences (5 samples)
# -------------------------
train_sentences = [
    "I love this movie",       # positive
    "The food was excellent",  # positive
    "This is a terrible product", # negative
    "I hate the service",      # negative
    "The experience was good"  # positive
]

train_labels = np.array([1,1,0,0,1])  # 1=Positive, 0=Negative

# -------------------------
# Configuration / reproducibility
# -------------------------
# Seed Python, NumPy and TensorFlow in one call so that a fresh
# "Restart Kernel -> Run All" yields the same weights and the same prediction.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Shared by the tokenizer, the padding calls and the model so they cannot
# drift apart (previously the literals 100 and 6 were repeated in each place).
VOCAB_SIZE = 100  # tokenizer vocabulary cap == Embedding input_dim
MAX_LEN = 6       # every sentence is padded to this many tokens

# -------------------------
# 2) Tokenize and pad data
# -------------------------
# Words never seen during fitting are mapped to the "<UNK>" token instead of
# being silently dropped.
tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token="<UNK>")
tokenizer.fit_on_texts(train_sentences)

train_sequences = tokenizer.texts_to_sequences(train_sentences)
# 'post' padding appends zeros after the real tokens.
train_padded = pad_sequences(train_sequences, maxlen=MAX_LEN, padding='post')

# -------------------------
# 3) Model (LSTM based)
# -------------------------
# * An explicit Input replaces the deprecated `input_length` Embedding argument.
# * mask_zero=True marks index 0 (the padding value) as "no data", so the LSTM
#   skips the padded timesteps instead of updating its state on them.
model = Sequential([
    tf.keras.Input(shape=(MAX_LEN,)),
    Embedding(input_dim=VOCAB_SIZE, output_dim=16, mask_zero=True),
    LSTM(16),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# -------------------------
# 4) Train
# -------------------------
# Only 5 samples: this demonstrates the pipeline, not a usable classifier.
model.fit(train_padded, train_labels, epochs=20, verbose=1)

# -------------------------
# 5) Testing on a new sentence
# -------------------------
# NOTE: "bad" does not occur in train_sentences, so it is encoded as <UNK>;
# the model has no learned signal for it and the prediction is illustrative only.
test_sentence = ["The movie was bad"]   # new unseen input
test_seq = tokenizer.texts_to_sequences(test_sentence)
test_pad = pad_sequences(test_seq, maxlen=MAX_LEN, padding='post')

# predict() returns shape (1, 1); take the single sigmoid probability.
prediction = model.predict(test_pad)[0][0]
sentiment = "Positive" if prediction > 0.5 else "Negative"

print("\nTest Sentence: ", test_sentence[0])
print("Predicted Sentiment:", sentiment, "| Probability:", round(float(prediction),4))


Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.6000 - loss: 0.6908
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step - accuracy: 0.6000 - loss: 0.6897
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.6000 - loss: 0.6886
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step - accuracy: 0.6000 - loss: 0.6874
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - accuracy: 0.6000 - loss: 0.6863
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.6000 - loss: 0.6851
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.6000 - loss: 0.6839
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.6000 - loss: 0.6827
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [4]:
# =============================
# Sentiment Analysis using LSTM
# Dataset from CSV (text + label)
# Train on dataset, test on 1 new sentence
# =============================

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.model_selection import train_test_split

# -------------------------
# 1) Load dataset from CSV
# -------------------------
# Make sure Book1.csv is in the same folder as your notebook
# Seed Python, NumPy and TensorFlow in one call so that a fresh
# "Restart Kernel -> Run All" reproduces the same training run.
tf.keras.utils.set_random_seed(42)

df = pd.read_csv("Book1.csv")

# Fail early with a clear message if the CSV does not have the expected schema.
missing_cols = {'text', 'label'} - set(df.columns)
if missing_cols:
    raise ValueError(f"CSV is missing required column(s): {sorted(missing_cols)}")

# Columns: 'text' and 'label'
texts = df['text'].astype(str).values
labels = df['label'].values  # should be 0/1

# binary_crossentropy with a sigmoid output needs 0/1 targets; anything else
# (NaN, strings, other integers) would train on invalid targets or fail later.
if not np.isin(labels, [0, 1]).all():
    raise ValueError("'label' column must contain only 0/1 values")

print("Total samples:", len(texts))

# -------------------------
# 2) Train-test split
# -------------------------
# stratify keeps the positive/negative ratio the same in both splits.
x_train_texts, x_test_texts, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42, stratify=labels
)

# -------------------------
# 3) Tokenize and pad text
# -------------------------
max_words = 1000   # vocab size
max_len = 10       # max words per sentence (keep small for demo)

# Fit the vocabulary on the training texts only, so no test vocabulary leaks
# into training; unseen test words are mapped to "<UNK>".
tokenizer = Tokenizer(num_words=max_words, oov_token="<UNK>")
tokenizer.fit_on_texts(x_train_texts)

x_train_seq = tokenizer.texts_to_sequences(x_train_texts)
x_test_seq  = tokenizer.texts_to_sequences(x_test_texts)

# Zeros are appended after the real tokens; over-long sentences lose their tail.
x_train_padded = pad_sequences(x_train_seq, maxlen=max_len, padding='post', truncating='post')
x_test_padded  = pad_sequences(x_test_seq,  maxlen=max_len, padding='post', truncating='post')

# -------------------------
# 4) Build LSTM model
# -------------------------
# * An explicit Input replaces the deprecated `input_length` Embedding argument
#   and builds the model up front, so model.summary() can report real shapes.
# * mask_zero=True marks index 0 (the padding value) as "no data", so the LSTM
#   skips the padded timesteps instead of updating its state on them.
model = Sequential([
    tf.keras.Input(shape=(max_len,)),
    Embedding(input_dim=max_words, output_dim=16, mask_zero=True),
    LSTM(16),
    Dense(1, activation='sigmoid')
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()

# -------------------------
# 5) Train model
# -------------------------
# NOTE(review): the held-out split is used both as validation_data here and as
# the final test set below. Nothing is tuned on it in this cell, but the
# val_* metrics and the reported test metrics are therefore the same data.
model.fit(
    x_train_padded,
    y_train,
    epochs=20,
    batch_size=4,
    verbose=1,
    validation_data=(x_test_padded, y_test)
)

# -------------------------
# 6) Evaluate on test set
# -------------------------
# evaluate() returns [loss, accuracy] in the order given to compile().
test_loss, test_acc = model.evaluate(x_test_padded, y_test, verbose=0)
print("\nTest accuracy:", round(float(test_acc), 4))
print("Test loss    :", round(float(test_loss), 4))

# -------------------------
# 7) Test on 1 new (unseen) sentence
# -------------------------
new_sentence = "The product was really bad"  # not present in dataset
# Reuse the fitted tokenizer and the same padding settings as for training.
new_seq = tokenizer.texts_to_sequences([new_sentence])
new_pad = pad_sequences(new_seq, maxlen=max_len, padding='post', truncating='post')

# predict() returns shape (1, 1); take the single sigmoid probability.
pred_prob = model.predict(new_pad)[0][0]
pred_label = "Positive" if pred_prob > 0.5 else "Negative"

print("\nNew sentence:", new_sentence)
print("Predicted sentiment:", pred_label, "| Probability:", round(float(pred_prob), 4))


Total samples: 10




Epoch 1/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 645ms/step - accuracy: 0.2500 - loss: 0.6950 - val_accuracy: 0.5000 - val_loss: 0.6934
Epoch 2/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step - accuracy: 0.3750 - loss: 0.6937 - val_accuracy: 0.5000 - val_loss: 0.6934
Epoch 3/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step - accuracy: 0.5000 - loss: 0.6930 - val_accuracy: 0.5000 - val_loss: 0.6935
Epoch 4/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step - accuracy: 0.6250 - loss: 0.6927 - val_accuracy: 0.5000 - val_loss: 0.6937
Epoch 5/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - accuracy: 0.6250 - loss: 0.6925 - val_accuracy: 0.5000 - val_loss: 0.6938
Epoch 6/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step - accuracy: 0.7500 - loss: 0.6922 - val_accuracy: 0.5000 - val_loss: 0.6940
Epoch 7/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━