In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/sentiment-analysis-dataset/sentiment_analysis.csv


 # load dataset  

In [2]:
df = pd.read_csv("/kaggle/input/sentiment-analysis-dataset/sentiment_analysis.csv")  
print(df.head())


   id  label                                              tweet
0   1      0  #fingerprint #Pregnancy Test https://goo.gl/h1...
1   2      0  Finally a transparant silicon case ^^ Thanks t...
2   3      0  We love this! Would you go? #talk #makememorie...
3   4      0  I'm wired I know I'm George I was made that wa...
4   5      1  What amazing service! Apple won't even talk to...


# using simple RNN

In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Bidirectional, Dense, Dropout, Embedding, SimpleRNN
import tensorflow as tf

df.columns = ["id", "label", "text"]

# text preprocessing
df.dropna(inplace=True)  
df["text"] = df["text"].str.lower()  

# tokenization
vocab_size = 10000  
max_length = 100  

tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(df["text"])
sequences = tokenizer.texts_to_sequences(df["text"])

padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
labels = np.array(df["label"])

# split data
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

# RNN model
rnn_model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128),
    Bidirectional(SimpleRNN(128, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(SimpleRNN(64)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  
])

rnn_model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0005), metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# train the model
history = rnn_model.fit(X_train, y_train, epochs=15, batch_size=32, 
                        validation_data=(X_test, y_test), callbacks=[early_stop])

# save model
rnn_model.save("sentiment_rnn_model.keras")


Epoch 1/15
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 81ms/step - accuracy: 0.7644 - loss: 0.5058 - val_accuracy: 0.8668 - val_loss: 0.3010
Epoch 2/15
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 77ms/step - accuracy: 0.9101 - loss: 0.2336 - val_accuracy: 0.8883 - val_loss: 0.2549
Epoch 3/15
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 77ms/step - accuracy: 0.9380 - loss: 0.1775 - val_accuracy: 0.8826 - val_loss: 0.3580
Epoch 4/15
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 77ms/step - accuracy: 0.9607 - loss: 0.1162 - val_accuracy: 0.8807 - val_loss: 0.3834
Epoch 5/15
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 80ms/step - accuracy: 0.9748 - loss: 0.0733 - val_accuracy: 0.8826 - val_loss: 0.3781
Epoch 6/15
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 78ms/step - accuracy: 0.9846 - loss: 0.0457 - val_accuracy: 0.8681 - val_loss: 0.5224
Epoch 7/15
[1m1

 # Evaluate the model  &  accuracy for RNN model


In [13]:
rnn_loss, rnn_accuracy = rnn_model.evaluate(X_test, y_test, verbose=2)
print(f"\nSimple RNN Test Accuracy: {rnn_accuracy:.4f}")
print(f"Simple RNN Test Loss: {rnn_loss:.4f}")


50/50 - 1s - 19ms/step - accuracy: 0.8883 - loss: 0.2549

Simple RNN Test Accuracy: 0.8883
Simple RNN Test Loss: 0.2549


# Prediction for  RNN model


In [14]:
def predict_rnn_sentiment(text):
    sequence = tokenizer.texts_to_sequences([text.lower()])
    padded = pad_sequences(sequence, maxlen=max_length, padding='post', truncating='post')
    prediction = rnn_model.predict(tf.expand_dims(padded[0], axis=0))[0][0]
    return "Negative" if prediction > 0.5 else "Positive"

# Step 16: Sample Predictions using Simple RNN
sample_texts = [
    "I love this product! It's amazing.",
    "This is the worst experience I've ever had.",
    "I am so happy with this experience",
    "This dress is so ugly",
    "The price of the dress is good"
]

for text in sample_texts:
    print(f"Text: {text} => Sentiment: {predict_rnn_sentiment(text)}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 441ms/step
Text: I love this product! It's amazing. => Sentiment: Positive
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Text: This is the worst experience I've ever had. => Sentiment: Negative
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Text: I am so happy with this experience => Sentiment: Positive
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Text: This dress is so ugly => Sentiment: Negative
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Text: The price of the dress is good => Sentiment: Positive


# using LSTM

In [3]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Bidirectional, Dense, Dropout, Embedding, LSTM
import tensorflow as tf

df.columns = ["id", "label", "text"]

# text preprocessing
df.dropna(inplace=True)  
df["text"] = df["text"].str.lower()  

# tokenization
vocab_size = 10000  
max_length = 100  
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(df["text"])
sequences = tokenizer.texts_to_sequences(df["text"])

padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
labels = np.array(df["label"])


# split Data
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)


#  LSTM model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128),
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(64)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])

model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0005), metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# train the model
history = model.fit(X_train, y_train, epochs=15, batch_size=32, 
                    validation_data=(X_test, y_test), callbacks=[early_stop])

# save model 
model.save("sentiment_rnn_model.keras")



Epoch 1/15
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 244ms/step - accuracy: 0.7789 - loss: 0.4736 - val_accuracy: 0.8870 - val_loss: 0.2682
Epoch 2/15
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 243ms/step - accuracy: 0.9224 - loss: 0.1946 - val_accuracy: 0.8990 - val_loss: 0.2376
Epoch 3/15
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 240ms/step - accuracy: 0.9566 - loss: 0.1239 - val_accuracy: 0.8876 - val_loss: 0.2762
Epoch 4/15
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 242ms/step - accuracy: 0.9710 - loss: 0.0845 - val_accuracy: 0.8712 - val_loss: 0.3407
Epoch 5/15
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 242ms/step - accuracy: 0.9752 - loss: 0.0763 - val_accuracy: 0.8813 - val_loss: 0.4521
Epoch 6/15
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 245ms/step - accuracy: 0.9880 - loss: 0.0415 - val_accuracy: 0.8782 - val_loss: 0.4589
Epoch 7/15

 # Evaluate the model  &  accuracy for LSTM model


In [6]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}") 
print(f"Test Loss: {loss:.4f}")          

[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 69ms/step - accuracy: 0.9019 - loss: 0.2297
Test Accuracy: 0.8990
Test Loss: 0.2376


# Prediction for  LSTM model
 

In [5]:

def predict_sentiment(text):
    sequence = tokenizer.texts_to_sequences([text.lower()])
    padded = pad_sequences(sequence, maxlen=max_length, padding='post', truncating='post')
    prediction = model.predict(tf.expand_dims(padded[0], axis=0))[0][0]
    return "Negative" if prediction > 0.5 else "Positive"
    
sample_texts = [
    "I love this product! It's amazing.",
    "This is the worst experience I've ever had.",
    "i am so happy with this experience",
    "this dress is so ugly",
    "the price of dress is good"
]

for text in sample_texts:
    print(f"Text: {text} => Sentiment: {predict_sentiment(text)}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
Text: I love this product! It's amazing. => Sentiment: Positive
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Text: This is the worst experience I've ever had. => Sentiment: Negative
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Text: i am so happy with this experience => Sentiment: Positive
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Text: this dress is so ugly => Sentiment: Negative
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Text: the price of dress is good => Sentiment: Positive
