<a href="https://colab.research.google.com/github/madan-genai/1-Review-Sentiment-Analysis-RNN/blob/main/Review_Sentiment_Analysis_using_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [28]:
!pip install -q pandas numpy scikit-learn tensorflow

In [29]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding

In [30]:
df=pd.read_csv("/content/swiggy (1).csv")
df

Unnamed: 0,ID,Area,City,Restaurant Price,Avg Rating,Total Rating,Food Item,Food Type,Delivery Time,Review
0,1,Suburb,Ahmedabad,600,4.2,6198,Sushi,Fast Food,30-40 min,"Good, but nothing extraordinary."
1,2,Business District,Pune,200,4.7,4865,Pepperoni Pizza,Non-Vegetarian,50-60 min,"Good, but nothing extraordinary."
2,3,Suburb,Bangalore,600,4.7,2095,Waffles,Fast Food,50-60 min,Late delivery ruined it.
3,4,Business District,Mumbai,900,4.0,6639,Sushi,Vegetarian,50-60 min,Best meal I've had in a while!
4,5,Tech Park,Mumbai,200,4.7,6926,Spring Rolls,Gluten-Free,20-30 min,Mediocre experience.
...,...,...,...,...,...,...,...,...,...,...
7995,7996,City Center,Mumbai,300,4.0,3303,BBQ Ribs,Vegan,20-30 min,My new favorite dish!
7996,7997,Downtown,Chennai,100,4.7,8742,Butter Chicken,Non-Vegetarian,20-30 min,Amazing taste and quick delivery.
7997,7998,Tech Park,Chennai,900,4.5,4645,Mango Shake,Fast Food,30-40 min,Nothing special but edible.
7998,7999,Old Town,Delhi,500,4.2,3218,Grilled Cheese,Non-Vegetarian,50-60 min,It was okay.


In [31]:
print("Columns in the dataset")
print(df.columns.to_list())

Columns in the dataset
['ID', 'Area', 'City', 'Restaurant Price', 'Avg Rating', 'Total Rating', 'Food Item', 'Food Type', 'Delivery Time', 'Review']


**TEXT CLEANING AND SENIMENT LABELING**

In [32]:
df["Review"]=df["Review"].str.lower()

In [33]:
df["Review"]=df["Review"].replace(r'[^a-z0-9\s]','',regex=True)

In [34]:
df["sentiment"]=df["Avg Rating"].apply (lambda x:1 if x>=3.5 else 0)

In [35]:
df=df.dropna()

**TOKINZATION AND PADDING**

In [36]:
max_features = 5000
max_length = 200

tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(df["Review"])
X = pad_sequences(tokenizer.texts_to_sequences(
    df["Review"]), maxlen=max_length)
y = df['sentiment'].values

**Splitting the data**

In [37]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42, stratify=y_train
)

**Building RNN Model**

In [38]:
model=Sequential([
    Embedding(input_dim=max_features,output_dim=16,input_length=max_length),
    SimpleRNN(64,activation="tanh",return_sequences=False),
    Dense(1,activation="sigmoid")
    ])
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
              )



**Training the model **

In [39]:
model.fit(
    X_train,y_train,
    epochs=5,
    batch_size=32,
    validation_data=(X_val,y_val),
    verbose=1
)

Epoch 1/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 44ms/step - accuracy: 0.8374 - loss: 0.4499 - val_accuracy: 0.8578 - val_loss: 0.4075
Epoch 2/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 48ms/step - accuracy: 0.8510 - loss: 0.4221 - val_accuracy: 0.8578 - val_loss: 0.4084
Epoch 3/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 44ms/step - accuracy: 0.8490 - loss: 0.4251 - val_accuracy: 0.8578 - val_loss: 0.4079
Epoch 4/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 44ms/step - accuracy: 0.8570 - loss: 0.4109 - val_accuracy: 0.8578 - val_loss: 0.4126
Epoch 5/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 49ms/step - accuracy: 0.8539 - loss: 0.4162 - val_accuracy: 0.8578 - val_loss: 0.4093


<keras.src.callbacks.history.History at 0x7a902b5e03e0>

In [40]:
score=model.evaluate(X_test,y_test,verbose=0)
print(f"Test accuracy: {score[1]:.2f}")

Test accuracy: 0.86


**Predicting the Sentiment**

In [41]:
def predict_sentiment(review_text):
    text = review_text.lower()
    text = re.sub(r'[^a-z0-9\s]', '', text)

    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_length)

    prediction = model.predict(padded)[0][0]
    return f"{'Positive' if prediction >= 0.5 else 'Negative'} (Probability: {prediction:.2f})"


sample_review = "The food was great."
print(f"Review: {sample_review}")
print(f"Sentiment: {predict_sentiment(sample_review)}")

Review: The food was great.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step
Sentiment: Positive (Probability: 0.83)


In [42]:
predict_sentiment("i am the best")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step


'Positive (Probability: 0.84)'

In [43]:
predict_sentiment("i am zero")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step


'Positive (Probability: 0.84)'

In [44]:
predict_sentiment("i am the worst")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step


'Positive (Probability: 0.84)'