In [20]:
import pandas as pd
import numpy as np
import re  
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding

In [15]:
# Read the Swiggy dataset from the working directory and list its columns so
# the available fields are visible before any preprocessing.
data = pd.read_csv("swiggy.csv")
print("Columns in the dataset:", list(data.columns))

Columns in the dataset: ['ID', 'Area', 'City', 'Restaurant Price', 'Avg Rating', 'Total Rating', 'Food Item', 'Food Type', 'Delivery Time', 'Review']


In [16]:
# Ratings at or above this value are labelled positive (1); everything else
# (including a missing rating, which compares False) is labelled negative (0).
POSITIVE_RATING_THRESHOLD = 3.5

# Normalise the review text: lower-case it, then strip every character that is
# not a lower-case letter, a digit or whitespace.
data["Review"] = (
    data["Review"]
    .str.lower()
    .str.replace(r"[^a-z0-9\s]", "", regex=True)
)

# Vectorised comparison instead of a per-row Python lambda; the resulting
# 0/1 labels are the same.
# NOTE(review): the label is derived from the restaurant-level 'Avg Rating',
# not from the review text itself -- e.g. "late delivery ruined it" is labelled
# 1 in the preview below. Confirm this is the intended training target.
data["sentiment"] = (data["Avg Rating"] >= POSITIVE_RATING_THRESHOLD).astype("int64")

# Drop every row that has a missing value in any column.
data = data.dropna()
data.head()


Unnamed: 0,ID,Area,City,Restaurant Price,Avg Rating,Total Rating,Food Item,Food Type,Delivery Time,Review,sentiment
0,1,Suburb,Ahmedabad,600,4.2,6198,Sushi,Fast Food,30-40 min,good but nothing extraordinary,1
1,2,Business District,Pune,200,4.7,4865,Pepperoni Pizza,Non-Vegetarian,50-60 min,good but nothing extraordinary,1
2,3,Suburb,Bangalore,600,4.7,2095,Waffles,Fast Food,50-60 min,late delivery ruined it,1
3,4,Business District,Mumbai,900,4.0,6639,Sushi,Vegetarian,50-60 min,best meal ive had in a while,1
4,5,Tech Park,Mumbai,200,4.7,6926,Spring Rolls,Gluten-Free,20-30 min,mediocre experience,1


In [17]:
# Tokenisation settings: vocabulary cap passed to the tokenizer and the fixed
# length every sequence is padded/truncated to.
max_features = 5000
max_len = 200

# Build the word index from the cleaned review text.
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(data["Review"])

# Turn each review into a list of integer ids, then pad to a uniform length.
review_sequences = tokenizer.texts_to_sequences(data["Review"])
X = pad_sequences(review_sequences, maxlen=max_len)

# Target vector: the 0/1 sentiment column as a NumPy array.
y = data["sentiment"].values
data.head()


Unnamed: 0,ID,Area,City,Restaurant Price,Avg Rating,Total Rating,Food Item,Food Type,Delivery Time,Review,sentiment
0,1,Suburb,Ahmedabad,600,4.2,6198,Sushi,Fast Food,30-40 min,good but nothing extraordinary,1
1,2,Business District,Pune,200,4.7,4865,Pepperoni Pizza,Non-Vegetarian,50-60 min,good but nothing extraordinary,1
2,3,Suburb,Bangalore,600,4.7,2095,Waffles,Fast Food,50-60 min,late delivery ruined it,1
3,4,Business District,Mumbai,900,4.0,6639,Sushi,Vegetarian,50-60 min,best meal ive had in a while,1
4,5,Tech Park,Mumbai,200,4.7,6926,Spring Rolls,Gluten-Free,20-30 min,mediocre experience,1


In [18]:
# Two-stage split with a fixed seed: first hold out 20% of the data as the
# test set, then carve 10% of the remainder off as the validation set.
X_remaining, X_test, y_remaining, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_remaining, y_remaining, test_size=0.1, random_state=42
)

## Building the RNN model

In [21]:
# Sequence classifier: embedding -> single SimpleRNN layer -> sigmoid output.
# `input_length` is intentionally not passed to Embedding: recent Keras
# releases deprecate that argument, and the layer takes the sequence length
# from the data it is called on.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=16),
    SimpleRNN(64, activation='tanh', return_sequences=False),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy pairs with the single sigmoid unit and the 0/1 labels;
# accuracy is tracked as the reported metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [22]:
# Train for 5 epochs in mini-batches of 32, reporting validation metrics after
# each epoch (verbose=2 prints one line per epoch).
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=5,
    batch_size=32,
    verbose=2,
)

# Evaluate once on the held-out test set; the first two entries of the result
# are printed as loss and accuracy.
score = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

Epoch 1/5
180/180 - 5s - 28ms/step - accuracy: 0.8569 - loss: 0.4153 - val_accuracy: 0.8438 - val_loss: 0.4327
Epoch 2/5
180/180 - 3s - 19ms/step - accuracy: 0.8582 - loss: 0.4091 - val_accuracy: 0.8438 - val_loss: 0.4318
Epoch 3/5
180/180 - 3s - 19ms/step - accuracy: 0.8582 - loss: 0.4086 - val_accuracy: 0.8438 - val_loss: 0.4307
Epoch 4/5
180/180 - 3s - 19ms/step - accuracy: 0.8582 - loss: 0.4092 - val_accuracy: 0.8438 - val_loss: 0.4305
Epoch 5/5
180/180 - 4s - 20ms/step - accuracy: 0.8582 - loss: 0.4082 - val_accuracy: 0.8438 - val_loss: 0.4315
Test Loss: 0.4109722077846527
Test Accuracy: 0.8587499856948853


In [26]:
def predict_sentiment(review_text, threshold=0.5):
    """Classify a single review as "Positive" or "Negative".

    The text is cleaned the same way as the training reviews (lower-cased,
    then everything except lower-case letters, digits and whitespace is
    removed), converted to a padded id sequence with the fitted ``tokenizer``
    and scored by ``model``.

    Args:
        review_text: Raw review string.
        threshold: Minimum model output that counts as "Positive".
            Defaults to 0.5.

    Returns:
        "Positive" if the model output is at least ``threshold``,
        otherwise "Negative".
    """
    text = review_text.lower()
    text = re.sub(r'[^a-z0-9\s]', '', text)
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_len)
    # verbose=0 keeps the progress-bar escape codes out of the cell output.
    positive_score = model.predict(padded, verbose=0)[0][0]
    return "Positive" if positive_score >= threshold else "Negative"


sample_review = "The food was absolutely hectic, from preparation to presentation, very unpleasant."
print(f"Review: {sample_review}\nPredicted Sentiment: {predict_sentiment(sample_review)}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Review: The food was absolutely hectic, from preparation to presentation, very unpleasant.
Predicted Sentiment: Positive


In [27]:
# `predict_sentiment` is already defined in the previous cell. Redefining an
# identical copy here would silently shadow it and let the two versions drift
# apart, so this cell only calls the existing function.
sample_review = "The food was absolutely hectic, from preparation to presentation, very unpleasant."
print(f"Review: {sample_review}\nPredicted Sentiment: {predict_sentiment(sample_review)}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Review: The food was absolutely hectic, from preparation to presentation, very unpleasant.
Predicted Sentiment: Positive
