In [20]:
import pandas as pd
import numpy as np
import re  
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding

In [15]:
data=pd.read_csv('swiggy.csv')
print("Columns in the dataset:", data.columns.tolist())

Columns in the dataset: ['ID', 'Area', 'City', 'Restaurant Price', 'Avg Rating', 'Total Rating', 'Food Item', 'Food Type', 'Delivery Time', 'Review']


In [16]:
data["Review"]=data["Review"].str.lower()
data["Review"]=data["Review"].replace(r'[^a-z0-9\s]','',regex=True)
data['sentiment']=data['Avg Rating'].apply(lambda x: 1 if x>=3.5 else 0)
data=data.dropna()
data.head()


Unnamed: 0,ID,Area,City,Restaurant Price,Avg Rating,Total Rating,Food Item,Food Type,Delivery Time,Review,sentiment
0,1,Suburb,Ahmedabad,600,4.2,6198,Sushi,Fast Food,30-40 min,good but nothing extraordinary,1
1,2,Business District,Pune,200,4.7,4865,Pepperoni Pizza,Non-Vegetarian,50-60 min,good but nothing extraordinary,1
2,3,Suburb,Bangalore,600,4.7,2095,Waffles,Fast Food,50-60 min,late delivery ruined it,1
3,4,Business District,Mumbai,900,4.0,6639,Sushi,Vegetarian,50-60 min,best meal ive had in a while,1
4,5,Tech Park,Mumbai,200,4.7,6926,Spring Rolls,Gluten-Free,20-30 min,mediocre experience,1


In [17]:
max_features=5000
max_len=200 

tokenizer=Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(data['Review'])
X=pad_sequences(tokenizer.texts_to_sequences(data['Review']),maxlen=max_len)
y=data['sentiment'].values
data.head()


Unnamed: 0,ID,Area,City,Restaurant Price,Avg Rating,Total Rating,Food Item,Food Type,Delivery Time,Review,sentiment
0,1,Suburb,Ahmedabad,600,4.2,6198,Sushi,Fast Food,30-40 min,good but nothing extraordinary,1
1,2,Business District,Pune,200,4.7,4865,Pepperoni Pizza,Non-Vegetarian,50-60 min,good but nothing extraordinary,1
2,3,Suburb,Bangalore,600,4.7,2095,Waffles,Fast Food,50-60 min,late delivery ruined it,1
3,4,Business District,Mumbai,900,4.0,6639,Sushi,Vegetarian,50-60 min,best meal ive had in a while,1
4,5,Tech Park,Mumbai,200,4.7,6926,Spring Rolls,Gluten-Free,20-30 min,mediocre experience,1


In [18]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)
X_train,X_val,y_train,y_val=train_test_split(X_train,y_train,test_size=0.1,random_state=42)

building the rnn model 

In [28]:
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional

model=Sequential([
    Embedding(input_dim=max_features,output_dim=128,input_length=max_len),
    Bidirectional(LSTM(64,return_sequences=True)),
    Dropout(0.5),
    Bidirectional(LSTM(32)),
    Dropout(0.5),
    Dense(64,activation='relu'),
    Dropout(0.3),
    Dense(1,activation='sigmoid')
])

model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])



In [29]:
history=model.fit(X_train,y_train,epochs=10,batch_size=32,validation_data=(X_val,y_val),verbose=2)

score=model.evaluate(X_test,y_test,verbose=0)
print("Test Loss:", score[0])
print("Test Accuracy:", score[1])

Epoch 1/10
180/180 - 22s - 123ms/step - accuracy: 0.8559 - loss: 0.4351 - val_accuracy: 0.8438 - val_loss: 0.4334
Epoch 2/10
180/180 - 16s - 91ms/step - accuracy: 0.8582 - loss: 0.4210 - val_accuracy: 0.8438 - val_loss: 0.4338
Epoch 3/10
180/180 - 17s - 92ms/step - accuracy: 0.8582 - loss: 0.4158 - val_accuracy: 0.8438 - val_loss: 0.4322
Epoch 4/10
180/180 - 17s - 92ms/step - accuracy: 0.8582 - loss: 0.4174 - val_accuracy: 0.8438 - val_loss: 0.4318
Epoch 5/10
180/180 - 17s - 92ms/step - accuracy: 0.8582 - loss: 0.4170 - val_accuracy: 0.8438 - val_loss: 0.4331
Epoch 6/10
180/180 - 16s - 91ms/step - accuracy: 0.8582 - loss: 0.4175 - val_accuracy: 0.8438 - val_loss: 0.4320
Epoch 7/10
180/180 - 17s - 97ms/step - accuracy: 0.8582 - loss: 0.4136 - val_accuracy: 0.8438 - val_loss: 0.4350
Epoch 8/10
180/180 - 18s - 98ms/step - accuracy: 0.8582 - loss: 0.4122 - val_accuracy: 0.8438 - val_loss: 0.4335
Epoch 9/10
180/180 - 18s - 98ms/step - accuracy: 0.8582 - loss: 0.4137 - val_accuracy: 0.8438 -

In [30]:
def predict_sentiment(review_text):
    text=review_text.lower()
    text=re.sub(r'[^a-z0-9\s]','',text)
    seq=tokenizer.texts_to_sequences([text])
    padded=pad_sequences(seq,maxlen=max_len)
    pred=model.predict(padded)[0][0]
    return "Positive" if pred>=0.5 else "Negative"
sample_review="The food was absolutely hectic, from preparation to presentation, very unpleasant."
print(f"Review: {sample_review}\nPredicted Sentiment: {predict_sentiment(sample_review)}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 518ms/step
Review: The food was absolutely hectic, from preparation to presentation, very unpleasant.
Predicted Sentiment: Positive


In [31]:
def predict_sentiment(review_text):
    text=review_text.lower()
    text=re.sub(r'[^a-z0-9\s]','',text)
    seq=tokenizer.texts_to_sequences([text])
    padded=pad_sequences(seq,maxlen=max_len)
    pred=model.predict(padded)[0][0]
    return "Positive" if pred>=0.5 else "Negative"
sample_review="The food was absolutely hectic, from preparation to presentation, very unpleasant."
print(f"Review: {sample_review}\nPredicted Sentiment: {predict_sentiment(sample_review)}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Review: The food was absolutely hectic, from preparation to presentation, very unpleasant.
Predicted Sentiment: Positive


opinion mining 

In [32]:
from transformers import pipeline

In [35]:
sentiment_pipeline=pipeline("sentiment-analysis",model='nlptown/bert-base-multilingual-uncased-sentiment')
text="The food was absolutely hectic, from preparation to presentation, very unpleasant."
result=sentiment_pipeline(text)[0]

label_map={
    '1 star': 'Very Negative',
    '2 stars': 'Negative',
    '3 stars': 'Neutral',
    '4 stars': 'Positive',
    '5 stars': 'Very Positive'
}

Device set to use mps:0


In [36]:
from transformers import pipeline
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
    device="cpu"  # force CPU instead of MPS
)
text = "The food was absolutely hectic, from preparation to presentation, very unpleasant."
result = sentiment_pipeline(text)[0]
label_map = {
    "1 star": "Very Negative",
    "2 stars": "Negative",
    "3 stars": "Neutral",
    "4 stars": "Positive",
    "5 stars": "Very Positive",
}
print(result)
print("Mapped label:", label_map.get(result["label"], result["label"]))

Device set to use cpu


{'label': '1 star', 'score': 0.6021507382392883}
Mapped label: Very Negative


In [37]:
emotion_pipeline = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    device="cpu"  # force CPU instead of MPS
)
text = "The food was absolutely hectic, from preparation to presentation, very unpleasant."
emotion_result = emotion_pipeline(text)[0]
print(emotion_result)

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/329M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/294 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cpu


{'label': 'disgust', 'score': 0.9700794816017151}
