# Sentimental analysis of swiggy using RNN

In [1]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense,Embedding,SimpleRNN
from tensorflow.keras.models import Sequential

In [2]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
#Load the data
data=pd.read_csv("C:/Users/Rakesh/Desktop/career/AI ML AND DL Practice/Data/swiggy.csv")
print("Columns in a data are:")
print(data.columns.tolist())

Columns in a data are:
['ID', 'Area', 'City', 'Restaurant Price', 'Avg Rating', 'Total Rating', 'Food Item', 'Food Type', 'Delivery Time', 'Review']


In [4]:
data.head()

Unnamed: 0,ID,Area,City,Restaurant Price,Avg Rating,Total Rating,Food Item,Food Type,Delivery Time,Review
0,1,Suburb,Ahmedabad,600,4.2,6198,Sushi,Fast Food,30-40 min,"Good, but nothing extraordinary."
1,2,Business District,Pune,200,4.7,4865,Pepperoni Pizza,Non-Vegetarian,50-60 min,"Good, but nothing extraordinary."
2,3,Suburb,Bangalore,600,4.7,2095,Waffles,Fast Food,50-60 min,Late delivery ruined it.
3,4,Business District,Mumbai,900,4.0,6639,Sushi,Vegetarian,50-60 min,Best meal I've had in a while!
4,5,Tech Park,Mumbai,200,4.7,6926,Spring Rolls,Gluten-Free,20-30 min,Mediocre experience.


In [5]:
data["Review"]=data["Review"].str.lower()
data["Review"]=data["Review"].replace(r'[^a-z0-9\s]','',regex=True)

In [6]:
data["sentiment"]=data["Avg Rating"].apply(lambda x: 1 if x>3.5 else 0)

In [7]:
data=data.dropna()

In [8]:
data.head()

Unnamed: 0,ID,Area,City,Restaurant Price,Avg Rating,Total Rating,Food Item,Food Type,Delivery Time,Review,sentiment
0,1,Suburb,Ahmedabad,600,4.2,6198,Sushi,Fast Food,30-40 min,good but nothing extraordinary,1
1,2,Business District,Pune,200,4.7,4865,Pepperoni Pizza,Non-Vegetarian,50-60 min,good but nothing extraordinary,1
2,3,Suburb,Bangalore,600,4.7,2095,Waffles,Fast Food,50-60 min,late delivery ruined it,1
3,4,Business District,Mumbai,900,4.0,6639,Sushi,Vegetarian,50-60 min,best meal ive had in a while,1
4,5,Tech Park,Mumbai,200,4.7,6926,Spring Rolls,Gluten-Free,20-30 min,mediocre experience,1


In [9]:
max_features=500
max_length=200
tokenizer=Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(data["Review"])
x=pad_sequences(tokenizer.texts_to_sequences(data["Review"]),maxlen=max_length)

In [10]:
y=data["sentiment"].values

In [11]:
X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=43,stratify=y)

In [12]:
X_train,X_val,y_train,y_val=train_test_split(X_train,y_train,test_size=0.2,random_state=42,stratify=y_train)

In [13]:
model=Sequential()

In [14]:
model.add(Embedding(input_dim=max_features,output_dim=16,input_length=max_length))
model.add(SimpleRNN(64,activation="tanh",return_sequences=False))
model.add(Dense(1,activation="sigmoid"))



In [15]:
model.compile(optimizer="adam",
              loss="binary_crossentropy",
              metrics=["accuracy"])

In [16]:
history=model.fit(X_train,y_train,
                  epochs=5,
                  batch_size=32,
                  validation_data=(X_val,y_val),
                  verbose=1)

Epoch 1/5
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 32ms/step - accuracy: 0.6955 - loss: 0.6109 - val_accuracy: 0.7156 - val_loss: 0.6031
Epoch 2/5
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.7160 - loss: 0.5967 - val_accuracy: 0.7156 - val_loss: 0.5994
Epoch 3/5
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.7160 - loss: 0.5958 - val_accuracy: 0.7156 - val_loss: 0.6042
Epoch 4/5
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.7160 - loss: 0.5964 - val_accuracy: 0.7156 - val_loss: 0.6012
Epoch 5/5
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.7160 - loss: 0.5958 - val_accuracy: 0.7156 - val_loss: 0.6025


In [17]:
score=model.evaluate(X_val,y_val,verbose=1)

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7156 - loss: 0.6025


In [18]:
print(f"Test accuracy is :{score[1]}")

Test accuracy is :0.715624988079071


In [19]:
def sentiment_analyse(review_text):
    text = review_text.lower()
    text = re.sub(r'[^a-z0-9\s]', '', text)

    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_length)

    prediction = model.predict(padded)[0][0]
    return f"{'Positive' if prediction >= 0.5 else 'Negative'} (Probability: {prediction:.2f})"


In [20]:

sample_review="Food was amazing"
print(f"Sample data is {sample_review}")
print(f" the sentiment is {sentiment_analyse(sample_review)}")

Sample data is Food was amazing
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 281ms/step
 the sentiment is Positive (Probability: 0.73)


In [21]:
sample_review="Highly recommended"
print(f"Sample data is {sample_review}")
print(f" the sentiment is {sentiment_analyse(sample_review)}")

Sample data is Highly recommended
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
 the sentiment is Positive (Probability: 0.74)


In [43]:
sample_review="Neighter great nor bad"
print(f"Sample data is {sample_review}")
print(f" the sentiment is {sentiment_analyse(sample_review)}")

Sample data is Neighter great nor bad
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
 the sentiment is Positive (Probability: 0.73)


# Implementation of bidirectional RNN

In [46]:
import warnings
warnings.filterwarnings("ignore")
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences

In [51]:
features=2000
max_len=50
(X_train,y_train),(X_test,y_test)=imdb.load_data(num_words=features)

In [52]:
#Defining model architecture
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, SimpleRNN, Dense

embedding_dim = 128  
hidden_units = 64    

model = Sequential()

model.add(Embedding(features, embedding_dim, input_length=max_len))

model.add(Bidirectional(SimpleRNN(hidden_units)))

model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [54]:
batch_size = 32
epochs = 5

model.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(X_test, y_test))

ValueError: Invalid dtype: object