# **Imports**

In [10]:
# Notebook-wide imports and one-time setup for the review analysis below.
import pandas as pd #data manipulation
import numpy as np #working with arrays
import matplotlib.pyplot as plt #to plot charts
import seaborn as sns #statistical graphs
# Apply the 'ggplot' matplotlib style to every figure drawn after this point.
plt.style.use('ggplot')
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
# SentimentIntensityAnalyzer needs the VADER lexicon; fetch it into nltk_data.
nltk.download('vader_lexicon')
# NOTE(review): AutoTokenizer, AutoModelForCausalLM and AutoConfig are not
# referenced by any cell visible in this notebook - confirm before removing.
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


# **Input**

In [None]:
! pip install -q kaggle
from google.colab import files
files.upload()
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

!kaggle datasets download denizbilginn/google-maps-restaurant-reviews
!mkdir CSV
!unzip google-maps-restaurant-reviews.zip -d CSV
df = pd.read_csv('CSV/reviews.csv')
df.head()

Saving kaggle.json to kaggle.json
mkdir: cannot create directory ‘/root/.kaggle’: File exists
Dataset URL: https://www.kaggle.com/datasets/denizbilginn/google-maps-restaurant-reviews
License(s): ODbL-1.0
google-maps-restaurant-reviews.zip: Skipping, found more recently modified local copy (use --force to force download)
mkdir: cannot create directory ‘CSV’: File exists
Archive:  google-maps-restaurant-reviews.zip
replace CSV/dataset/dataset/indoor_atmosphere/abidin_tantuni_enes_ozturk.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

# **Reducing the DataFrame to the First 19 Reviewed Restaurants**

In [None]:
# Restrict the analysis to reviews of the first 19 distinct restaurants,
# taken in order of first appearance in `df`.
restaurant_col = df['business_name']
unique_restaurants = restaurant_col.unique()
first_19_restaurants = unique_restaurants[:19]

# Boolean row selection; the surviving rows keep their original index labels.
df_first_19 = df.loc[restaurant_col.isin(first_19_restaurants)]
print(df_first_19.head())

# **Charting Restaurant Ratings**

In [None]:
# Figure 1: how many reviews each restaurant received at each rating value.
fig_counts, ax_counts = plt.subplots(figsize=(12, 6))
sns.countplot(
    data=df_first_19,
    x='business_name',
    hue='rating',
    palette='viridis',
    ax=ax_counts,
)
ax_counts.set_title('Ratings of Each Restaurant')
ax_counts.set_xlabel('Restaurant')
ax_counts.set_ylabel('Count of Ratings')
# Long restaurant names overlap unless they are rotated and right-aligned.
plt.setp(ax_counts.get_xticklabels(), rotation=45, ha='right')
ax_counts.legend(title='Rating')
fig_counts.tight_layout()
plt.show()


# Mean rating per restaurant, best-rated first.
average_ratings = (
    df_first_19
    .groupby('business_name')['rating']
    .mean()
    .sort_values(ascending=False)
)

# Figure 2: the average rating of each restaurant on the full 0-5 scale.
fig_avg, ax_avg = plt.subplots(figsize=(12, 6))
sns.barplot(
    x=average_ratings.index,
    y=average_ratings.values,
    palette='viridis',
    ax=ax_avg,
)
ax_avg.set_title('Average Rating for Each Restaurant')
ax_avg.set_xlabel('Restaurant')
ax_avg.set_ylabel('Average Rating')
plt.setp(ax_avg.get_xticklabels(), rotation=45, ha='right')
ax_avg.set_ylim(0, 5)
fig_avg.tight_layout()
plt.show()




# **VADER Demo**

In [None]:
# Score a single review with VADER to show what the analyzer returns:
# a dict with 'neg', 'neu', 'pos' and 'compound' entries.
sia = SentimentIntensityAnalyzer()

# `df_first_19` is a filtered frame that keeps the index labels of `df`, so
# `['text'][20]` is a *label* lookup and raises KeyError if row label 20 was
# filtered out. `.iloc[20]` always picks the 21st remaining review.
demo = df_first_19['text'].iloc[20]
demo_sia = sia.polarity_scores(demo)
print(demo)
print(demo_sia)

# **Sentiment Analysis on the DataFrame Text Column, and Merging it With the DF**

In [None]:
# Run VADER on every review and attach the scores to the review rows.
#
# Scores are aligned to reviews by row index, not by `author_name`: author
# names are not unique per review, so keying on the name would overwrite the
# earlier scores of an author and then copy the last score onto all of that
# author's reviews during the merge.
SCORE_COLUMNS = ['neg', 'neu', 'pos', 'compound']

# Missing text is scored as an empty review instead of crashing the analyzer.
review_text = df_first_19['text'].fillna('').astype(str)
scores = [sia.polarity_scores(text) for text in review_text]

vaders = (
    pd.DataFrame(scores, index=df_first_19.index, columns=SCORE_COLUMNS)
    .join(df_first_19)          # one score row per review, same index labels
    .reset_index(drop=True)
)

# Keep the previous column layout: author first, then scores, then the rest.
vaders = vaders[['author_name'] + [c for c in vaders.columns if c != 'author_name']]
vaders.head()

# **Charting SIA Results**

In [None]:
# Mean VADER compound score per restaurant, most positive first. The order of
# this Series is what later cells use to pick the best and worst restaurant.
average_compound_scores = (
    vaders
    .groupby('business_name')['compound']
    .mean()
    .sort_values(ascending=False)
)

fig_sia, ax_sia = plt.subplots(figsize=(12, 6))
sns.barplot(
    x=average_compound_scores.index,
    y=average_compound_scores.values,
    palette='viridis',
    ax=ax_sia,
)
ax_sia.set_title('Average Compound Sentiment Score for Each Restaurant')
ax_sia.set_xlabel('Restaurant')
ax_sia.set_ylabel('Average Compound Sentiment Score')
# Rotate and right-align the restaurant names so they do not overlap.
plt.setp(ax_sia.get_xticklabels(), rotation=45, ha='right')
fig_sia.tight_layout()
plt.show()



```
# This is formatted as code
```

# **Reviews Summary of the best and worst reviewed restaurants**

In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration

MODEL_NAME = 'facebook/bart-large-cnn'

model = BartForConditionalGeneration.from_pretrained(MODEL_NAME)
tokenizer = BartTokenizer.from_pretrained(MODEL_NAME)

# The encoder can only address as many positions as the model was configured
# with; truncating at a larger length (previously 2048) lets over-long input
# through and generation fails with an index error in the position embeddings.
MAX_INPUT_TOKENS = model.config.max_position_embeddings


def summarize_reviews(reviews, max_summary_tokens=150):
    """Summarize one block of review text with the BART model loaded above.

    Args:
        reviews: All reviews of one restaurant joined into a single string.
            Text beyond MAX_INPUT_TOKENS tokens is truncated before summarizing.
        max_summary_tokens: Upper bound on the length of the generated summary.

    Returns:
        A list of decoded summary strings, one per generated sequence
        (a single element here, since one text is passed in).
    """
    inputs = tokenizer(
        [reviews],
        max_length=MAX_INPUT_TOKENS,
        return_tensors='pt',
        truncation=True,
    )
    summary_ids = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        num_beams=4,
        max_length=max_summary_tokens,
        early_stopping=True,
    )
    return [
        tokenizer.decode(ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        for ids in summary_ids
    ]


# `average_compound_scores` is sorted in descending order, so the first entry
# is the most positively reviewed restaurant and the last is the least.
winner = average_compound_scores.index[0]
loser = average_compound_scores.index[-1]

winner_restaurant_df = vaders[vaders['business_name'] == winner]
loser_restaurant_df = vaders[vaders['business_name'] == loser]

# Drop missing reviews and force str so the join cannot fail on non-string cells.
reviewsW = " ".join(winner_restaurant_df['text'].dropna().astype(str))
reviewsL = " ".join(loser_restaurant_df['text'].dropna().astype(str))

summaryW = summarize_reviews(reviewsW)
summaryL = summarize_reviews(reviewsL)





In [None]:
# Report the best and worst restaurant by average compound sentiment, each
# followed by its generated summary (printed as the list returned by decoding).
best_headline = f"The best reviewed restaurant to visit would be {winner}. Some highlights would be: "
worst_headline = f"The worst reviewed restaurant to visit would be {loser}. Some highlights would be: "

print(best_headline)
print(summaryW)
print("\n")
print(worst_headline)
print(summaryL)