# Dependencies

In [1]:
# Dependencies
import pandas as pd
import re
import requests
import math 
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from pprint import pprint
from config import api_key, plotly_key
from yelpapi import YelpAPI

# Ideas
<ul>
<li>Average rating vs. category</li>
<li>Average rating vs. price rating</li>
<li>Number of restaurant categories per city</li>
<li>Review count vs. city</li>
</ul>

# Useful Links 
<ul><li><a href="https://alcidanalytics.com/p/geographic-heatmap-in-python">Heat maps</a></li></ul>

# API Call

In [69]:
# Yelp client, authenticated with the key imported from config
yelp_api = YelpAPI(api_key)

# Location to search, typed in by the user (e.g. "Santa Monica, CA").
# Surrounding whitespace is stripped and a blank answer is rejected here,
# so the problem is reported before any request is sent.
input_string = input("Search query: ").strip()
if not input_string:
    raise ValueError("Search query must be a non-empty location, e.g. 'Santa Monica, CA'")

# Business search for that location; limit=50 caps how many businesses
# come back in this single response.
api_call = yelp_api.search_query(location=input_string, limit=50)

# Store the Plotly username / API key so later py.iplot calls can authenticate
plotly.tools.set_credentials_file(username='nguyenkevint94', api_key=plotly_key)

# Delete hashtag to view the contents of api_call
# pprint(api_call)

Search query: Santa Monica, CA


In [70]:
# Parallel lists: position i in every list describes the same business,
# so all eight must always be appended to together.
business_names_list = []
categories_list = []
street_address_list = []
city_list = []
country_list = []
ratings_list = []
review_count_list = []
price_ratings = []

# Number of businesses left out because a required field was missing
skipped_count = 0

# Looping through each business in the call
for business in api_call["businesses"]:
    try:
        # Read every field first; nothing is appended until all of them
        # are present, which keeps the lists the same length.
        name = business["name"]

        # Only the first listed category alias is kept
        category = business["categories"][0]["alias"]

        location = business["location"]
        street_address = location["address1"]
        city = location["city"]
        country = location["country"]

        # NOTE: Some places do not have a price rating (ie. $, $$, $$$);
        # those raise KeyError here and are skipped on purpose.
        price = business["price"]

        rating = business["rating"]
        review_count = business["review_count"]
    except (KeyError, IndexError, TypeError):
        # Missing key, empty category list, or a null nested value:
        # skip this business. Any other kind of error is left to surface
        # instead of being silently swallowed.
        skipped_count += 1
        continue

    business_names_list.append(name)
    categories_list.append(category)
    street_address_list.append(street_address)
    city_list.append(city)
    country_list.append(country)
    price_ratings.append(price)
    ratings_list.append(rating)
    review_count_list.append(review_count)

# Make the amount of dropped data visible rather than hiding it
if skipped_count:
    print(f"Skipped {skipped_count} business(es) with missing fields")

# Yelp DataFrame

In [71]:
# Column headings, in display order, and the list that fills each one
column_names = ["Name", "Category", "Street", "City",
                "Country", "Rating", "Review Count", "$"]
column_values = [business_names_list, categories_list, street_address_list, city_list,
                 country_list, ratings_list, review_count_list, price_ratings]

# Heading -> values mapping that backs the DataFrame
business_details_dict = dict(zip(column_names, column_values))

# One row per business that was collected from the search response
yelp_df = pd.DataFrame(business_details_dict)
yelp_df

Unnamed: 0,Name,Category,Street,City,Country,Rating,Review Count,$
0,Bay Cities Italian Deli,delis,1517 Lincoln Blvd,Santa Monica,US,4.0,4340,$
1,Tar & Roses,newamerican,602 Santa Monica Blvd,Santa Monica,US,4.5,1986,$$$
2,The Misfit Restaurant + Bar,bars,225 Santa Monica Blvd,Santa Monica,US,4.0,3573,$$
3,Philz Coffee,coffee,525 Santa Monica Blvd,Santa Monica,US,4.5,1572,$
4,Fritto Misto,italian,601 Colorado Ave,Santa Monica,US,4.5,2265,$$
5,Sidecar Doughnuts & Coffee,donuts,631 Wilshire Blvd,Santa Monica,US,4.5,1487,$$
6,Urth Caffé - Santa Monica,breakfast_brunch,2327 Main St,Santa Monica,US,4.0,3082,$$
7,Benny's Tacos & Chicken Rotisserie,mexican,915 Wilshire Blvd,Santa Monica,US,4.5,1397,$
8,True Food Kitchen,newamerican,395 Santa Monica Pl,Santa Monica,US,4.0,2102,$$
9,Sunny Blue,japanese,2728 Main St,Santa Monica,US,4.5,1202,$


# Sorting by Review Count

In [72]:
# Businesses ordered from most-reviewed to least-reviewed.
# sort_values returns a new frame, so yelp_df keeps its original order.
sorted_df_reviews = yelp_df.sort_values("Review Count", ascending=False)
sorted_df_reviews

Unnamed: 0,Name,Category,Street,City,Country,Rating,Review Count,$
0,Bay Cities Italian Deli,delis,1517 Lincoln Blvd,Santa Monica,US,4.0,4340,$
2,The Misfit Restaurant + Bar,bars,225 Santa Monica Blvd,Santa Monica,US,4.0,3573,$$
6,Urth Caffé - Santa Monica,breakfast_brunch,2327 Main St,Santa Monica,US,4.0,3082,$$
11,Father's Office,burgers,1018 Montana Ave,Santa Monica,US,4.0,2995,$$
16,Umami Burger - Santa Monica,burgers,525 Broadway,Santa Monica,US,4.0,2445,$$
4,Fritto Misto,italian,601 Colorado Ave,Santa Monica,US,4.5,2265,$$
8,True Food Kitchen,newamerican,395 Santa Monica Pl,Santa Monica,US,4.0,2102,$$
18,The Lobster,bars,1602 Ocean Ave,Santa Monica,US,4.0,2069,$$$
1,Tar & Roses,newamerican,602 Santa Monica Blvd,Santa Monica,US,4.5,1986,$$$
10,DK's Donuts & Bakery,donuts,1614 Santa Monica Blvd,Santa Monica,US,4.0,1935,$


# Category Shares

In [73]:
# One row per category; every other column holds the count of non-null
# values in that category. "Category" is moved back from the index into
# a regular column so it can be read by name below.
biz_categories = yelp_df.groupby("Category").count().reset_index()

# Pie slices: category aliases as labels, and the per-category count of
# the "Name" column as the slice sizes.
categories = biz_categories["Category"]
category_count = biz_categories["Name"]
labels = categories
values = category_count

# Pie chart of how the businesses are split across categories
trace = go.Pie(labels=labels, values=values)
py.iplot([trace], filename= "Pie_Chart_Categories")

# Price Comparisons

In [74]:
# One row per price rating ($, $$, ...); "$" is moved back from the
# index into a regular column after counting.
price_groups = yelp_df.groupby("$").count().reset_index()

# Pie slices: the price rating is the label, the per-rating count of the
# "Name" column is the slice size.
labels = price_groups["$"]
values = price_groups["Name"]

# Pie chart of how the businesses are split across price ratings
trace = go.Pie(labels=labels, values=values)
py.iplot([trace], filename= "Pie_Chart_Price_Categories")

# Average Review Count vs. Category

In [77]:
# How many categories the chart shows (the title and filename say "10")
top_n = 10

# Mean rating and mean review count per category.
# The two numeric columns are selected explicitly: yelp_df also holds text
# columns (Name, Street, ...), and recent pandas versions raise on those
# instead of quietly leaving them out of the mean.
average_reviews = yelp_df.groupby("Category")[["Rating", "Review Count"]].mean()

# The top_n categories with the highest average review count, with
# "Category" moved back from the index into a regular column.
top_categories = average_reviews.nlargest(top_n, "Review Count").reset_index()
# Earlier name for the same frame, kept in case other cells still use it
top_five = top_categories

# Category names (x axis) and their average review counts (y axis)
categories = top_categories["Category"]
review_count = top_categories["Review Count"]

# Setting up the bar chart
bar_outline = dict(color="rgb(8, 48,107)", width=1.5)
trace = go.Bar(
    x=categories,
    y=review_count,
    text=categories,
    marker=dict(color="rgb(158, 202, 225)", line=bar_outline),
    opacity=0.6,
)

data = [trace]
layout = go.Layout(
    title=f"Top {top_n} Categories by Average Review Count",
)

# Bar chart
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename="top-10-categories-by-average-review-count")