# Dependencies

In [1]:
# Dependencies
import pandas as pd
import re
import requests
import math 
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from pprint import pprint
from config import api_key, plotly_key
from yelpapi import YelpAPI

# Ideas
<ul>
<li>Average rating vs. category</li>
<li>Average rating vs. price rating</li>
<li>Number of restaurant categories per city</li>
<li>Review count vs. city</li>
</ul>

# API Call

In [2]:
# Yelp client built from the key imported from config
yelp_api = YelpAPI(api_key)

# Ask the user for the location to search
input_string = input("Search query: ")

# Query Yelp for that location, passing limit=50
api_call = yelp_api.search_query(
    location=input_string,
    limit=50,
)

# Register the Plotly username and API key via set_credentials_file
plotly.tools.set_credentials_file(
    username='nguyenkevint94',
    api_key=plotly_key,
)

# Uncomment the next line to inspect the raw contents of api_call
# pprint(api_call)

Search query: Orange County, CA


In [3]:
# Parallel lists: position i in every list describes the same business.
business_names_list = []
categories_list = []
street_address_list = []
city_list = []
country_list = []
ratings_list = []
review_count_list = []
price_ratings = []

# Number of businesses dropped because a required field could not be read.
skipped_count = 0

# Extract the fields of interest from each business in the search response.
for business in api_call["businesses"]:
    try:
        name = business["name"]
        # Only the first listed category is kept.
        category = business["categories"][0]["alias"]
        street_address = business["location"]["address1"]
        city = business["location"]["city"]
        country = business["location"]["country"]
        # NOTE: Some places do not have a price rating (ie. $, $$, $$$);
        # the missing key raises KeyError and the record is skipped below.
        price = business["price"]
        rating = business["rating"]
        review_count = business["review_count"]
    except (KeyError, IndexError, TypeError):
        # Incomplete record (missing key, empty category list, or a None
        # where a dict/list was expected). Skip the whole business so the
        # lists stay aligned. Any other exception is a real bug and is
        # allowed to propagate instead of being silently swallowed.
        skipped_count += 1
        continue

    # Append only after every lookup has succeeded, so a business that is
    # missing one field never contributes a partial row.
    business_names_list.append(name)
    categories_list.append(category)
    street_address_list.append(street_address)
    city_list.append(city)
    country_list.append(country)
    price_ratings.append(price)
    ratings_list.append(rating)
    review_count_list.append(review_count)

# Make dropped records visible instead of hiding them.
if skipped_count:
    print(f"Skipped {skipped_count} businesses with missing fields")

# Yelp DataFrame

In [4]:
# Column headers, in the order they should appear in the DataFrame
column_names = ["Name", "Category", "Street", "City",
                "Country", "Rating", "Review Count", "$"]

# Matching column data, in the same order as column_names
column_values = [business_names_list, categories_list, street_address_list,
                 city_list, country_list, ratings_list, review_count_list,
                 price_ratings]

# Pair each header with its list of values
business_details_dict = dict(zip(column_names, column_values))

# Build the DataFrame and display it
yelp_df = pd.DataFrame(data=business_details_dict)
yelp_df

Unnamed: 0,Name,Category,Street,City,Country,Rating,Review Count,$
0,Taqueria El Zamorano,breakfast_brunch,925 W Warner Ave,Santa Ana,US,4.5,1061,$
1,Cream Pan,bakeries,602 El Camino Real,Tustin,US,4.5,4333,$
2,Cauldron Ice Cream,icecream,1421 W MacArthur Blvd,Santa Ana,US,4.0,2614,$
3,Playground,newamerican,220 E 4th St,Santa Ana,US,4.5,2894,$$
4,Ritter's Steam Kettle Cooking,cajun,1421 W MacArthur Blvd,Santa Ana,US,4.0,2455,$$
5,Chicken Maison,mediterranean,3332 S Bristol St,Santa Ana,US,4.5,1840,$
6,Mastro's Steakhouse,steak,633 Anton Blvd,Costa Mesa,US,4.0,3020,$$$$
7,Din Tai Fung,taiwanese,3333 Bristol St,Costa Mesa,US,4.0,3844,$$
8,85°C Bakery Cafe,coffee,2700 Alton Pkwy,Irvine,US,4.0,5530,$
9,Peter's Gourmade Grill,burgers,14311 Newport Ave,Tustin,US,4.5,2212,$


# Sorting by Review Count

In [8]:
# Order businesses from the highest review count to the lowest
sorted_df_reviews = yelp_df.sort_values("Review Count", ascending=False)
sorted_df_reviews

Unnamed: 0,Name,Category,Street,City,Country,Rating,Review Count,$
8,85°C Bakery Cafe,coffee,2700 Alton Pkwy,Irvine,US,4.0,5530,$
1,Cream Pan,bakeries,602 El Camino Real,Tustin,US,4.5,4333,$
24,Gen Korean BBQ House,korean,13741 Newport Ave,Tustin,US,4.0,4020,$$
36,Bruxie,sandwiches,292 N Glassell St,Orange,US,4.0,3960,$
7,Din Tai Fung,taiwanese,3333 Bristol St,Costa Mesa,US,4.0,3844,$$
22,Habana,cuban,2930 Bristol St,Costa Mesa,US,4.0,3508,$$
6,Mastro's Steakhouse,steak,633 Anton Blvd,Costa Mesa,US,4.0,3020,$$$$
14,Bosscat Kitchen and Libations,newamerican,4647 MacArthur Blvd,Newport Beach,US,4.5,2905,$$
3,Playground,newamerican,220 E 4th St,Santa Ana,US,4.5,2894,$$
2,Cauldron Ice Cream,icecream,1421 W MacArthur Blvd,Santa Ana,US,4.0,2614,$


# Category Shares

In [6]:
# Per-category counts for every column, with Category moved back
# from the index into a regular column
biz_categories = yelp_df.groupby("Category").count().reset_index()

# Category names and the number of businesses (counted via "Name") in each
categories = biz_categories["Category"]
category_count = biz_categories["Name"]

# Pie chart inputs for Plotly: one slice per category
labels, values = categories, category_count
trace = go.Pie(labels=labels, values=values)

py.iplot([trace], filename="Pie_Chart_Categories")


Consider using IPython.display.IFrame instead



# Price Comparisons

In [7]:
# Per-price-rating counts for every column, with "$" moved back
# from the index into a regular column
price_groups = yelp_df.groupby("$").count().reset_index()

# Pie chart inputs: price rating as the label, number of businesses as the value
labels = price_groups["$"]
values = price_groups["Name"]
trace = go.Pie(values=values, labels=labels)

py.iplot([trace], filename="Pie_Chart_Price_Categories")

# Average Review Count vs. Category

In [47]:
# Mean of the numeric columns (Rating, Review Count) for each category.
# numeric_only=True is needed because the frame also holds string columns
# (Name, Street, City, ...); on pandas >= 2.0 a plain .mean() raises
# TypeError for those instead of silently dropping them.
average_reviews = yelp_df.groupby("Category").mean(numeric_only=True)

# Top 5 categories by *average* review count, with Category moved back
# from the index into a regular column so it can be used as the x axis.
top_five = average_reviews.nlargest(5, "Review Count").reset_index()

# Category names
categories = top_five["Category"]

# Average review count of each of those categories
review_count = top_five["Review Count"]

# Setting up the bar chart
trace = go.Bar(
    x=categories,
    y=review_count,
    text=categories,
    marker=dict(
        color="rgb(158, 202, 225)",
        line=dict(
            color="rgb(8, 48,107)",
            width=1.5,
        )
    ),
    opacity=0.6
)

data = [trace]
layout = go.Layout(
    title="Top 5 Categories by Average Review Count",
    # Axis titles so the figure can be read on its own
    xaxis=dict(title="Category"),
    yaxis=dict(title="Average Review Count"),
)

# Bar chart
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename="top-5-categories-by-average-review-count")