# Dependencies

In [1]:
# Dependencies
import pandas as pd
import re
import requests
import math 
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from pprint import pprint
from config import api_key, plotly_key
from yelpapi import YelpAPI

# Ideas
<ul>
<li>Average rating vs. category</li>
<li>Average rating vs. price rating</li>
<li>Number of restaurant categories per city</li>
<li>Review count vs. city</li>
</ul>

# Useful Links 
<ul>
<li><a href="https://alcidanalytics.com/p/geographic-heatmap-in-python">Heat maps</a></li>
</ul>

# API Call

In [2]:
# Yelp client built from the API key imported from config
yelp_api = YelpAPI(api_key)

# Prompt for the location to search, then request up to 50 businesses for it
input_string = input("Search query: ")
api_call = yelp_api.search_query(
    location=input_string,
    limit=50,
)

# Register the Plotly account credentials (key also imported from config)
plotly.tools.set_credentials_file(
    username='nguyenkevint94',
    api_key=plotly_key,
)

# Uncomment the next line to inspect the raw response stored in api_call
# pprint(api_call)

Search query: Irvine


In [3]:
# Lists
# One list per DataFrame column. A business is appended to every list or to
# none of them, so the lists always stay the same length and index-aligned.
business_names_list = []
categories_list = []
street_address_list = []
city_list = []
country_list = []
lat_list = []
lon_list = []
ratings_list = []
review_count_list = []
price_ratings = []

# How many businesses were left out because a required field was missing
skipped_count = 0

# Looping through each business in the call
for businesses in api_call["businesses"]:
    try:
        # Name
        name = businesses["name"]

        # Category (only the first listed category is kept)
        category = businesses["categories"][0]["alias"]

        # Street address, city and country
        location = businesses["location"]
        street_address = location["address1"]
        city = location["city"]
        country = location["country"]

        # Latitude / longitude
        coordinates = businesses["coordinates"]
        lat = coordinates["latitude"]
        lon = coordinates["longitude"]

        # Price rating
        # NOTE: Some places do not have a price rating (ie. $, $$, $$$);
        # the missing key raises KeyError and the business is skipped.
        price = businesses["price"]

        # Ratings
        rating = businesses["rating"]

        # Review count
        review_count = businesses["review_count"]
    except (KeyError, IndexError, TypeError):
        # Only lookup failures are treated as "incomplete record":
        #   KeyError   - field absent (e.g. no "price")
        #   IndexError - empty "categories" list
        #   TypeError  - a nested container is None
        # Anything else is a real bug and is allowed to surface.
        skipped_count += 1
        continue

    # Appends
    # Done only after every lookup succeeded, and outside the try block so
    # that an unexpected error here is never silently swallowed.
    business_names_list.append(name)
    categories_list.append(category)
    street_address_list.append(street_address)
    city_list.append(city)
    country_list.append(country)
    lat_list.append(lat)
    lon_list.append(lon)
    price_ratings.append(price)
    ratings_list.append(rating)
    review_count_list.append(review_count)

# Make the filtering visible instead of dropping records without a trace
if skipped_count:
    print(f"Skipped {skipped_count} businesses with missing fields")

# Yelp DataFrame

In [4]:
# Map each output column name to the list of values collected for it
business_details_dict = {
    "Name": business_names_list,
    "Category": categories_list,
    "Street": street_address_list,
    "City": city_list,
    "Country": country_list,
    "Latitude": lat_list,
    "Longitude": lon_list,
    "Rating": ratings_list,
    "Review Count": review_count_list,
    "$": price_ratings,
}

# Build the DataFrame from the column mapping and preview the first rows
yelp_df = pd.DataFrame(data=business_details_dict)
yelp_df.head()

Unnamed: 0,Name,Category,Street,City,Country,Latitude,Longitude,Rating,Review Count,$
0,85°C Bakery Cafe,coffee,2700 Alton Pkwy,Irvine,US,33.68801,-117.834029,4.0,5529,$
1,North Italia,pizza,2957 Michelson Dr,Irvine,US,33.67179,-117.84507,4.5,2395,$$
2,HiroNori Craft Ramen,ramen,2222 Michelson Dr,Irvine,US,33.676153,-117.855475,4.5,1677,$$
3,California Fish Grill,seafood,3988 Barranca Pkwy,Irvine,US,33.684235,-117.809287,4.0,2529,$$
4,Fukada,japanese,8683 Irvine Center Dr,Irvine,US,33.644121,-117.743129,4.0,2398,$$


# Sorting by Review Count

In [5]:
# Businesses ordered from the most-reviewed to the least-reviewed
sorted_df_reviews = yelp_df.sort_values("Review Count", ascending=False)
sorted_df_reviews

Unnamed: 0,Name,Category,Street,City,Country,Latitude,Longitude,Rating,Review Count,$
0,85°C Bakery Cafe,coffee,2700 Alton Pkwy,Irvine,US,33.68801,-117.834029,4.0,5529,$
36,Cream Pan,bakeries,602 El Camino Real,Tustin,US,33.739824,-117.823845,4.5,4336,$
3,California Fish Grill,seafood,3988 Barranca Pkwy,Irvine,US,33.684235,-117.809287,4.0,2529,$$
4,Fukada,japanese,8683 Irvine Center Dr,Irvine,US,33.644121,-117.743129,4.0,2398,$$
1,North Italia,pizza,2957 Michelson Dr,Irvine,US,33.67179,-117.84507,4.5,2395,$$
25,BCD Tofu House,korean,2700 Alton Pkwy,Irvine,US,33.688572,-117.83393,3.5,2154,$$
6,House of Shabu Shabu,japanese,5394 Walnut Ave,Irvine,US,33.690066,-117.770704,4.0,2079,$$
12,CUCINA enoteca Irvine,italian,532 Spectrum Center Dr,Irvine,US,33.65176,-117.746378,4.0,2025,$$
17,All That Barbecue,korean,15333 Culver Dr,Irvine,US,33.696726,-117.798275,4.0,1873,$$
11,A & J Restaurant,chinese,14805 Jeffrey Rd,Irvine,US,33.688076,-117.771562,4.0,1758,$


# Category Shares

In [6]:
# Non-null value counts per category, with Category turned back into a column
biz_categories = yelp_df.groupby("Category").count().reset_index("Category")

# Category names and how many businesses fall under each
# (the count of the "Name" column is used as the per-category total)
categories = biz_categories["Category"]
category_count = biz_categories["Name"]

# Pie slices: one label per category, sized by its business count
labels, values = categories, category_count

# Build the pie trace and render it through Plotly
trace = go.Pie(labels=labels, values=values)
py.iplot([trace], filename="Pie_Chart_Categories")


Consider using IPython.display.IFrame instead



# Price Comparisons

In [7]:
# Non-null value counts per price rating ($), with "$" restored as a column
price_groups = yelp_df.groupby("$").count().reset_index("$")

# Pie slices: one label per price rating, sized by the number of businesses
# (taken from the count of the "Name" column)
labels = price_groups["$"]
values = price_groups["Name"]

# Build the pie trace and render it through Plotly
trace = go.Pie(labels=labels, values=values)
py.iplot([trace], filename="Pie_Chart_Price_Categories")

# Average Review Count vs. Category

In [8]:
# Per-category mean of the numeric columns (result is indexed by Category).
# numeric_only=True keeps the text columns (Name, Street, City, Country, $)
# out of the aggregation; pandas 2.0+ raises a TypeError when asked to
# average them, while older versions dropped them silently.
average_reviews = yelp_df.groupby("Category").mean(numeric_only=True)

# Gathering the top 10 categories with the highest average review count.
# NOTE: the variable keeps its historical name `top_five` but holds 10 rows,
# matching the chart title below.
top_five = average_reviews.nlargest(10, "Review Count").reset_index("Category")

# Category names
categories = top_five["Category"]

# Category review counts (average per category)
review_count = top_five["Review Count"]

# Setting up the bar chart
trace = go.Bar(
    x=categories,
    y=review_count,
    text=categories,
    marker=dict(
        color="rgb(158, 202, 225)",
        line=dict(
            color="rgb(8, 48,107)",
            width=1.5,
        )
    ),
    opacity=0.6
)

data = [trace]
layout = go.Layout(
    title="Top 10 Categories by Average Review Count",
)

# Bar chart
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename="top-10-categories-by-average-review-count")