# Dependencies

In [2]:
# Dependencies
import pandas as pd
import re
import requests
import math 
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from pprint import pprint
from config import api_key, plotly_key
from yelpapi import YelpAPI

# Ideas
<ul>
<li>Average rating vs. category</li>
<li>Average rating vs. price rating</li>
<li>Number of restaurant categories per city</li>
<li>Review count vs. city</li>
</ul>

# Useful Links 
<ul><li><a href="https://alcidanalytics.com/p/geographic-heatmap-in-python">Heat maps</a></li></ul>

# API Call

In [3]:
# Client for the Yelp API, authenticated with the key from config.py
yelp_api = YelpAPI(api_key)

# Location to search for (e.g. "Atlanta, GA"); surrounding whitespace is dropped
input_string = input("Search query: ").strip()
if not input_string:
    # Fail fast with a clear message instead of sending an empty location to Yelp
    raise ValueError("Search query must not be empty; enter a location such as 'Atlanta, GA'.")

# Ask for up to 50 businesses matching the location in a single request
api_call = yelp_api.search_query(location=input_string, limit=50)

# Register the Plotly credentials (presumably what the py.iplot calls in the
# chart cells rely on to authenticate)
plotly.tools.set_credentials_file(username='nguyenkevint94', api_key=plotly_key)

# Uncomment the next line to inspect the raw contents of api_call
# pprint(api_call)

Search query: Atlanta, GA


In [4]:
# One list per DataFrame column; re-created on every run so the cell is idempotent
business_names_list = []
categories_list = []
street_address_list = []
city_list = []
country_list = []
lat_list = []
lon_list = []
ratings_list = []
review_count_list = []
price_ratings = []

# Number of businesses dropped because a field was missing from the response
skipped_count = 0

# Extract the fields of interest from each business in the response.
# A business is kept only if EVERY field can be read, so that all lists stay
# the same length and line up row-for-row.
for business in api_call["businesses"]:
    try:
        # Name
        name = business["name"]

        # Category (only the first listed category alias is kept)
        category = business["categories"][0]["alias"]

        # Street address, city, country
        street_address = business["location"]["address1"]
        city = business["location"]["city"]
        country = business["location"]["country"]

        # Latitude / longitude
        lat = business["coordinates"]["latitude"]
        lon = business["coordinates"]["longitude"]

        # Price rating
        # NOTE: Some places do not have a price rating (ie. $, $$, $$$)
        price = business["price"]

        # Rating and review count
        rating = business["rating"]
        review_count = business["review_count"]
    except (KeyError, IndexError, TypeError):
        # Missing key, empty category list, or a None where a dict was
        # expected: skip this business. Any other exception is a real bug
        # and is allowed to surface instead of being silently swallowed.
        skipped_count += 1
        continue

    # Append only after every lookup succeeded, so a business without a
    # price rating never leaves a partial row behind.
    business_names_list.append(name)
    categories_list.append(category)
    street_address_list.append(street_address)
    city_list.append(city)
    country_list.append(country)
    lat_list.append(lat)
    lon_list.append(lon)
    price_ratings.append(price)
    ratings_list.append(rating)
    review_count_list.append(review_count)

# Make the amount of dropped data visible rather than hiding it
print(f"Kept {len(business_names_list)} businesses; skipped {skipped_count} with missing fields")

# Yelp DataFrame

In [5]:
# Map each output column to the list of values collected for it
business_details_dict = {
    "Name": business_names_list,
    "Category": categories_list,
    "Street": street_address_list,
    "City": city_list,
    "Country": country_list,
    "Latitude": lat_list,
    "Longitude": lon_list,
    "Rating": ratings_list,
    "Review Count": review_count_list,
    "$": price_ratings,
}

# Build the DataFrame from the column mapping and preview the first rows
yelp_df = pd.DataFrame(data=business_details_dict)
yelp_df.head()

Unnamed: 0,Name,Category,Street,City,Country,Latitude,Longitude,Rating,Review Count,$
0,Poor Calvin's,asianfusion,510 Piedmont Ave NE,Atlanta,US,33.7684,-84.38226,4.5,2482,$$
1,Aviva by Kameel,mediterranean,225 Peachtree St NE,Atlanta,US,33.760538,-84.386555,5.0,1220,$$
2,Fox Bros. Bar-B-Q,bbq,1238 Dekalb Ave Ne,Atlanta,US,33.761051,-84.347502,4.5,3195,$$
3,South City Kitchen - Midtown,southern,1144 Crescent Ave NE,Atlanta,US,33.786,-84.38456,4.5,2223,$$
4,Antico Pizza,pizza,1093 Hemphill Ave NW,Atlanta,US,33.784642,-84.405652,4.0,2834,$$


# Sorting by Review Count

In [6]:
# Rank businesses from most- to least-reviewed; the result is a new frame
# bound to its own name, and yelp_df is not reassigned
sorted_df_reviews = yelp_df.sort_values("Review Count", ascending=False)
sorted_df_reviews

Unnamed: 0,Name,Category,Street,City,Country,Latitude,Longitude,Rating,Review Count,$
2,Fox Bros. Bar-B-Q,bbq,1238 Dekalb Ave Ne,Atlanta,US,33.761051,-84.347502,4.5,3195,$$
9,Mary Mac's Tea Room,southern,224 Ponce De Leon Ave NE,Atlanta,US,33.77286,-84.37988,4.0,3140,$$
4,Antico Pizza,pizza,1093 Hemphill Ave NW,Atlanta,US,33.784642,-84.405652,4.0,2834,$$
0,Poor Calvin's,asianfusion,510 Piedmont Ave NE,Atlanta,US,33.7684,-84.38226,4.5,2482,$$
20,Two Urban Licks,bars,820 Ralph McGill Blvd NE,Atlanta,US,33.768456,-84.361274,4.0,2256,$$
3,South City Kitchen - Midtown,southern,1144 Crescent Ave NE,Atlanta,US,33.786,-84.38456,4.5,2223,$$
5,Atlanta Breakfast Club,breakfast_brunch,249 Ivan Allen Jr Blvd,Atlanta,US,33.764665,-84.39545,4.5,1931,$$
7,Fat Matt's Rib Shack,bbq,1811 Piedmont Ave NE,Atlanta,US,33.804595,-84.367137,4.0,1922,$$
23,FLIP burger boutique,burgers,1587 Howell Mill Rd,Atlanta,US,33.798343,-84.415749,4.0,1699,$$
8,Canoe,newamerican,4199 Paces Ferry Rd SE,Atlanta,US,33.859863,-84.455464,4.5,1555,$$$


# Category Shares

In [7]:
# Businesses per category: group on "Category", count the non-null entries of
# every other column, then move the group key back into a regular column
biz_categories = yelp_df.groupby("Category").count().reset_index()
categories = biz_categories["Category"]

# Pie-slice labels: one per category
labels = categories

# Pie-slice sizes: the per-category count of the "Name" column is used as the
# number of businesses in that category
category_count = biz_categories["Name"]
values = category_count

# Pie chart of category shares
trace = go.Pie(labels=labels, values=values)

py.iplot([trace], filename= "Pie_Chart_Categories")


Consider using IPython.display.IFrame instead



# Price Comparisons

In [8]:
# Businesses per price tier: group on the "$" column, count the other columns,
# and restore "$" as a regular column
price_groups = yelp_df.groupby("$").count().reset_index()

# Slice labels are the price tiers; slice sizes are the per-tier "Name" counts
labels = price_groups["$"]
values = price_groups["Name"]

# Pie chart of price-tier shares
trace = go.Pie(values=values, labels=labels)
py.iplot([trace], filename="Pie_Chart_Price_Categories")

# Average Review Count vs. Category

In [9]:
# Number of categories shown in the chart (also used in the title and filename)
TOP_N = 10

# Per-category mean of the numeric columns. numeric_only=True keeps the text
# columns (Name, Street, City, ...) out of the aggregation; without it, pandas
# 2.0 and later raise a TypeError when asked to average them.
average_reviews = yelp_df.groupby("Category").mean(numeric_only=True)

# The TOP_N categories with the highest average review count, with "Category"
# moved from the index back into a regular column
top_categories = average_reviews.nlargest(TOP_N, "Review Count").reset_index()
# Legacy alias: the frame used to be called top_five although it held 10 rows
top_five = top_categories

# Category names (x axis)
categories = top_categories["Category"]

# Average review count per category (y axis)
review_count = top_categories["Review Count"]

# Setting up the bar chart
trace = go.Bar(
    x=categories,
    y=review_count,
    text=categories,
    marker=dict(
        color="rgb(158, 202, 225)",
        line=dict(
            color="rgb(8, 48,107)",
            width=1.5,
        )
    ),
    opacity=0.6
)

data = [trace]
layout = go.Layout(
    title=f"Top {TOP_N} Categories by Average Review Count",
)

# Bar chart
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename=f"top-{TOP_N}-categories-by-average-review-count")