# DATA515 Yelp Reviews

## Visualization Example using MapBox

In [1]:
import os
from pathlib import Path

import pandas as pd
import numpy as np

from mapboxgl.utils import create_color_stops, df_to_geojson
from mapboxgl.viz import CircleViz

import sys
import re

import ipywidgets as widgets

In [2]:
print(sys.path.append(str(Path(os.getcwd()).parents[0])))
sys.path.append(str(Path(os.getcwd()).parents[0]))

None


In [3]:
from yelp_reviews import (write_api_data, 
                          get_map_df, 
                          get_center, 
                          all_restaurants, 
                          yelp_search, 
                          get_indicators, 
                          get_filter_indicator_df,
                          get_latest_reviews)

In [4]:
#Set API search parameters.

params = {"terms" : "tacos",
          "location" : "University District, Seattle",
          "categories" : "restaurants"}

In [5]:
#read token for MapBox, API key for Yelp Fusion API

def read_api_key():
    dir_path = str(Path(os.getcwd()).parents[0])
    api_key = Path(os.path.join(dir_path, "yelp_reviews", 
                                "api_key.txt")).read_text()
    return api_key

print(read_api_key())

Y0vpAcCzpLY3l5VSChBzAcRpy-JrWmmaOenfUf-AGrC4lKtc79YDH503ZZSURFVGsAx_I1-Xo0T6YykBPmaOalvnGubVhpIH_K0kfIcWEh0FLftyNyUQ75MXaW0wYHYx


In [6]:
def get_map_df_func(file_path, params, from_api = False):
    if from_api:
        api_key = read_api_key()
        df = write_api_data(api_key, params)
    else:  
        df = pd.read_csv(file_path)
    return df

In [7]:
get_map_df_func('', params, True)

'/Users/chenqingyu/Desktop/515project/Yelp-Reviews/data/api_data.csv'

In [8]:
dir_path = str(Path(os.getcwd()).parents[0])
dir_path

'/Users/chenqingyu/Desktop/515project/Yelp-Reviews'

In [10]:
file_path_taco = os.path.join(dir_path, "data", "api_data_tacos.csv")
file_path_taco

'/Users/chenqingyu/Desktop/515project/Yelp-Reviews/data/api_data_tacos.csv'

In [11]:
#Read .csv file data to dataframe
#Clean dataframe to be readable for mapbox JSON format

df = pd.read_csv(file_path_taco)
df_map = get_map_df(file_path_taco, params, False)
df_map

Unnamed: 0.1,Unnamed: 0,id,alias,name,url,review_count,rating,transactions,price,phone,category,lat,lon
0,0,CFpVuZtTXNmIRRkNaT_fXg,el-camion-seattle-16,El Camion,https://www.yelp.com/biz/el-camion-seattle-16?...,82,4.0,['delivery'],$,,"foodtrucks,mexican",47.661607,-122.287411
1,1,L7EpZh_kFu8NSFrAeXY3ag,off-the-rez-seattle,Off the Rez,https://www.yelp.com/biz/off-the-rez-seattle?a...,195,4.0,"['delivery', 'pickup']",$,1.206415e+10,"foodtrucks,burgers,tacos",47.659920,-122.311825
2,2,USoCUrnuGw1mwzVpo_dlmA,guanacos-tacos-pupuseria-seattle,Guanacos Tacos Pupuseria,https://www.yelp.com/biz/guanacos-tacos-pupuse...,338,4.0,"['delivery', 'pickup']",$$,1.206547e+10,"salvadoran,tacos",47.657141,-122.314029
3,3,Nm0amaHjKa97jR-PeJq6Zw,tnt-taqueria-seattle,TNT Taqueria,https://www.yelp.com/biz/tnt-taqueria-seattle?...,444,4.0,['delivery'],$,1.206322e+10,mexican,47.661509,-122.332939
4,4,Y_smWaJnZrOT3-uRBGkgLA,agua-verde-cafe-seattle,Agua Verde Cafe,https://www.yelp.com/biz/agua-verde-cafe-seatt...,996,3.5,"['delivery', 'pickup']",$$,1.206546e+10,"mexican,venues,breakfast_brunch",47.651610,-122.314410
...,...,...,...,...,...,...,...,...,...,...,...,...,...
151,151,wt3amcB6YvO2TjoO-8hxKg,the-lodge-sports-grille-seattle-3,The Lodge Sports Grille,https://www.yelp.com/biz/the-lodge-sports-gril...,198,2.5,['delivery'],$$,1.206402e+10,"sportsbars,tradamerican",47.690728,-122.355599
152,152,Qo-l9tezF_ApTFzUhnPK1w,revel-seattle,Revel,https://www.yelp.com/biz/revel-seattle?adjust_...,1386,3.5,"['delivery', 'pickup']",$$,1.206547e+10,"korean,newamerican,cocktailbars",47.652033,-122.354123
153,153,6OVriioGG7w7Nwq5Q25WtA,portage-bay-cafe-roosevelt-seattle,Portage Bay Cafe - Roosevelt,https://www.yelp.com/biz/portage-bay-cafe-roos...,2427,4.0,"['restaurant_reservation', 'delivery', 'pickup']",$$,1.206548e+10,"newamerican,breakfast_brunch,cafes",47.657570,-122.317600
154,154,gP921Ghfb7ERWmOjl7r-ng,ivars-salmon-house-seattle-4,Ivars Salmon House,https://www.yelp.com/biz/ivars-salmon-house-se...,1228,3.5,"['pickup', 'delivery']",$$,1.206632e+10,"seafood,lounges,tradamerican",47.653620,-122.324040


In [21]:
filename1 = './data/reviews.csv'
filename2 = './data/reviewCountOnPage.csv'
df_map["latest_reviews"] = get_latest_reviews(filename1, filename2)

In [22]:
df_map["latest_reviews_cleaned"] = df_map["latest_reviews"].str.replace(",", "")

In [26]:
#Return to JSON format, get centroid location from given dataset



#'latest_reviews_cleaned' disabled for now

rest_json = df_to_geojson(df_map.fillna(''),
                          properties=['name', 'rating', 'price'],
                          precision=4)

df_center = get_center(df_map)

In [27]:
rest_json

{"features": [{"geometry": {"coordinates": [-122.2874, 47.6616], "type": "Point"}, "properties": {"name": "El Camion", "price": "$", "rating": 4.0}, "type": "Feature"}, {"geometry": {"coordinates": [-122.3118, 47.6599], "type": "Point"}, "properties": {"name": "Off the Rez", "price": "$", "rating": 4.0}, "type": "Feature"}, {"geometry": {"coordinates": [-122.314, 47.6571], "type": "Point"}, "properties": {"name": "Guanacos Tacos Pupuseria", "price": "$$", "rating": 4.0}, "type": "Feature"}, {"geometry": {"coordinates": [-122.3329, 47.6615], "type": "Point"}, "properties": {"name": "TNT Taqueria", "price": "$", "rating": 4.0}, "type": "Feature"}, {"geometry": {"coordinates": [-122.3144, 47.6516], "type": "Point"}, "properties": {"name": "Agua Verde Cafe", "price": "$$", "rating": 3.5}, "type": "Feature"}, {"geometry": {"coordinates": [-122.3265, 47.6612], "type": "Point"}, "properties": {"name": "Rancho Bravo Tacos", "price": "$", "rating": 4.0}, "type": "Feature"}, {"geometry": {"coord

In [33]:
#Encode color by price category
#Render mapbox map
access_token = read_api_key()

category_color_stops = [['$', 'rgb(211,47,47)'],
                        ['$$', 'rgb(81,45,168)'],
                        ['$$$','rgb(2,136,209)'],
                        ['$$$$','rgb(255,160,0)']]

viz = CircleViz(rest_json,
                access_token = access_token,
                label_property = 'name',
                color_property = 'price',
                color_function_type = 'match',
                color_stops = category_color_stops,
                center = df_center,
                zoom = 13)


viz.show()



## Filter returned dataset

The data returned by Yelp API includes many restaurant that seems to have little to do with tacos.  
Further, we would like to filter the dataset by rating and/or price

In [34]:
df_filter = get_filter_indicator_df(df_map, "category", ["tacos", "texmex", "mexican"])

In [35]:
rest_json_filter = df_to_geojson(df_filter.fillna(''),
                          properties=['name', 'rating', 'price'],
                          precision=4)

df_center_filter = get_center(df_filter)

In [36]:
#Encode color by price category
#Render mapbox map

category_color_stops = [['$', 'rgb(211,47,47)'],
                        ['$$', 'rgb(81,45,168)'],
                        ['$$$','rgb(2,136,209)'],
                        ['$$$$','rgb(255,160,0)']]

viz = CircleViz(rest_json_filter,
                access_token = access_token,
                label_property = 'name',
                color_property = 'price',
                color_function_type = 'match',
                color_stops = category_color_stops,
                center = df_center,
                zoom = 13)

viz.show()



In [37]:
df_filter_rating = df_filter.loc[df_filter.rating > 3]

rest_json_filter_rating = df_to_geojson(df_filter_rating.fillna(''),
                          properties=['name', 'rating', 'price'],
                          precision=4)

df_center_filter_rating = get_center(df_filter_rating)

In [38]:
#Encode color by price category
#Render mapbox map

category_color_stops = [['$', 'rgb(211,47,47)'],
                        ['$$', 'rgb(81,45,168)'],
                        ['$$$','rgb(2,136,209)'],
                        ['$$$$','rgb(255,160,0)']]

viz = CircleViz(rest_json_filter_rating,
                access_token = access_token,
                label_property = 'name',
                color_property = 'price',
                color_function_type = 'match',
                color_stops = category_color_stops,
                center = df_center_filter_rating,
                zoom = 13)

viz.show()



In [None]:
df_map["transactions"][0]

In [None]:
re.sub('[^A-Za-z0-9,_]+', '', txt).split(",")

In [None]:
split_test = df_map["transactions"].apply(lambda x: re.sub('[^A-Za-z0-9,_]+', '', x).split(","))
test_df = pd.DataFrame(data = split_test)

In [None]:
test_ser = test_df["transactions"].apply(pd.Series)

In [None]:
test_ser["alias"] = df_map["alias"]

In [None]:
test_t = test_ser.transpose()
test_t = test_t == "delivery"
contains_delivery = test_t.sum()

In [None]:
test_ser["delivery"] = contains_delivery

In [None]:
for col in test_ser.columns:
    print(col)

In [None]:
def get_indicators(df, col_name):
    
    split_column = df[col_name].apply(lambda x: re.sub('[^A-Za-z0-9,_]+', '', x).split(","))
    temp_df = pd.DataFrame(data = split_column)
    split_df = temp_df[col_name].apply(pd.Series)
    
    col_list = []
    
    for col in split_df.columns:
        col_list.append(list(split_df[col].unique()))
    
    flat_list = list(set([trans for sublist in col_list for trans in sublist if trans not in [np.nan, ""]]))
    
    df_t = split_df.transpose()
    
    for item in flat_list:
        
        df_contains = df_t == item
        contains_ser = df_contains.sum()
        df[item] = contains_ser
        
    return df
    
#def get_unique_transactions(df):
    
    
    
    

In [None]:
col_list = get_transactions(df_map)

In [None]:
get_indicators(df_map, "category")

In [None]:
test_t[0].str.contains("delivery")

In [None]:
df_map["transactions"].str.split(",", expand = True)