# DATA515 Yelp Reviews

## Visualization Example using MapBox

In [1]:
import os
from pathlib import Path

import pandas as pd
import numpy as np

from mapboxgl.utils import create_color_stops, df_to_geojson
from mapboxgl.viz import CircleViz

import sys
import re

In [3]:
# Put the parent of the current working directory on the import path so the
# local `yelp_reviews` package can be imported from this notebook.
sys.path.append(str(Path(os.getcwd()).parent))

from yelp_reviews import (
    write_api_data,
    get_map_df,
    get_center,
    all_restaurants,
    yelp_search,
    get_indicators,
    get_filter_indicator_df,
)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


FileNotFoundError: [Errno 2] No such file or directory: 'yelp_reviews/reviews.csv'

In [3]:
# Read the Mapbox access token and the Yelp Fusion API key.
#
# The Mapbox token is taken from the MAPBOX_ACCESS_TOKEN environment variable
# instead of being committed in the notebook, where it would leak to anyone
# with access to the file or its history.

dir_path = str(Path(os.getcwd()).parents[0])

access_token = os.environ.get("MAPBOX_ACCESS_TOKEN")
if not access_token:
    raise RuntimeError(
        "Set the MAPBOX_ACCESS_TOKEN environment variable to your Mapbox "
        "access token before running this notebook."
    )

# strip(): a trailing newline in api_key.txt would otherwise become part of the key.
api_key = Path(os.path.join(dir_path, "code", "api_key.txt")).read_text().strip()

In [4]:
# Search parameters passed to write_api_data() below.
# NOTE(review): the Yelp Fusion business-search endpoint documents its
# free-text parameter as `term`; confirm whether write_api_data() expects
# `terms` or whether this key is being silently ignored by the API.

params = dict(
    terms="tacos",
    location="University District, Seattle",
    categories="restaurants",
)

In [8]:
# Retrieve restaurant data from the Yelp API using the search parameters;
# write_api_data() returns the path of the .csv file the data was saved to.
file_path = write_api_data(params)

In [9]:
# Load the saved .csv into a DataFrame, then clean it with get_map_df() so it
# can be converted to the JSON format Mapbox reads.
# NOTE(review): read_csv() is called without index_col, which is presumably why
# "Unnamed: 0" / "Unnamed: 0.1" columns appear in later output -- confirm.

df = pd.read_csv(file_path)
df_map = get_map_df(df)

In [10]:
# Convert the cleaned frame to GeoJSON (missing values replaced by empty
# strings) and obtain the centre point used to position the map.

rest_json = df_to_geojson(
    df_map.fillna(''),
    properties=['name', 'rating', 'price'],
    precision=4,
)
df_center = get_center(df_map)

In [11]:
# Colour each restaurant by its Yelp price tier and render the Mapbox map
# for the full, unfiltered result set.

category_color_stops = [
    [tier, colour]
    for tier, colour in zip(
        ('$', '$$', '$$$', '$$$$'),
        ('rgb(211,47,47)', 'rgb(81,45,168)', 'rgb(2,136,209)', 'rgb(255,160,0)'),
    )
]

viz = CircleViz(
    rest_json,
    access_token=access_token,
    label_property='name',
    color_property='price',
    color_function_type='match',
    color_stops=category_color_stops,
    center=df_center,
    zoom=13,
)

viz.show()



## Filter returned dataset

The data returned by the Yelp API includes many restaurants that seem to have little to do with tacos.  
Further, we would like to filter the dataset by rating and/or price.

In [14]:
# Filter df_map on its "category" column using the listed category names.
# NOTE(review): the exact matching rules live in
# yelp_reviews.get_filter_indicator_df -- confirm there.
df_filter = get_filter_indicator_df(df_map, "category", ["tacos", "texmex", "mexican"])

In [16]:
# GeoJSON payload and map centre for the category-filtered restaurants.
rest_json_filter = df_to_geojson(
    df_filter.fillna(''),
    properties=['name', 'rating', 'price'],
    precision=4,
)
df_center_filter = get_center(df_filter)

In [17]:
# Encode color by price category and render the Mapbox map for the
# category-filtered restaurants.

category_color_stops = [['$', 'rgb(211,47,47)'],
                        ['$$', 'rgb(81,45,168)'],
                        ['$$$','rgb(2,136,209)'],
                        ['$$$$','rgb(255,160,0)']]

# Centre on the filtered data (df_center_filter), not on the centre of the
# unfiltered result set, so the map is positioned over the points it shows.
viz = CircleViz(rest_json_filter,
                access_token = access_token,
                label_property = 'name',
                color_property = 'price',
                color_function_type = 'match',
                color_stops = category_color_stops,
                center = df_center_filter,
                zoom = 13)

viz.show()



In [18]:
# Keep only the filtered restaurants rated above 3, then rebuild the GeoJSON
# payload and the map centre for that subset.
df_filter_rating = df_filter.loc[df_filter["rating"] > 3]

rest_json_filter_rating = df_to_geojson(
    df_filter_rating.fillna(''),
    properties=['name', 'rating', 'price'],
    precision=4,
)
df_center_filter_rating = get_center(df_filter_rating)

In [19]:
# Colour each restaurant by its Yelp price tier and render the Mapbox map
# for the restaurants that passed both the category and the rating filter.

category_color_stops = [
    [tier, colour]
    for tier, colour in zip(
        ('$', '$$', '$$$', '$$$$'),
        ('rgb(211,47,47)', 'rgb(81,45,168)', 'rgb(2,136,209)', 'rgb(255,160,0)'),
    )
]

viz = CircleViz(
    rest_json_filter_rating,
    access_token=access_token,
    label_property='name',
    color_property='price',
    color_function_type='match',
    color_stops=category_color_stops,
    center=df_center_filter_rating,
    zoom=13,
)

viz.show()



In [52]:
# Inspect the raw "transactions" value at label 0; it is stored as the string
# form of a list rather than as a list.
df_map["transactions"][0]

"['delivery', 'restaurant_reservation']"

In [88]:
# Parse one raw "transactions" string into a list of tokens: drop every
# character that is not alphanumeric, a comma or an underscore, then split on
# commas. `txt` is bound here so the cell runs on a fresh kernel.
txt = df_map["transactions"][0]
re.sub('[^A-Za-z0-9,_]+', '', txt).split(",")

['delivery', 'restaurant_reservation']

In [126]:
# Tokenise every "transactions" string (strip everything except
# alphanumerics, commas and underscores, then split on commas) and wrap the
# resulting Series of lists in a one-column DataFrame.
split_test = df_map["transactions"].apply(
    lambda raw: re.sub('[^A-Za-z0-9,_]+', '', raw).split(",")
)
test_df = split_test.to_frame()

In [152]:
# Expand each token list into its own row, one column per list position.
test_ser = test_df["transactions"].apply(pd.Series)

In [148]:
# Attach the "alias" column from df_map as an extra column on test_ser.
test_ser["alias"] = df_map["alias"]

In [159]:
# Transpose so each original row becomes a column, flag every cell equal to
# "delivery", and count the flags per original row.
test_t = test_ser.transpose().eq("delivery")
contains_delivery = test_t.sum()

In [160]:
# Store the per-row "delivery" count as a new column on test_ser.
test_ser["delivery"] = contains_delivery

In [173]:
# Print the column labels currently on test_ser, one per line.
for col in test_ser.columns:
    print(col)

0
1
2
delivery


In [196]:
# NOTE(review): this definition shadows the `get_indicators` imported from
# yelp_reviews at the top of the notebook -- confirm which one is intended.
def get_indicators(df, col_name):
    """Add one count column per distinct token found in a list-like string column.

    Each value of ``df[col_name]`` is expected to be a string such as
    ``"['delivery', 'pickup']"``. Every character that is not alphanumeric,
    a comma or an underscore is removed, the remainder is split on commas,
    and empty tokens are discarded. For every distinct token a column of that
    name is written to ``df`` holding how many times the token occurs in the
    row (0 when absent).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend. It is modified in place.
    col_name : str
        Name of the column holding the list-like strings.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the token columns added in order of
        first appearance.

    Notes
    -----
    Missing or non-string cells are treated as having no tokens instead of
    raising ``TypeError`` inside ``re.sub``.
    """

    def _tokens(value):
        # NaN / None / numeric cells carry no tokens.
        if not isinstance(value, str):
            return []
        cleaned = re.sub('[^A-Za-z0-9,_]+', '', value)
        # "[]" cleans to "", which splits to [""]; drop such empty tokens.
        return [token for token in cleaned.split(",") if token]

    token_lists = df[col_name].apply(_tokens)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the new
    # columns come out in the same order on every run.
    labels = list(dict.fromkeys(
        token for tokens in token_lists for token in tokens
    ))

    for label in labels:
        # Bind `label` as a default argument so each lambda counts its own token.
        df[label] = token_lists.apply(
            lambda tokens, label=label: tokens.count(label)
        )

    return df
    
#def get_unique_transactions(df):
    
    
    
    

In [194]:
# NOTE(review): get_transactions is neither defined nor imported anywhere in
# the visible notebook, so this cell presumably fails on a fresh kernel --
# confirm whether it should call get_indicators() instead or be removed.
col_list = get_transactions(df_map)

In [198]:
# Add one column per distinct "category" token and display the result.
# get_indicators() assigns the new columns onto the frame it is given and
# returns that same frame, so df_map itself gains these columns.
get_indicators(df_map, "category")

Unnamed: 0.1,Unnamed: 0,id,alias,name,url,review_count,rating,transactions,price,phone,...,shavedice,izakaya,pubs,dimsum,sushi,shanghainese,hainan,pizza,bagels,diners
0,0,ud5NuBZka9lVZdRlhQDrjQ,din-tai-fung-seattle,Din Tai Fung,https://www.yelp.com/biz/din-tai-fung-seattle?...,2243,4.0,"['delivery', 'restaurant_reservation']",$$,1.206525e+10,...,0,0,0,1,0,1,0,0,0,0
1,1,m_x3dvMGCvXF0Fj6AeauoA,morsel-seattle,Morsel,https://www.yelp.com/biz/morsel-seattle?adjust...,1033,4.5,"['delivery', 'pickup']",$,1.206268e+10,...,0,0,0,0,0,0,0,0,0,0
2,2,7F473Qfy0eRD6_5lgfWTSA,taste-of-india-seattle,Taste of India,https://www.yelp.com/biz/taste-of-india-seattl...,1667,4.5,"['delivery', 'pickup']",$$,1.206528e+10,...,0,0,0,0,0,0,0,0,0,0
3,3,NCDpIDp2f-DhPO5sL5Hbdw,xian-noodles-seattle-9,Xian Noodles,https://www.yelp.com/biz/xian-noodles-seattle-...,853,4.5,['delivery'],$,1.206523e+10,...,0,0,0,0,0,0,0,0,0,0
4,4,9YeSEzr8HZMCuQlAyr8FPw,thai-tom-seattle,Thai Tom,https://www.yelp.com/biz/thai-tom-seattle?adju...,2232,3.5,[],$,1.206549e+10,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
631,631,BIldC_eQhtNzmXhCSRSOGA,the-bistro-seattle-3,The Bistro,https://www.yelp.com/biz/the-bistro-seattle-3?...,1,3.0,[],,1.206213e+10,...,0,0,0,0,0,0,0,0,0,0
632,632,zJmCi6rbqzwveazbekd63A,alley-cat-seattle,Alley Cat,https://www.yelp.com/biz/alley-cat-seattle?adj...,1,5.0,['delivery'],,1.206588e+10,...,0,0,0,0,0,0,0,0,0,0
633,633,ageaxK4wI-J6EmsnKEH3lQ,lagétta-pasta-and-pub-seattle,Lagétta Pasta & Pub,https://www.yelp.com/biz/lag%C3%A9tta-pasta-an...,1,5.0,"['delivery', 'pickup']",,1.206688e+10,...,0,0,1,0,0,0,0,0,0,0
634,634,hR4F4N8BFtpfZ7JKkQzfzQ,greenlake-boathouse-cafe-seattle,Greenlake Boathouse Cafe,https://www.yelp.com/biz/greenlake-boathouse-c...,1,1.0,['delivery'],,1.206527e+10,...,0,0,0,0,0,0,0,0,0,0


In [136]:
# Check which entries of column 0 contain the substring "delivery".
# NOTE(review): an earlier cell rebinds test_t to the boolean result of a
# comparison with "delivery", while the displayed output looks like it came
# from a frame of strings -- confirm this cell still runs top to bottom.
test_t[0].str.contains("delivery")

0     True
1    False
2      NaN
Name: 0, dtype: object

In [45]:
# Alternative approach: split the raw "transactions" string on commas into
# separate columns; the brackets and quotes remain in the resulting pieces.
df_map["transactions"].str.split(",", expand = True)

Unnamed: 0,0,1,2
0,['delivery','restaurant_reservation'],
1,['delivery','pickup'],
2,['delivery','pickup'],
3,['delivery'],,
4,[],,
...,...,...,...
631,[],,
632,['delivery'],,
633,['delivery','pickup'],
634,['delivery'],,
