# Exploring the data
---
*With pandas!*

In [1]:
from parse_json import *

## Foursquare:

In [2]:
# Read the Foursquare export and parse it into a DataFrame using the
# Foursquare-specific legend overrides.
json_obj = json_from_file("Foursquare_restaurants_p1.json")
foursquare = parse_object_to_df(
    json_obj,
    legend_discrepencies=FOURSQUARE_LEGEND_DISCREPANCIES,
)
# RatingCount is passed through post_process_series (defined in parse_json;
# the exact clean-up it performs is not visible from this notebook).
foursquare = foursquare.assign(
    RatingCount=post_process_series(foursquare["RatingCount"])
)
foursquare.tail()

50


Unnamed: 0,Name,Price,Type,Rating,RatingCount,Source
45,BeaverTails Canada Inc,1.0,"[desserts, cheese, lunch, chocolate, pastries,...",8.6,221.0,Foursquare
46,Ahora Mexican Cuisine,2.0,"[chicken, Mexican food, lunch, cocktails, taco...",7.8,135.0,Foursquare
47,Chez Lucien,2.0,"[good service, beer, burgers, fruit, cheese, s...",8.8,257.0,Foursquare
48,El Camino,1.0,"[cocktails, tacos, chips and salsa, ox tongue,...",8.8,31.0,Foursquare
49,The Keg Steakhouse + Bar,3.0,"[chicken, cocktails, steak, shrimp, outdoor se...",8.2,98.0,Foursquare


## Yelp:

In [3]:
# Read the Yelp export and parse it into a DataFrame using the Yelp-specific
# legend overrides. The extra "businesses" argument is presumably the key that
# holds the record list in the Yelp payload -- confirm against parse_json.
json_obj = json_from_file("Yelp_restaurants.json")
yelp = parse_object_to_df(
    json_obj,
    "businesses",
    legend_discrepencies=YELP_LEGEND_DISCREPANCIES,
)
yelp.tail()

50


Unnamed: 0,Name,Price,Type,Rating,RatingCount,Source
45,Fiazza Fresh Fired,$$,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,136,Yelp
46,Benny's Bistro,$$,"[{'alias': 'bistros', 'title': 'Bistros'}]",4.5,113,Yelp
47,La Bottega Nicastro Fine Food Shop,$$,"[{'alias': 'delis', 'title': 'Delis'}, {'alias...",4.5,153,Yelp
48,Ahora Mexican Cuisine,$$,"[{'alias': 'mexican', 'title': 'Mexican'}, {'a...",3.5,179,Yelp
49,BeaverTails,$,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",4.5,246,Yelp


## Google:

In [4]:
# Load the separate Google page files (p1..p3) and stack them into one frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# pages are collected in a list and concatenated once. This also avoids
# re-copying the accumulated frame on every iteration.
google_pages = []
for page_number in range(1, 4):
    file_name = f"Google_restaurants_p{page_number}.json"
    json_obj = json_from_file(file_name)
    google_pages.append(
        parse_object_to_df(
            json_obj, legend_discrepencies=GOOGLE_LEGEND_DISCREPANCIES
        )
    )
# ignore_index=True renumbers the rows 0..N-1 across all pages, matching the
# previous append(temp, True) behaviour.
google = pd.concat(google_pages, ignore_index=True)
google.tail()

20
20
20


Unnamed: 0,Name,Price,Type,Rating,RatingCount,Source
55,Lone Star Texas Grill,2.0,"[restaurant, food, point_of_interest, establis...",4.1,1789,Google
56,The King Eddy,2.0,"[meal_delivery, meal_takeaway, bar, restaurant...",4.2,2353,Google
57,iPho,,"[restaurant, food, point_of_interest, establis...",3.7,366,Google
58,Blue Cactus Bar and Grill,2.0,"[restaurant, food, point_of_interest, establis...",3.9,1555,Google
59,Le Casablanca,,"[meal_delivery, meal_takeaway, restaurant, foo...",4.4,211,Google


In [5]:
# Down-sample Google to 50 random rows so its sample size equals the other
# sources. The fixed random_state keeps the selection reproducible.
google = google.sample(n=50, axis=0, random_state=411)
google.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 50 entries, 54 to 46
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Name         50 non-null     object 
 1   Price        38 non-null     float64
 2   Type         50 non-null     object 
 3   Rating       50 non-null     float64
 4   RatingCount  50 non-null     int64  
 5   Source       50 non-null     object 
dtypes: float64(2), int64(1), object(3)
memory usage: 2.7+ KB


## Comparisons

In [6]:
# Report the total number of ratings behind each platform's scores.
# Foursquare's sum is cast to int so it prints without a trailing ".0".
print(
    "\n".join(
        [
            "Total ratings:",
            f'Foursquare: {int(foursquare["RatingCount"].sum())}',
            f'Yelp: {yelp["RatingCount"].sum()}',
            f'Google: {google["RatingCount"].sum()}',
        ]
    )
)

Total ratings:
Foursquare: 3808
Yelp: 3759
Google: 52672


In [7]:
# Foursquare: the 20 highest-rated restaurants, best first.
foursquare.loc[:, ["Name", "Rating", "Price"]].sort_values("Rating", ascending=False).head(20)

Unnamed: 0,Name,Rating,Price
48,El Camino,8.8,1.0
47,Chez Lucien,8.8,2.0
45,BeaverTails Canada Inc,8.6,1.0
38,Play Food & Wine,8.5,3.0
40,E18hteen,8.4,4.0
44,Fiazza Fresh Fired,8.3,1.0
49,The Keg Steakhouse + Bar,8.2,3.0
35,Bridgehead Coffee House,8.1,2.0
34,Sidedoor Contemporary Kitchen & Bar,8.0,3.0
39,Zak's Diner,8.0,2.0


In [8]:
# Yelp: the 20 highest-rated restaurants, best first.
yelp.loc[:, ["Name", "Rating", "Price"]].sort_values("Rating", ascending=False).head(20)

Unnamed: 0,Name,Rating,Price
49,BeaverTails,4.5,$
42,Corazón De Maíz,4.5,$
28,Level One Game Pub,4.5,$$
34,Shanghai Wonton Noodle,4.5,$
35,Food Mood: Korean & Japanese Kitchen,4.5,$$
16,Pili Pili,4.5,$$
13,Sur Lie Restaurant,4.5,$$$$
24,Oz Kafe,4.5,$$$
23,La Catrina,4.5,$$$
8,Gyubee Japanese Grill - Ottawa,4.5,$$$


In [9]:
# Google: the 20 highest-rated restaurants (from the 50-row sample), best first.
google.loc[:, ["Name", "Rating", "Price"]].sort_values("Rating", ascending=False).head(20)

Unnamed: 0,Name,Rating,Price
1,"La Bottega Nicastro, ByWard Market",4.7,
48,BeaverTails Byward Market,4.6,1.0
18,Chez Lucien,4.6,2.0
25,il Perugino Caffè & Bar,4.6,2.0
46,Fairouz Cafe,4.5,3.0
38,Fatboys Southern Smokehouse,4.5,2.0
39,"Oz Kafe, ByWard Market, Ottawa",4.5,
53,Benny's Bistro,4.5,2.0
9,Restaurant e18hteen,4.5,
19,ByWard Market,4.5,


BeaverTails consistently ranks among the highest-rated restaurants on all three platforms!