In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# load data
data_folder = Path.cwd() / '../datasets'
df = pd.read_csv(data_folder / 'restaurants_dk.csv')

In [5]:
df.columns

Index(['restaurant_id', 'restaurant_name', 'country', 'city', 'awards',
       'popularity_detailed', 'top_tags', 'price_level', 'price_range',
       'cuisines', 'special_diets', 'vegetarian_friendly', 'vegan_options',
       'gluten_free', 'avg_rating', 'total_reviews_count', 'food', 'service',
       'value', 'atmosphere', 'keywords'],
      dtype='object')

In [17]:
df

Unnamed: 0,restaurant_id,restaurant_name,country,city,awards,popularity_detailed,top_tags,price_level,price_range,cuisines,...,vegetarian_friendly,vegan_options,gluten_free,avg_rating,total_reviews_count,food,service,value,atmosphere,keywords
0,g1023816-d15059813,French Chicken,Denmark,Taarnby,,#1 of 4 Restaurants in Taarnby,"French, Philippine, Fast food, European",,,"French, Philippine, Fast food, European, Asian...",...,N,N,N,5.0,2.0,,,,,
1,g1023816-d15613910,Tang Bistro,Denmark,Taarnby,,#4 of 4 Restaurants in Taarnby,"Mid-range, Cafe, European, Danish",€€-€€€,,"Cafe, European, Danish, Norwegian",...,N,N,N,2.5,11.0,,,,,
2,g1023816-d17398468,Burger King,Denmark,Taarnby,,#3 of 4 Restaurants in Taarnby,,,,,...,N,N,N,3.0,1.0,,,,,
3,g1023816-d2201829,Restaurant Koebenhavn,Denmark,Taarnby,,#2 of 4 Restaurants in Taarnby,"European, Danish",,,"European, Danish",...,N,N,N,4.5,4.0,,,,,
4,g1056631-d10284114,Hanstholm Madbar,Denmark,Hanstholm,"Travellers' Choice, Certificate of Excellence ...",#1 of 9 Restaurants in Hanstholm,"Mid-range, Seafood, European, Grill",€€-€€€,€9-€34,"Seafood, European, Grill, Gastropub, Scandinav...",...,Y,Y,N,4.5,219.0,4.5,4.5,4.5,,"seafood, great food, a great experience, cozy ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9480,g910730-d939094,Restaurant Brasseriet,Denmark,Holte,,#7 of 15 Restaurants in Holte,Mid-range,€€-€€€,,,...,N,N,N,3.5,32.0,4.0,4.0,4.0,4.0,
9481,g910730-d950918,La Vecchia Enoteca,Denmark,Holte,,#2 of 15 Restaurants in Holte,"Mid-range, Italian, Mediterranean, European",€€-€€€,,"Italian, Mediterranean, European",...,N,N,N,4.0,207.0,4.0,3.5,3.5,4.0,
9482,g947971-d12535404,Strandby Badehotel Restaurant,Denmark,Strandby,,#2 of 2 Restaurants in Strandby,"Mid-range, Seafood, European, Diner",€€-€€€,€13-€47,"Seafood, European, Diner, Swedish, Danish",...,N,N,N,5.0,6.0,,,,,
9483,g947971-d6898484,Den Blå Café,Denmark,Strandby,,#1 of 2 Restaurants in Strandby,"Cheap Eats, Cafe, Seafood, European",€,€3-€23,"Cafe, Seafood, European, Diner, Danish",...,N,N,Y,4.5,43.0,4.5,4.5,4.5,,


In [25]:
# How many restaurants are located in CPH?
res = len(df[df['city'] == 'Copenhagen'])
print (f'How many restaurants are located in CPH? {res}')

How many restaurants are located in CPH? 2445


In [26]:
# How many restaurants offer a vegan option?
res = len(df[df['vegan_options'] == 'Y'])
print (f'How many restaurants offer a vegan option? {res}')

How many restaurants offer a vegan option? 623


In [28]:
# How many restaurants score the highest average rating?
res = len(df[df['avg_rating'] == df['avg_rating'].max()])
print (f'How many restaurants score the highest average rating? {res}')

How many restaurants score the highest average rating? 994


In [30]:
# Identify how many restaurants in Dragoer score the maximum average rating.
df_dragoer = df[df['city'] == 'Dragoer']
res_dragoer_highest_rating = len(df_dragoer[df_dragoer['avg_rating'] == df['avg_rating'].max()])
print (f'How many restaurants in Dragoer score the maximum average rating? {res_dragoer_highest_rating}')

How many restaurants in Dragoer score the maximum average rating? 3


In [34]:
# Create a dataset that contains only restaurants from Copenhagen, Aalborg and Aarhus. Call it "big_citites".
Copenhagen = df[df['city'] == 'Copenhagen']
Aalborg = df[df['city'] == 'Aalborg']
Aarhus = df[df['city'] == 'Aarhus']
big_citites = pd.concat([Copenhagen, Aalborg, Aarhus]).reset_index(drop=True)

In [58]:
# Using the big_cities dataframe check if the mean average rating is higher for restaurants that are vegetarian_friendly.
vegi_friend_y = big_citites[big_citites['vegetarian_friendly'] == 'Y']['avg_rating_mean'].mean()
vegi_friend_n = big_citites[big_citites['vegetarian_friendly'] == 'N']['avg_rating_mean'].mean()

print (f'The mean average rating is higher for restaurants that are vegetarian_friendly? {vegi_friend_y > vegi_friend_n}')

The mean average rating is higher for restaurants that are vegetarian_friendly? True


In [62]:
# Control if the difference is significant.
dif = round(vegi_friend_y - vegi_friend_n, 2)
if dif > 1:
    print (f'A different on {dif} is significant')
else:
    print (f'The diff is {dif} and a different on {dif} is not significant')

The diff is 0.14 and a different on 0.14 is not significant


In [77]:
# Create a new column in the original data. 
# Call it "want_to_go" and set it True if all these conditions are met: 
# the restaurant is in Copenhagen, food and value scores are higher than 4.
# Now create a df whith all the places where you "want to go" and call it my_list

df["want_to_go"] = (df['city'] == 'Copenhagen') & \
                    (df['food'] > 4.0) & \
                    (df['value'] > 4.0)

my_list = df[df['want_to_go'] == True]

In [86]:
# In how many places with a price-level equal to € do you want to go?
res = len(my_list[my_list['price_level'] == '€'])

print (f'In how many places with a price-level equal to € do you want to go? {res}')

In how many places with a price-level equal to € do you want to go? 110


In [79]:
# Work with my_list data. 
# The each rastaurant contains a columns cuisines that represents all the cuisines served in that restaurants. 
# You want to calculate the average score for food and value per each cuisine in the data and 
# identify the cuisines with higher average food score and higher average value score.

Unnamed: 0,restaurant_id,restaurant_name,country,city,awards,popularity_detailed,top_tags,price_level,price_range,cuisines,...,gluten_free,avg_rating,total_reviews_count,food,service,value,atmosphere,keywords,avg_rating_mean,want_to_go
2905,g189541-d10021446,KöD Vesterbro,Denmark,Copenhagen,"Travellers' Choice, Certificate of Excellence ...",#57 of 1969 Restaurants in Copenhagen,"Mid-range, Steakhouse, Barbecue, European",€€-€€€,€22-€61,"Steakhouse, Barbecue, European, Danish",...,Y,4.5,962.0,4.5,4.5,4.5,,"chateaubriand, steak, fries, caesar salad, rav...",3.894824,True
2913,g189541-d10052345,Naked Fish,Denmark,Copenhagen,,#738 of 1969 Restaurants in Copenhagen,"Mid-range, Sushi",€€-€€€,,Sushi,...,N,4.5,31.0,4.5,5.0,4.5,,,3.894824,True
2914,g189541-d1006894,Itzi Pitzi Pizza,Denmark,Copenhagen,Certificate of Excellence 2017,#525 of 1969 Restaurants in Copenhagen,"Cheap Eats, Italian, Pizza, Fast food",€,€4-€8,"Italian, Pizza, Fast food, European",...,N,4.0,83.0,4.5,4.5,4.5,4.0,,4.112051,True
2917,g189541-d10072291,Italo Disco,Denmark,Copenhagen,"Travellers' Choice, Certificate of Excellence ...",#299 of 1969 Restaurants in Copenhagen,"Mid-range, Italian, Mediterranean, European",€€-€€€,,"Italian, Mediterranean, European",...,N,4.5,78.0,4.5,4.5,4.5,,"antipasti, pasta, set menu, great italian food...",4.112051,True
2918,g189541-d10073780,Dessertdragens,Denmark,Copenhagen,,#57 of 75 Dessert Spots in Copenhagen,"Cheap Eats, Dessert, Cafe",€,,Cafe,...,N,4.5,10.0,4.5,4.0,4.5,,,3.894824,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5320,g189541-d9768029,The Olive Kitchen & Bar,Denmark,Copenhagen,"Travellers' Choice, Certificate of Excellence ...",#4 of 1969 Restaurants in Copenhagen,"Mid-range, International, European, Gluten Fre...",€€-€€€,CHF 11-CHF 26,"International, European",...,Y,5.0,2413.0,5.0,5.0,4.5,,"mushroom soup, steak, ribs, duck, creme brûlée",3.894824,True
5325,g189541-d9786813,Reinh. van Hauen Store Kongensgade,Denmark,Copenhagen,,#19 of 38 Bakeries in Copenhagen,"Cheap Eats, Bakeries, Cafe, European",€,CHF 4-CHF 7,"Cafe, European, Danish",...,N,4.5,34.0,4.5,4.5,4.5,,,4.112051,True
5336,g189541-d9853707,Yoburger,Denmark,Copenhagen,Certificate of Excellence 2017,#117 of 171 Quick Bites in Copenhagen,"Cheap Eats, Quick Bites, American, Fast food",€,,,...,N,4.0,54.0,4.5,4.5,4.5,,"fries, burger, bread, shake shack, bun",3.894824,True
5340,g189541-d9859984,RIST Kaffebar,Denmark,Copenhagen,,#34 of 138 Coffee & Tea in Copenhagen,"Mid-range, Quick Bites, Cafe, Diner",€€-€€€,,"Diner, Cafe",...,N,4.5,30.0,4.5,4.5,4.5,,,3.894824,True


In [114]:
df_c = df[df['cuisines'].notna()]

In [116]:
df_c['cuisines'].unique()

array(['French, Philippine, Fast food, European, Asian, Danish',
       'Cafe, European, Danish, Norwegian', 'European, Danish', ...,
       'European, Asian, Thai, Russian, Danish, Georgian',
       'Seafood, European, Diner, Swedish, Danish',
       'Cafe, Seafood, European, Diner, Danish'], dtype=object)

In [131]:
df_c = df.groupby(df['cuisines'])[['food', 'value']].mean().reset_index()
high_average_food_score  = df_c[df_c['food'] == df_c['food'].max()]
high_score = high_average_food_score[high_average_food_score['value'] == high_average_food_score['value'].max()]
high_score

Unnamed: 0,cuisines,food,value
284,"Barbecue, European, Grill, Eastern European, C...",5.0,5.0
307,"Brew Pub, Contemporary, Pub, Diner, Healthy, S...",5.0,5.0
403,"Cafe, European, Scandinavian, Danish, Spanish",5.0,5.0
529,"Contemporary, Danish, French, European, Scandi...",5.0,5.0
530,"Contemporary, European, Danish",5.0,5.0
593,"Deli, French, Danish, European",5.0,5.0
627,"European, Cafe, Street Food",5.0,5.0
677,"European, Danish, Seafood",5.0,5.0
713,"European, Italian, Spanish, Danish",5.0,5.0
757,"European, Scandinavian, Seafood",5.0,5.0


In [None]:
.unique()