# 00-Connect database

In [10]:
import sqlite3
import pandas as pd
from pathlib import Path

# Paths
SQL_OUT = Path("../sql_outputs/takeaway/")
SQL_OUT.mkdir(parents=True, exist_ok=True)

# DB connection
conn = sqlite3.connect("../databases/takeaway.db")

In [None]:
pd.read_sql_query("SELECT 1 AS ok", conn)

Unnamed: 0,ok
0,1


# 01-Price distribution of menu items

In [None]:
sql = """
SELECT price, COUNT(*) 
FROM menuitems 
GROUP BY price 
ORDER BY price
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "01_price_distribution.csv",
    index=False
)

df_result

Unnamed: 0,price,COUNT(*)
0,0.00,99
1,0.01,3
2,0.10,14
3,0.15,1
4,0.20,89
...,...,...
1466,144.00,2
1467,145.00,5
1468,147.00,1
1469,149.99,1


# 02-Resto distribution per location

In [None]:
sql = """
SELECT city, COUNT(*) as num_restaurants 
FROM restaurants 
GROUP BY city
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "02_resto_distribution_by_location.csv",
    index=False
)

df_result

Unnamed: 0,city,num_restaurants
0,,3
1,1040,1
2,Aalst,38
3,Aalter,4
4,Aarschot,8
...,...,...
525,Zulte,3
526,Zutendaal,1
527,Zwevegem,2
528,Zwijnaarde,1


# 03-Top 10 rated pizza

In [None]:
sql = """
SELECT distinct(r.name), r.ratings
from restaurants as r
join categories as c
on r.primarySlug = c.restaurant_id
where c.name like '%Pizza%'
order by r.ratings DESC
limit 10
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "03_top_10_rated_pizza.csv",
    index=False
)

df_result

Unnamed: 0,name,ratings
0,Kebap - Pizza De Brug,5.0
1,De Echte Eethuis Carlos,5.0
2,Munchies Ieper,5.0
3,Trend Kebab & Pizzeria,5.0
4,La Trinacria Bottega,5.0
5,Snack-It,5.0
6,Pizza Leie,4.9
7,The Black Horse,4.9
8,Pyramide,4.9
9,Baskent Meerhout,4.9


# 04-Map locations offering kapsalons and their average price

In [None]:
sql = """
SELECT 
    r.name,
    r.latitude,
    r.longitude,
    AVG(m.price) AS avg_price,
    r.ratings
FROM restaurants AS r
JOIN categories AS c
    ON r.primarySlug = c.restaurant_id
JOIN menuitems AS m
    ON r.primarySlug = m.primarySlug
WHERE c.name LIKE '%Kapsalon%'
GROUP BY r.name, r.latitude, r.longitude
ORDER BY avg_price
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "04_kapsalon_location_avg_price.csv",
    index=False
)

df_result

Unnamed: 0,name,latitude,longitude,avg_price,ratings
0,Snack Clems,50.842117,4.331330,2.704545,0.0
1,Frituur Plantin,51.210295,4.427760,3.038158,2.8
2,'t Gouden Frietje,51.161182,4.132365,3.477273,3.8
3,Frituur Nationale,51.213764,4.397862,3.715368,1.4
4,Snack Omar,51.225302,4.416710,3.893548,3.0
...,...,...,...,...,...
798,Pizza Taliano,50.815410,3.317978,14.814198,4.6
799,Qubbe Lounge,50.979642,5.491545,15.107143,4.8
800,De Waterput,51.332660,4.959082,15.291204,3.9
801,Valento Pizzeria,51.140286,4.337516,15.817000,4.4


# 05-Which restaurants have the best price-to-rating ratio?

In [29]:
sql = """
SELECT 
    r.name,
    r.ratings,
    AVG(m.price) AS avg_price,
    AVG(m.price) / r.ratings AS price_to_rating
FROM restaurants AS r
JOIN menuitems AS m
    ON r.primarySlug = m.primarySlug
WHERE r.ratings > 0
GROUP BY r.name, r.ratings
ORDER BY price_to_rating
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "05_best_price_to_rating_ratio.csv",
    index=False
)

df_result

Unnamed: 0,name,ratings,avg_price,price_to_rating
0,Le Corner,5.0,2.363333,0.472667
1,El Bocadillos,4.2,2.000000,0.476190
2,Boulangerie Salam,4.1,2.039535,0.497448
3,Pâtisserie Doce Tentação,5.0,2.625000,0.525000
4,Bazi Boulangerie,4.3,2.335294,0.543092
...,...,...,...,...
3473,Mère Liban,4.4,34.547619,7.851732
3474,Wok van Antwerpen,2.5,20.500000,8.200000
3475,Pizza Hut Delivery,2.1,17.344944,8.259497
3476,Japans Restaurant Amatsu,4.4,38.469697,8.743113


# 06-Where are the delivery ‘dead zones’—areas with minimal restaurant coverage?
Just select all restos wtih locations now and use visualization later to identify the delivery dead zones

In [31]:
sql = """
SELECT
    name,
    latitude,
    longitude
FROM restaurants
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "06_delivery_dead_zones.csv",
    index=False
)

df_result

Unnamed: 0,name,latitude,longitude
0,Pasta Pino,50.743630,3.216888
1,Da Barone et Fils,50.540992,5.460752
2,Pizza Gustosa,50.675491,5.480206
3,La Brezza Nova Rocourt,50.684180,5.549703
4,La Storia,50.663340,5.512421
...,...,...,...
4066,Frituur Snax,50.805719,3.872587
4067,Pitta Dorp,50.870853,4.018145
4068,Nino Grill,50.836477,4.026344
4069,The Salad Spot,50.762280,3.862852


# 07-How does the availability of vegetarian and vegan dishes vary by area?

In [35]:
sql = """
SELECT
    r.name,
    r.latitude,
    r.longitude,
    SUM(c.name LIKE '%Vegetarian%') AS has_vegetarian,
    SUM(c.name LIKE '%Vegan%') AS has_vegan
FROM restaurants AS r
JOIN categories AS c
    ON r.primarySlug = c.restaurant_id
GROUP BY r.name, r.latitude, r.longitude
HAVING SUM(c.name LIKE '%Vegetarian%') + SUM(c.name LIKE '%Vegan%') > 0
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "07_veggie_vegan_by_area.csv",
    index=False
)

df_result

Unnamed: 0,name,latitude,longitude,has_vegetarian,has_vegan
0,'t Frietwinkelken,51.091212,4.151635,0,17
1,'t Krok(r)antje,50.879183,4.728653,0,2
2,2Bangkok Thai,51.057265,3.724585,4,0
3,Aahaar Restaurant,51.215530,4.417400,1,0
4,Aalst Sushi,50.942041,4.054103,0,22
...,...,...,...,...,...
275,Tota Empanadas,51.220123,4.398512,4,3
276,Tota Empanadas 2,51.220123,4.398512,0,3
277,Veggiezond,51.036113,3.710839,0,1
278,Volt,51.161879,4.990860,3,3


# 08-Identify the World Hummus Order (WHO); top 3 hummus serving restaurants

In [44]:
sql = """
SELECT DISTINCT
    r.name,
    r.ratings
FROM restaurants AS r
JOIN menuitems AS m
    ON r.primarySlug = m.primarySlug
WHERE m.name LIKE '%Hummus%'
ORDER BY r.ratings DESC
LIMIT 3
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "08_WHO_top_3_hummus.csv",
    index=False
)

df_result

Unnamed: 0,name,ratings
0,Punto Caffè,5
1,Royal Donuts & Bagels,5
2,Restaurant le Royal,5


# 09-Which restaurants in Leuven provide best balance between rating and price?

In [47]:
sql = """
SELECT 
    r.name,
    r.address,
    AVG(m.price) AS avg_price,
    r.ratings,
    AVG(m.price) / r.ratings AS price_to_rating
FROM restaurants AS r
JOIN menuitems AS m
    ON r.primarySlug = m.primarySlug
WHERE r.city = 'Leuven'
GROUP BY r.name, r.ratings
HAVING r.ratings > 0
ORDER BY price_to_rating
LIMIT 20
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "09_best_Leuven_price_rating.csv",
    index=False
)

df_result

Unnamed: 0,name,address,avg_price,ratings,price_to_rating
0,De Friettoerist,Sint-Jacobsplein 13,2.722018,3.6,0.756116
1,Frietcafé l'Etoile d'Or,Tiensevest 16,3.632197,4.7,0.772808
2,Frituur Den Dreef,Naamsesteenweg 58,3.770714,4.5,0.837937
3,Sahtain,Louis Melsensstraat 5,4.238095,4.8,0.882937
4,Frituur Het Grotteke,Heilige-Geeststraat 102,4.081356,4.5,0.906968
5,Fritboutique,Tiensestraat 58,3.7375,4.1,0.911585
6,De Smullende Heks,Diestsestraat 250,4.176147,4.3,0.971197
7,Falafel Top,Diestsestraat 159,4.8875,4.9,0.997449
8,The Sandwich Factory,Naamsestraat 86,4.827863,4.8,1.005805
9,Bocata,Maria Theresiastraat 12A,3.867683,3.8,1.017811


# 10-Any restaurants with explicit keto or low carb menu items?

In [59]:
sql = """
SELECT DISTINCT
    r.name AS restaurant_name,
    r.city
FROM menuitems AS m
JOIN restaurants AS r
    ON r.primarySlug = m.primarySlug
WHERE m.name LIKE '%keto%'
   OR m.name LIKE '%low carb%'
   OR m.description LIKE '%keto%'
   OR m.description LIKE '%low carb%'
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "10_restaurants_with_low_carb_options.csv",
    index=False
)

df_result

Unnamed: 0,restaurant_name,city
0,Yokoso,Mechelen
1,Yokoso,Gent
2,M'adam Traiteur,Brugge
3,Kroketo by 't Huiskombuis,Brugge
