# 00-Connect database

In [10]:
import sqlite3
import pandas as pd
from pathlib import Path

# Paths
SQL_OUT = Path("../sql_outputs/takeaway/")
SQL_OUT.mkdir(parents=True, exist_ok=True)

# DB connection
conn = sqlite3.connect("../databases/takeaway.db")

In [None]:
pd.read_sql_query("SELECT 1 AS ok", conn)

Unnamed: 0,ok
0,1


# 01-Price distribution of menu items

In [None]:
sql = """
SELECT price, COUNT(*) 
FROM menuitems 
GROUP BY price 
ORDER BY price
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "01_price_distribution.csv",
    index=False
)

df_result

Unnamed: 0,price,COUNT(*)
0,0.00,99
1,0.01,3
2,0.10,14
3,0.15,1
4,0.20,89
...,...,...
1466,144.00,2
1467,145.00,5
1468,147.00,1
1469,149.99,1


# 02-Resto distribution per location

In [None]:
sql = """
SELECT city, COUNT(*) as num_restaurants 
FROM restaurants 
GROUP BY city
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "02_resto_distribution_by_location.csv",
    index=False
)

df_result

Unnamed: 0,city,num_restaurants
0,,3
1,1040,1
2,Aalst,38
3,Aalter,4
4,Aarschot,8
...,...,...
525,Zulte,3
526,Zutendaal,1
527,Zwevegem,2
528,Zwijnaarde,1


# 03-Top 10 rated pizza

In [None]:
sql = """
SELECT distinct(r.name), r.ratings
from restaurants as r
join categories as c
on r.primarySlug = c.restaurant_id
where c.name like '%Pizza%'
order by r.ratings DESC
limit 10
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "03_top_10_rated_pizza.csv",
    index=False
)

df_result

Unnamed: 0,name,ratings
0,Kebap - Pizza De Brug,5.0
1,De Echte Eethuis Carlos,5.0
2,Munchies Ieper,5.0
3,Trend Kebab & Pizzeria,5.0
4,La Trinacria Bottega,5.0
5,Snack-It,5.0
6,Pizza Leie,4.9
7,The Black Horse,4.9
8,Pyramide,4.9
9,Baskent Meerhout,4.9


# 04-Map locations offering kapsalons and their average price

In [None]:
sql = """
SELECT 
    r.name,
    r.latitude,
    r.longitude,
    AVG(m.price) AS avg_price,
    r.ratings
FROM restaurants AS r
JOIN categories AS c
    ON r.primarySlug = c.restaurant_id
JOIN menuitems AS m
    ON r.primarySlug = m.primarySlug
WHERE c.name LIKE '%Kapsalon%'
GROUP BY r.name, r.latitude, r.longitude
ORDER BY avg_price
"""
df_result = pd.read_sql_query(sql, conn)

df_result.to_csv(
    SQL_OUT / "04_kapsalon_location_avg_price.csv",
    index=False
)

df_result

Unnamed: 0,name,latitude,longitude,avg_price,ratings
0,Snack Clems,50.842117,4.331330,2.704545,0.0
1,Frituur Plantin,51.210295,4.427760,3.038158,2.8
2,'t Gouden Frietje,51.161182,4.132365,3.477273,3.8
3,Frituur Nationale,51.213764,4.397862,3.715368,1.4
4,Snack Omar,51.225302,4.416710,3.893548,3.0
...,...,...,...,...,...
798,Pizza Taliano,50.815410,3.317978,14.814198,4.6
799,Qubbe Lounge,50.979642,5.491545,15.107143,4.8
800,De Waterput,51.332660,4.959082,15.291204,3.9
801,Valento Pizzeria,51.140286,4.337516,15.817000,4.4
