In [1]:
import pandas as pd
import sqlite3

df = pd.read_csv('/kaggle/input/day-14-food-delivery/day_14_food_delivery.csv')
conn = sqlite3.connect(':memory:')
df.to_sql('orders', conn, index=False, if_exists='replace')

45

In [2]:
pd.read_sql("SELECT * FROM orders LIMIT 5;", conn)

Unnamed: 0,order_id,customer_name,city,restaurant,item_category,price,quantity,month,order_value
0,1,Aarav,Delhi,Dominos,Pizza,499,2,Jan,998
1,2,Meera,Mumbai,McDonalds,Burger,299,1,Jan,299
2,3,Rohit,Delhi,Behrouz Biryani,Biryani,649,2,Jan,1298
3,4,Simran,Chandigarh,Dominos,Pizza,459,1,Feb,459
4,5,Kabir,Mumbai,Subway,Wrap,349,1,Feb,349


# Sample queries

***Average order value per city***

In [3]:
query = """
SELECT city,
       ROUND(AVG(order_value), 2) AS avg_order_value
FROM orders
GROUP BY city
ORDER BY avg_order_value DESC;

"""

pd.read_sql(query, conn)

Unnamed: 0,city,avg_order_value
0,Delhi,749.35
1,Bangalore,641.43
2,Mumbai,498.73
3,Chennai,488.5
4,Chandigarh,459.0
5,Jaipur,428.5
6,Pune,390.6


***Restaurants whose average order value is greater than the overall average***

In [4]:
query = """
SELECT restaurant
FROM orders
GROUP BY restaurant
HAVING AVG(order_value) > (
    SELECT AVG(order_value)
    FROM orders
);

"""

pd.read_sql(query, conn)

Unnamed: 0,restaurant
0,Behrouz Biryani
1,Dominos
2,KFC
3,Truffles


***Customers who placed more than 3 orders***

In [5]:
query = """
SELECT customer_name,
       COUNT(*) AS total_orders
FROM orders
GROUP BY customer_name
HAVING COUNT(*) > 3;
"""

pd.read_sql(query, conn)

Unnamed: 0,customer_name,total_orders


***Month-wise total revenue***

In [6]:
query = """
SELECT month,
       SUM(order_value) AS total_revenue
FROM orders
GROUP BY month
ORDER BY total_revenue DESC;

"""

pd.read_sql(query, conn)

Unnamed: 0,month,total_revenue
0,Mar,3192
1,Jun,2772
2,Dec,2674
3,Jan,2595
4,Sep,2255
5,May,2255
6,Apr,2153
7,Nov,2126
8,Jul,2022
9,Aug,1735


***Top 3 restaurants by total revenue***

In [7]:
query = """
SELECT restaurant,
       SUM(order_value) AS total_revenue
FROM orders
GROUP BY restaurant
ORDER BY total_revenue DESC
LIMIT 3;
"""

pd.read_sql(query, conn)

Unnamed: 0,restaurant,total_revenue
0,Dominos,7116
1,Behrouz Biryani,5342
2,Truffles,2594


# Questions to practice

---


## City where Starbucks generates highest revenue

In [8]:
query = """
SELECT city, restaurant, SUM(order_value) as Revenue
FROM orders
WHERE restaurant = 'Starbucks'
GROUP BY city
ORDER BY Revenue DESC
LIMIT 1 ;
"""

pd.read_sql(query, conn)

Unnamed: 0,city,restaurant,Revenue
0,Mumbai,Starbucks,1177


## Customers who ordered from more than one city.

In [9]:
query = """
SELECT customer_name 
FROM orders
GROUP BY customer_name
HAVING(COUNT(DISTINCT city) > 1);
"""

pd.read_sql(query, conn)

Unnamed: 0,customer_name
0,Rohit


## Restaurants with at least 5 orders.

In [10]:
query = """
SELECT restaurant, COUNT(*) as Total_Orders
FROM orders
GROUP BY restaurant
HAVING COUNT(*) >= 5 ;
"""

pd.read_sql(query, conn)

Unnamed: 0,restaurant,Total_Orders
0,Behrouz Biryani,5
1,Dominos,9


## Most popular item_category per city.

In [11]:
query = """
SELECT city, item_category , total_orders
FROM (
    SELECT city, item_category, COUNT(*) as total_orders
    FROM orders
    GROUP BY city, item_category
) t
WHERE total_orders = (
    SELECT MAX(cnt)
    FROM(
    SELECT COUNT(*) as cnt
    FROM orders o2
    WHERE o2.city = t.city
    GROUP BY item_category
    )
);
"""

pd.read_sql(query, conn)

Unnamed: 0,city,item_category,total_orders
0,Bangalore,Burger,3
1,Chandigarh,Pizza,1
2,Chennai,Pizza,1
3,Chennai,South Indian,1
4,Delhi,Pizza,4
5,Jaipur,Burger,1
6,Jaipur,Wrap,1
7,Mumbai,Burger,3
8,Mumbai,Coffee,3
9,Pune,Burger,1


## Restaurants whose revenue consistently increased from Jan → Feb → Mar.

In [12]:
query = """
SELECT city, restaurant, SUM(order_value) as Revenue
FROM orders
WHERE restaurant = 'Starbucks'
GROUP BY city
ORDER BY Revenue DESC
LIMIT 1 ;
"""

pd.read_sql(query, conn)

Unnamed: 0,city,restaurant,Revenue
0,Mumbai,Starbucks,1177


## Customers whose average order_value is higher than their city's average.

In [13]:
query = """
SELECT customer_name
FROM orders o1
GROUP BY customer_name, city
HAVING AVG(order_value) >
(
  SELECT AVG(order_value)
  FROM orders o2
  WHERE o2.city = o1.city
);
"""

pd.read_sql(query, conn)

Unnamed: 0,customer_name
0,Aarav
1,Aditya
2,Arjun
3,Dev
4,Ishaan
5,Kunal
6,Mohit
7,Nikhil
8,Pallavi
9,Pooja


## Top 2 item categories by revenue per city

In [14]:
query = """
SELECT city, item_category, revenue
FROM (
    SELECT city,
           item_category,
           SUM(order_value) AS revenue
    FROM orders
    GROUP BY city, item_category
) t
WHERE (
    SELECT COUNT(*)
    FROM (
        SELECT city, item_category, SUM(order_value) AS rev
        FROM orders
        GROUP BY city, item_category
    ) x
    WHERE x.city = t.city
      AND x.rev > t.revenue
) < 2;
"""

pd.read_sql(query, conn)

Unnamed: 0,city,item_category,revenue
0,Bangalore,Burger,2195
1,Bangalore,Pizza,1457
2,Chandigarh,Pizza,459
3,Chennai,Pizza,459
4,Chennai,South Indian,518
5,Delhi,Biryani,3994
6,Delhi,Pizza,4242
7,Jaipur,Burger,558
8,Jaipur,Wrap,299
9,Mumbai,Burger,1516


## Cities where Dominos revenue > McDonalds revenue.

In [15]:
query = """
SELECT city
FROM (
    SELECT city,
           SUM(CASE WHEN restaurant = 'Dominos' THEN order_value ELSE 0 END) AS dominos_rev,
           SUM(CASE WHEN restaurant = 'McDonalds' THEN order_value ELSE 0 END) AS mcd_rev
    FROM orders
    GROUP BY city
) t
WHERE dominos_rev > mcd_rev;
"""

pd.read_sql(query, conn)

Unnamed: 0,city
0,Bangalore
1,Chandigarh
2,Chennai
3,Delhi
4,Pune


## Customers who never ordered Pizza.

In [16]:
query = """
SELECT DISTINCT customer_name
FROM orders
WHERE customer_name NOT IN (
    SELECT customer_name
    FROM orders
    WHERE item_category = 'Pizza'
);

"""

pd.read_sql(query, conn)

Unnamed: 0,customer_name
0,Meera
1,Rohit
2,Kabir
3,Pooja
4,Neha
5,Ishaan
6,Tanvi
7,Rahul
8,Sana
9,Kavya


## Month with highest total revenue.

In [17]:
query = """
SELECT month,
       SUM(order_value) AS total_revenue
FROM orders
GROUP BY month
ORDER BY total_revenue DESC
LIMIT 1;

"""

pd.read_sql(query, conn)

Unnamed: 0,month,total_revenue
0,Mar,3192


## Rank restaurants by total revenue using derived table.

In [18]:
query = """
SELECT restaurant,
       total_revenue
FROM (
    SELECT restaurant,
           SUM(order_value) AS total_revenue
    FROM orders
    GROUP BY restaurant
) t
ORDER BY total_revenue DESC;
"""

pd.read_sql(query, conn)

Unnamed: 0,restaurant,total_revenue
0,Dominos,7116
1,Behrouz Biryani,5342
2,Truffles,2594
3,Starbucks,1556
4,Faasos,1495
5,Wow Momo,1354
6,KFC,1248
7,Haldirams,1095
8,Burger King,917
9,McDonalds,827


## Customers who placed exactly one order in the year.

In [19]:
query = """
SELECT customer_name
FROM orders
GROUP BY customer_name
HAVING COUNT(*) = 1;
"""

pd.read_sql(query, conn)

Unnamed: 0,customer_name
0,Aarav
1,Aditya
2,Ananya
3,Anjali
4,Ankit
5,Arjun
6,Arpit
7,Dev
8,Divya
9,Gaurav


## City with highest average basket size (avg quantity).

In [20]:
query = """
SELECT city,
       AVG(quantity) AS avg_basket_size
FROM orders
GROUP BY city
ORDER BY avg_basket_size DESC
LIMIT 1;

"""

pd.read_sql(query, conn)

Unnamed: 0,city,avg_basket_size
0,Delhi,1.823529


## Restaurants whose revenue is above median restaurant revenue.

In [21]:
query = """
SELECT restaurant
FROM (
    SELECT restaurant,
           SUM(order_value) AS total_revenue
    FROM orders
    GROUP BY restaurant
) t
WHERE total_revenue >
(
    SELECT AVG(total_revenue)
    FROM (
        SELECT total_revenue
        FROM (
            SELECT restaurant, SUM(order_value) AS total_revenue
            FROM orders
            GROUP BY restaurant
        )
        ORDER BY total_revenue
        LIMIT 2 OFFSET (
            SELECT (COUNT(*) - 1) / 2
            FROM (
                SELECT restaurant
                FROM orders
                GROUP BY restaurant
            )
        )
    )
);

"""

pd.read_sql(query, conn)

Unnamed: 0,restaurant
0,Behrouz Biryani
1,Dominos
2,Faasos
3,Haldirams
4,KFC
5,Starbucks
6,Truffles
7,Wow Momo


## Use EXPLAIN QUERY PLAN on your most complex query and interpret the output.

In [22]:
query = """
EXPLAIN QUERY PLAN
SELECT restaurant,
       SUM(order_value)
FROM orders
GROUP BY restaurant;

"""

pd.read_sql(query, conn)

Unnamed: 0,id,parent,notused,detail
0,6,0,0,SCAN orders
1,8,0,0,USE TEMP B-TREE FOR GROUP BY
