In [50]:
import pandas as pd
import sqlite3

customers = pd.read_csv("/kaggle/input/day18-datasets/day_18_customers.csv")
orders = pd.read_csv("/kaggle/input/day18-datasets/day_18_orders.csv")

conn = sqlite3.connect(":memory:")

customers.to_sql("customers", conn, index=False, if_exists="replace")
orders.to_sql("orders", conn, index=False, if_exists="replace")


15

In [51]:
pd.read_sql("SELECT * FROM customers", conn)

Unnamed: 0,customer_id,name,city
0,1,Aditi,Delhi
1,2,Rohan,Mumbai
2,3,Neha,Pune
3,4,Kunal,Delhi
4,5,Meera,Bangalore
5,6,Arjun,Chennai
6,7,Pooja,Mumbai
7,8,Rahul,Pune


In [52]:
pd.read_sql("SELECT * FROM orders", conn)

Unnamed: 0,order_id,customer_id,product,amount,order_date
0,1001,1,Laptop,55000,2024-01-05
1,1002,1,Mouse,800,2024-01-10
2,1003,2,Shoes,3000,2024-01-12
3,1004,2,Watch,4500,2024-02-01
4,1005,3,Bag,2000,2024-02-03
5,1006,3,Shoes,2800,2024-02-20
6,1007,4,Phone,25000,2024-03-01
7,1008,5,Tablet,30000,2024-03-10
8,1009,6,Headphones,3500,2024-03-15
9,1010,7,Camera,40000,2024-03-18


# ðŸŸ¢ LEVEL 1 â€” Concept Warm-up (CASE + simple logic)

**For each order, show order_id, amount and label:
"High Value" if amount â‰¥ 10,000
"Low Value" otherwise**

In [53]:
query = """
SELECT order_id, amount, 
    CASE 
        WHEN amount >= 10000 THEN "High Value"
        ELSE "Low value"
    END AS "label"
FROM orders ;
"""

pd.read_sql(query, conn)

Unnamed: 0,order_id,amount,label
0,1001,55000,High Value
1,1002,800,Low value
2,1003,3000,Low value
3,1004,4500,Low value
4,1005,2000,Low value
5,1006,2800,Low value
6,1007,25000,High Value
7,1008,30000,High Value
8,1009,3500,Low value
9,1010,40000,High Value


**Show total revenue split into:
revenue from orders â‰¥ 5,000
revenue from orders < 5,000**

In [54]:
query = """
SELECT
    SUM(CASE 
        WHEN amount >= 5000 THEN amount 
        ELSE 0 END) as high_revenue, 
    SUM (CASE
        WHEN amount < 5000 THEN amount
        ELSE 0 END ) as low_revenue
FROM orders ;
"""

pd.read_sql(query, conn)

Unnamed: 0,high_revenue,low_revenue
0,150000,29500


**Count how many orders were placed in:
January
February
March**

In [55]:
query = """
SELECT
    SUM(CASE 
        WHEN order_date LIKE '2024-01%' THEN 1
        ELSE 0 END) as jan_orders, 
    SUM (CASE
        WHEN order_date LIKE '2024-02%' THEN 1
        ELSE 0 END ) as feb_orders, 
    SUM(CASE
        WHEN order_date LIKE '2024-03%' THEN 1
        ELSE 0 END) as march_orders 
FROM orders ;
"""

pd.read_sql(query, conn)

Unnamed: 0,jan_orders,feb_orders,march_orders
0,3,3,4


**Show each order and add a column:
"Electronics Buyer" if product is Laptop or Phone
"Other Buyer" otherwise**

In [56]:
query = """
SELECT order_id, amount, 
    CASE 
        WHEN product IN ('Laptop', 'Phone') THEN 'Electronics'
        ELSE "Other buyer"
    END AS "label"
FROM orders ;
"""

pd.read_sql(query, conn)

Unnamed: 0,order_id,amount,label
0,1001,55000,Electronics
1,1002,800,Other buyer
2,1003,3000,Other buyer
3,1004,4500,Other buyer
4,1005,2000,Other buyer
5,1006,2800,Other buyer
6,1007,25000,Electronics
7,1008,30000,Other buyer
8,1009,3500,Other buyer
9,1010,40000,Other buyer


# ðŸŸ¡ LEVEL 2 â€” Subqueries (concept building)

**Find all orders where amount is greater than the average order amount.**

In [57]:
query = """
SELECT *
FROM orders
WHERE amount > (
    SELECT AVG(amount) FROM orders
)
;
"""

pd.read_sql(query, conn)

Unnamed: 0,order_id,customer_id,product,amount,order_date
0,1001,1,Laptop,55000,2024-01-05
1,1007,4,Phone,25000,2024-03-01
2,1008,5,Tablet,30000,2024-03-10
3,1010,7,Camera,40000,2024-03-18


**Find customers who have placed at least one order above 20,000.**

In [58]:
query = """
SELECT DISTINCT customer_id
FROM orders
WHERE customer_id IN (
    SELECT customer_id FROM orders
    WHERE amount > 20000
) ;
"""

pd.read_sql(query, conn)

Unnamed: 0,customer_id
0,1
1,4
2,5
3,7


**Find customers whose total spending is greater than the average total spending of all customers.**

In [59]:
query = """
SELECT customer_id, product, amount
FROM orders
GROUP by customer_id
HAVING SUM(amount) > (
    SELECT AVG(total_spent)
    FROM (
        SELECT SUM(amount) AS total_spent
        FROM orders
        GROUP BY customer_id
    )
)

;
"""

pd.read_sql(query, conn)

Unnamed: 0,customer_id,product,amount
0,1,Laptop,55000
1,4,Phone,25000
2,5,Tablet,30000
3,7,Camera,40000


**Find the most expensive order amount (without using ORDER BY LIMIT).**

In [60]:
query = """
SELECT *
FROM orders
WHERE amount = (
    SELECT MAX(amount)
    FROM orders
)
;
"""

pd.read_sql(query, conn)

Unnamed: 0,order_id,customer_id,product,amount,order_date
0,1001,1,Laptop,55000,2024-01-05


# ðŸŸ  LEVEL 3 â€” Window Functions (gentle introduction)

In [61]:
query = """
SELECT order_id, order_date, amount,
       SUM(amount) OVER (ORDER BY order_date) AS running_revenue
FROM orders;
"""

pd.read_sql(query, conn)

Unnamed: 0,order_id,order_date,amount,running_revenue
0,1001,2024-01-05,55000,55000
1,1002,2024-01-10,800,55800
2,1003,2024-01-12,3000,58800
3,1004,2024-02-01,4500,63300
4,1005,2024-02-03,2000,65300
5,1006,2024-02-20,2800,68100
6,1007,2024-03-01,25000,93100
7,1008,2024-03-10,30000,123100
8,1009,2024-03-15,3500,126600
9,1010,2024-03-18,40000,166600


**For each order, show order_id, order_date, amount and the running total of revenue (ordered by date).**

In [62]:
query = """
SELECT customer_id, order_id, order_date, amount,
       SUM(amount) OVER (
         PARTITION BY customer_id
         ORDER BY order_date
       ) AS customer_running_total
FROM orders;

"""

pd.read_sql(query, conn)

Unnamed: 0,customer_id,order_id,order_date,amount,customer_running_total
0,1,1001,2024-01-05,55000,55000
1,1,1002,2024-01-10,800,55800
2,2,1003,2024-01-12,3000,3000
3,2,1004,2024-02-01,4500,7500
4,2,1015,2024-04-20,2100,9600
5,3,1005,2024-02-03,2000,2000
6,3,1006,2024-02-20,2800,4800
7,4,1007,2024-03-01,25000,25000
8,4,1013,2024-04-10,900,25900
9,5,1008,2024-03-10,30000,30000


**For each customer, show each order and the cumulative total they have spent so far.**

In [63]:
query = """
SELECT order_id, amount,
       AVG(amount) OVER () AS overall_avg
FROM orders;

"""

pd.read_sql(query, conn)

Unnamed: 0,order_id,amount,overall_avg
0,1001,55000,11966.666667
1,1002,800,11966.666667
2,1003,3000,11966.666667
3,1004,4500,11966.666667
4,1005,2000,11966.666667
5,1006,2800,11966.666667
6,1007,25000,11966.666667
7,1008,30000,11966.666667
8,1009,3500,11966.666667
9,1010,40000,11966.666667


**For each order, show amount and also show the overall average order amount using a window function.**

In [64]:
query = """
SELECT order_id, amount,
       AVG(amount) OVER () AS overall_avg
FROM orders;

"""

pd.read_sql(query, conn)

Unnamed: 0,order_id,amount,overall_avg
0,1001,55000,11966.666667
1,1002,800,11966.666667
2,1003,3000,11966.666667
3,1004,4500,11966.666667
4,1005,2000,11966.666667
5,1006,2800,11966.666667
6,1007,25000,11966.666667
7,1008,30000,11966.666667
8,1009,3500,11966.666667
9,1010,40000,11966.666667


**Rank orders by amount from highest to lowest (use window function, no GROUP BY).**

In [65]:
query = """
SELECT order_id, amount,
       RANK() OVER (ORDER BY amount DESC) AS order_rank
FROM orders;

"""

pd.read_sql(query, conn)

Unnamed: 0,order_id,amount,order_rank
0,1001,55000,1
1,1010,40000,2
2,1008,30000,3
3,1007,25000,4
4,1004,4500,5
5,1014,4200,6
6,1009,3500,7
7,1011,3200,8
8,1003,3000,9
9,1006,2800,10


# ðŸ”µ LEVEL 4 â€” Mixed Real-World Thinking (Interview Style)

**For each city, show total revenue and label it:
"Top City" if revenue â‰¥ 50,000
"Developing City" otherwise**

In [66]:
query = """
SELECT c.city,
       SUM(o.amount) AS total_revenue,
       CASE
           WHEN SUM(o.amount) >= 50000 THEN 'Top City'
           ELSE 'Developing City'
       END AS city_status
FROM customers c
JOIN orders o
ON c.customer_id = o.customer_id
GROUP BY c.city;

"""

pd.read_sql(query, conn)

Unnamed: 0,city,total_revenue,city_status
0,Bangalore,32500,Developing City
1,Chennai,7700,Developing City
2,Delhi,81700,Top City
3,Mumbai,49600,Developing City
4,Pune,8000,Developing City


**Find customers who spent more than their cityâ€™s average spending.**

In [67]:
query = """
SELECT c.name
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.name, c.city
HAVING SUM(o.amount) >
(
  SELECT AVG(city_total)
  FROM (
    SELECT SUM(o2.amount) AS city_total
    FROM customers c2
    JOIN orders o2 ON c2.customer_id = o2.customer_id
    GROUP BY c2.city
  )
);

"""

pd.read_sql(query, conn)

Unnamed: 0,name
0,Aditi
1,Pooja


**Show the top 2 highest spending customers using a window function.**

In [68]:
query = """
SELECT *
FROM (
  SELECT customer_id,
         SUM(amount) AS total_spent,
         RANK() OVER (ORDER BY SUM(amount) DESC) AS rnk
  FROM orders
  GROUP BY customer_id
) t
WHERE rnk <= 2;

"""

pd.read_sql(query, conn)

Unnamed: 0,customer_id,total_spent,rnk
0,1,55800,1
1,7,40000,2
