In [2]:
import pandas as pd
import sqlite3

customers = pd.read_csv('/kaggle/input/day-15-datasets/day_15_customers.csv')
orders = pd.read_csv('/kaggle/input/day-15-datasets/day_15_orders.csv')

conn = sqlite3.connect(':memory:')
customers.to_sql('customers', conn, index=False, if_exists='replace')
orders.to_sql('orders', conn, index=False, if_exists='replace')

18

In [3]:
pd.read_sql("SELECT * FROM orders LIMIT 5;", conn)

Unnamed: 0,order_id,customer_id,product,category,amount,month
0,101,1,Laptop,Electronics,55000,Jan
1,102,1,Mouse,Electronics,700,Feb
2,103,2,Shoes,Fashion,2500,Jan
3,104,2,T-Shirt,Fashion,1200,Feb
4,105,2,Jeans,Fashion,3000,Mar


In [4]:
pd.read_sql("SELECT * FROM customers LIMIT 5;", conn)

Unnamed: 0,customer_id,name,city
0,1,Aanya,Delhi
1,2,Rohan,Mumbai
2,3,Mehul,Pune
3,4,Riya,Delhi
4,5,Kabir,Bangalore


# PART 1 — JOINS

**Show order_id, customer name, city, product, amount for all orders.**

In [5]:
query = """
SELECT o.order_id, c.name, c.city, o.product, o.amount
FROM orders o
JOIN customers c
ON o.customer_id = c.customer_id

"""

pd.read_sql(query, conn)

Unnamed: 0,order_id,name,city,product,amount
0,101,Aanya,Delhi,Laptop,55000
1,102,Aanya,Delhi,Mouse,700
2,103,Rohan,Mumbai,Shoes,2500
3,104,Rohan,Mumbai,T-Shirt,1200
4,105,Rohan,Mumbai,Jeans,3000
5,106,Mehul,Pune,Book,500
6,107,Riya,Delhi,Phone,20000
7,108,Riya,Delhi,Earphones,1500
8,109,Kabir,Bangalore,Tablet,28000
9,110,Kabir,Bangalore,Cover,800


**Show all customers and their orders (even customers with no orders).**

In [6]:
query = """
SELECT c.customer_id, c.name, c.city, o.order_id, o.product, o.amount
FROM customers c
LEFT JOIN orders o
ON c.customer_id = o.customer_id

"""

pd.read_sql(query, conn)

Unnamed: 0,customer_id,name,city,order_id,product,amount
0,1,Aanya,Delhi,101,Laptop,55000
1,1,Aanya,Delhi,102,Mouse,700
2,2,Rohan,Mumbai,103,Shoes,2500
3,2,Rohan,Mumbai,104,T-Shirt,1200
4,2,Rohan,Mumbai,105,Jeans,3000
5,3,Mehul,Pune,106,Book,500
6,4,Riya,Delhi,107,Phone,20000
7,4,Riya,Delhi,108,Earphones,1500
8,5,Kabir,Bangalore,109,Tablet,28000
9,5,Kabir,Bangalore,110,Cover,800


**Show customers who have never placed an order.**

In [7]:
query = """
SELECT 
    c.customer_id,
    c.name,
    c.city
FROM customers c
LEFT JOIN orders o
ON c.customer_id = o.customer_id
WHERE o.order_id IS NULL ;

"""

pd.read_sql(query, conn)

Unnamed: 0,customer_id,name,city


# PART 2 — JOIN + GROUP BY


**Total spending per customer.**

In [8]:
query = """
SELECT c.name, SUM(o.amount) AS total_spent
FROM customers c
JOIN orders o
ON c.customer_id = o.customer_id
GROUP BY c.name ;

"""

pd.read_sql(query, conn)

Unnamed: 0,name,total_spent
0,Aanya,55700
1,Aditya,8600
2,Arjun,1800
3,Kabir,28800
4,Mehul,500
5,Pooja,6800
6,Riya,21500
7,Rohan,6700
8,Sneha,2200
9,Tanvi,300


**Total revenue per city.**

In [9]:
query = """
SELECT c.city, SUM(o.amount) AS total_revenue
FROM customers c
JOIN orders o
ON c.customer_id = o.customer_id
GROUP BY c.city ;

"""

pd.read_sql(query, conn)

Unnamed: 0,city,total_revenue
0,Bangalore,37400
1,Chennai,1800
2,Delhi,84000
3,Mumbai,8900
4,Pune,800


**Number of orders per category.**

In [10]:
query = """
SELECT o.category, COUNT(o.order_id) AS total_orders
FROM orders o
GROUP BY o.category;

"""

pd.read_sql(query, conn)

Unnamed: 0,category,total_orders
0,Accessories,1
1,Beauty,1
2,Education,2
3,Electronics,9
4,Fashion,5


# PART 3 — SIMPLE WINDOW FUNCTIONS

**Rank orders by amount within each customer (use ROW_NUMBER)**

In [11]:
query = """
SELECT customer_id, order_id, amount,
    ROW_NUMBER() OVER (
        PARTITION BY customer_id
        ORDER BY amount DESC
    ) AS rank_per_customer
FROM orders ;

"""

pd.read_sql(query, conn)

Unnamed: 0,customer_id,order_id,amount,rank_per_customer
0,1,101,55000,1
1,1,102,700,2
2,2,105,3000,1
3,2,103,2500,2
4,2,104,1200,3
5,3,106,500,1
6,4,107,20000,1
7,4,108,1500,2
8,5,109,28000,1
9,5,110,800,2


**Find the most expensive order per customer using ranking.**

In [12]:
query = """
SELECT *
FROM (
    SELECT customer_id, order_id, amount, 
        ROW_NUMBER() OVER (
            PARTITION BY customer_id
            ORDER BY amount desc
        ) AS rn
    FROM orders
) t
WHERE rn = 1 ;

"""

pd.read_sql(query, conn)

Unnamed: 0,customer_id,order_id,amount,rn
0,1,101,55000,1
1,2,105,3000,1
2,3,106,500,1
3,4,107,20000,1
4,5,109,28000,1
5,6,111,2200,1
6,7,112,1800,1
7,8,114,4200,1
8,9,115,300,1
9,10,117,4500,1


# PART 4 — LEAD / LAG (Intro level)


**For each customer, show their current order amount and previous order amount.**

In [13]:
query = """
SELECT customer_id, order_id, month, amount AS current_amount,
    LAG(amount) OVER(
        PARTITION BY customer_id
        ORDER BY month
    ) AS previous_amount
FROM orders ;

"""

pd.read_sql(query, conn)

Unnamed: 0,customer_id,order_id,month,current_amount,previous_amount
0,1,102,Feb,700,
1,1,101,Jan,55000,700.0
2,2,104,Feb,1200,
3,2,103,Jan,2500,1200.0
4,2,105,Mar,3000,2500.0
5,3,106,Jan,500,
6,4,107,Feb,20000,
7,4,108,Mar,1500,20000.0
8,5,110,Feb,800,
9,5,109,Jan,28000,800.0


# PART 5 — CTE (Clean structuring)


**10. Use a CTE to calculate total spending per customer, then show only customers who spent more than 5000.**

In [14]:
query = """

WITH spending AS (
    SELECT c.name, SUM(o.amount) AS total_spent
    FROM customers c
    JOIN orders o
    ON c.customer_id = o.customer_id
    GROUP BY c.name
) 
SELECT *
FROM spending
WHERE total_spent > 5000
;

"""

pd.read_sql(query, conn)

Unnamed: 0,name,total_spent
0,Aanya,55700
1,Aditya,8600
2,Kabir,28800
3,Pooja,6800
4,Riya,21500
5,Rohan,6700
