# Day 29 – Window Functions Using Pandas

Practicing window-function concepts such as ranking
and cumulative calculations using Pandas.

In [1]:
import pandas as pd

# Load the cleaned e-commerce orders into `df`, the frame every later cell
# aggregates from, then preview the first five rows.
df = pd.read_csv(filepath_or_buffer="/content/ecommerce_cleaned.csv")
df.head(n=5)

Unnamed: 0,order_id,customer_id,date,age,gender,city,product_category,unit_price,quantity,discount_amount,total_amount,payment_method,device_type,session_duration_minutes,pages_viewed,is_returning_customer,delivery_time_days,customer_rating
0,ORD_001337,CUST_01337,2023-01-01,27,Female,Bursa,Toys,54.28,1,0.0,54.28,Debit Card,Mobile,4,14,True,8,5
1,ORD_004885,CUST_04885,2023-01-01,42,Male,Konya,Toys,244.9,1,0.0,244.9,Credit Card,Mobile,11,3,True,3,3
2,ORD_004507,CUST_04507,2023-01-01,43,Female,Ankara,Food,48.15,5,0.0,240.75,Credit Card,Mobile,7,8,True,5,2
3,ORD_000645,CUST_00645,2023-01-01,32,Male,Istanbul,Electronics,804.06,1,229.28,574.78,Credit Card,Mobile,8,10,False,1,4
4,ORD_000690,CUST_00690,2023-01-01,40,Female,Istanbul,Sports,755.61,5,0.0,3778.05,Cash on Delivery,Desktop,21,10,True,7,4


In [2]:
# Total spend per customer, ranked from the biggest spender down.
# method="dense": tied totals share a rank and no rank numbers are skipped;
# ascending=False gives rank 1 to the largest total.
customer_spend = (
    df.groupby("customer_id")["total_amount"]
    .sum()
    .reset_index()
    .assign(
        rank=lambda totals: totals["total_amount"].rank(
            method="dense", ascending=False
        )
    )
)

# Show the ten best-ranked customers.
customer_spend.sort_values(by="rank").head(n=10)

Unnamed: 0,customer_id,total_amount,rank
4704,CUST_04705,22023.9,1.0
60,CUST_00061,21478.35,2.0
1983,CUST_01984,21409.05,3.0
524,CUST_00525,20211.81,4.0
3720,CUST_03721,19547.2,5.0
3173,CUST_03174,18728.68,6.0
2051,CUST_02052,18699.75,7.0
3493,CUST_03494,18014.55,8.0
471,CUST_00472,17099.3,9.0
641,CUST_00642,16214.2,10.0


DENSE_RANK() OVER (ORDER BY SUM(total_amount) DESC)

In [3]:
# Revenue per product category with a dense rank (1 = highest revenue).
category_revenue = (
    df.groupby("product_category")["total_amount"]
    .sum()
    .reset_index()
    .assign(
        rank=lambda totals: totals["total_amount"].rank(
            method="dense", ascending=False
        )
    )
)

# Display every category, best-ranked first.
category_revenue.sort_values(by="rank")

Unnamed: 0,product_category,total_amount,rank
2,Electronics,2328806.81,1.0
5,Home & Garden,908348.86,2.0
6,Sports,754563.56,3.0
3,Fashion,375214.93,4.0
7,Toys,223142.48,5.0
0,Beauty,156584.74,6.0
4,Food,96138.67,7.0
1,Books,72744.52,8.0


In [4]:
# Order categories from highest to lowest revenue, then accumulate a running
# total in that order so each row holds the revenue of itself plus all
# higher-revenue categories.
category_revenue = (
    category_revenue
    .sort_values(by="total_amount", ascending=False)
    .assign(
        cumulative_revenue=lambda ordered: ordered["total_amount"].cumsum()
    )
)

category_revenue

Unnamed: 0,product_category,total_amount,rank,cumulative_revenue
2,Electronics,2328806.81,1.0,2328806.81
5,Home & Garden,908348.86,2.0,3237155.67
6,Sports,754563.56,3.0,3991719.23
3,Fashion,375214.93,4.0,4366934.16
7,Toys,223142.48,5.0,4590076.64
0,Beauty,156584.74,6.0,4746661.38
4,Food,96138.67,7.0,4842800.05
1,Books,72744.52,8.0,4915544.57


SUM(SUM(total_amount)) OVER (ORDER BY SUM(total_amount) DESC)

In [5]:
# Grand total across all categories; the denominator for each share.
total_revenue = category_revenue["total_amount"].sum()

# Each category's revenue as a percentage of the grand total.
category_revenue["revenue_share_pct"] = (
    category_revenue["total_amount"].div(total_revenue).mul(100)
)

category_revenue

Unnamed: 0,product_category,total_amount,rank,cumulative_revenue,revenue_share_pct
2,Electronics,2328806.81,1.0,2328806.81,47.376375
5,Home & Garden,908348.86,2.0,3237155.67,18.479109
6,Sports,754563.56,3.0,3991719.23,15.350559
3,Fashion,375214.93,4.0,4366934.16,7.633232
7,Toys,223142.48,5.0,4590076.64,4.539527
0,Beauty,156584.74,6.0,4746661.38,3.185501
4,Food,96138.67,7.0,4842800.05,1.955809
1,Books,72744.52,8.0,4915544.57,1.479887


## Key Takeaways

- Window functions allow calculations across rows without collapsing data.
- Ranking highlights top contributors.
- Cumulative metrics reveal concentration (Pareto effect).
- These concepts are commonly tested in SQL interviews.