In [32]:
# Notebook setup: display configuration plus the libraries used by the cells below.
# Setting ast_node_interactivity to "all" makes every bare expression in a cell
# render its own output, instead of only the last expression of the cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import pandas as pd
import numpy as np  # NOTE(review): not referenced in the cells visible here — confirm it is still needed

In [33]:
# Sample sales data: one tuple per transaction, in the column order given below.
# Dates are entered as ISO strings and parsed to datetimes once the frame exists.
df = pd.DataFrame(
    [
        ("2025-12-01", "East", "Online", "Cable", 1200, 0.10),
        ("2025-12-02", "East", "Retail", "Switch", 600, 0.05),
        ("2025-12-02", "West", "Online", "Cable", 900, 0.00),
        ("2025-12-08", "East", "Online", "Wire", 1500, 0.15),
        ("2025-12-09", "West", "Retail", "Switch", 400, 0.00),
        ("2025-12-15", "West", "Online", "Wire", 1100, 0.10),
        ("2026-01-03", "East", "Retail", "Cable", 700, 0.05),
        ("2026-01-05", "West", "Retail", "Wire", 500, 0.00),
    ],
    columns=["date", "region", "channel", "product", "revenue", "discount"],
).assign(date=lambda frame: pd.to_datetime(frame["date"]))
df

Unnamed: 0,date,region,channel,product,revenue,discount
0,2025-12-01,East,Online,Cable,1200,0.1
1,2025-12-02,East,Retail,Switch,600,0.05
2,2025-12-02,West,Online,Cable,900,0.0
3,2025-12-08,East,Online,Wire,1500,0.15
4,2025-12-09,West,Retail,Switch,400,0.0
5,2025-12-15,West,Online,Wire,1100,0.1
6,2026-01-03,East,Retail,Cable,700,0.05
7,2026-01-05,West,Retail,Wire,500,0.0


In [34]:
#=====================================
# Case 1) Group by a single column
# Use case: total revenue per region
# Result: a Series indexed by region
#=====================================
case1 = (
    df
    .groupby(by="region")["revenue"]
    .agg("sum")
)
case1

region
East    4000
West    2900
Name: revenue, dtype: int64

In [35]:
#=====================================================
# Case 2) Group by several columns at once
# Use case: revenue per (region, channel) combination
# Result: a Series with a two-level (region, channel) index
#=====================================================
case2 = (
    df
    .groupby(by=["region", "channel"])["revenue"]
    .agg("sum")
)
case2

region  channel
East    Online     2700
        Retail     1300
West    Online     2000
        Retail      900
Name: revenue, dtype: int64

In [36]:
#==========================================================
# Case 3) Group by a callable
# How it works: the callable receives each index value and
#               its return value becomes the group key
# Use case: revenue by weekday name
#==========================================================
# The dates must be in the index, because that is what the callable is fed.
df_idx = df.set_index(keys="date")
case3 = (
    df_idx
    .groupby(by=lambda stamp: stamp.day_name())["revenue"]
    .agg("sum")
)
case3

date
Monday      4300
Saturday     700
Tuesday     1900
Name: revenue, dtype: int64

In [37]:
#=================================================
# Case 4) Group by calendar windows (pd.Grouper)
# Use case: monthly revenue
# freq = "MS" labels each bucket by its month start
#=================================================
case4 = (
    df
    .groupby(by=pd.Grouper(key="date", freq="MS"))["revenue"]
    .sum()
)
case4

date
2025-12-01    5700
2026-01-01    1200
Freq: MS, Name: revenue, dtype: int64

In [38]:
#==========================================================
# Case 5) Shape the result with as_index / sort
# Use case: flat table that is easy to export
# as_index = False -> group key stays a regular column
# sort = False     -> groups keep their first-seen order
#==========================================================
case5 = (
    df
    .groupby(by="region", sort=False, as_index=False)
    .agg(total_revenue=pd.NamedAgg(column="revenue", aggfunc="sum"))
)
case5

Unnamed: 0,region,total_revenue
0,East,4000
1,West,2900


In [39]:
#==============================================================
# Case 6) Group by a level of a MultiIndex
# Use case: the keys live in the index; summarize revenue by
#           the 'region' level, then by the 'channel' level
# Note: case6 is rebound, so after this cell it holds the
#       per-channel totals; both results are still displayed
#==============================================================
mi = df.set_index(keys=["region", "channel", "date"]).sort_index()

# Totals per region level.
case6 = (
    mi
    .groupby(level="region")["revenue"]
    .agg("sum")
)
case6

# Totals per channel level.
case6 = (
    mi
    .groupby(level="channel")["revenue"]
    .agg("sum")
)
case6

region
East    4000
West    2900
Name: revenue, dtype: int64

channel
Online    4700
Retail    2200
Name: revenue, dtype: int64