In [4]:
import pandas as pd

In [5]:
# Toy employee table used by every groupby example below.
# Each department has a single team with two members (one "M", one "F"),
# so the categorical columns are built by repeating each label twice.
df = pd.DataFrame(
    {
        "Department": [name for name in ("HR", "IT", "Marketing", "Sales") for _ in range(2)],
        "Team": [name for name in ("A", "B", "C", "D") for _ in range(2)],
        "Gender": ["M", "F"] * 4,
        "Salary": [85, 90, 78, 85, 92, 88, 75, 80],
        "Age": [23, 25, 30, 22, 28, 26, 21, 27],
        # Parsed up front so the column holds datetimes rather than strings.
        "JoinDate": pd.to_datetime(
            [
                "2020-01-10", "2020-02-15",
                "2021-03-20", "2021-04-10",
                "2020-05-30", "2020-06-25",
                "2021-07-15", "2021-08-01",
            ]
        ),
    }
)
df

Unnamed: 0,Department,Team,Gender,Salary,Age,JoinDate
0,HR,A,M,85,23,2020-01-10
1,HR,A,F,90,25,2020-02-15
2,IT,B,M,78,30,2021-03-20
3,IT,B,F,85,22,2021-04-10
4,Marketing,C,M,92,28,2020-05-30
5,Marketing,C,F,88,26,2020-06-25
6,Sales,D,M,75,21,2021-07-15
7,Sales,D,F,80,27,2021-08-01


In [6]:
# Average salary within each department (one value per group).
(
    df
    .groupby("Department")["Salary"]
    .mean()
)

Department
HR           87.5
IT           81.5
Marketing    90.0
Sales        77.5
Name: Salary, dtype: float64

In [9]:
# Number of non-null salary entries recorded for each department.
(
    df
    .groupby("Department")["Salary"]
    .count()
)

Department
HR           2
IT           2
Marketing    2
Sales        2
Name: Salary, dtype: int64

In [11]:
# Several statistics at once: passing a list to .agg() yields one column
# per statistic and one row per department.
(
    df
    .groupby("Department")["Salary"]
    .agg(["mean", "max", "min"])
)

Unnamed: 0_level_0,mean,max,min
Department,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
HR,87.5,90,85
IT,81.5,85,78
Marketing,90.0,92,88
Sales,77.5,80,75


In [17]:
# Group once and reuse the grouped "Salary" column for every statistic,
# instead of rebuilding the same groupby for each print.
team_salary = df.groupby("Team")["Salary"]

print(team_salary.mean())   # Average salary per team
print(team_salary.sum())    # Total salary per team
print(team_salary.count())  # Number of non-null salary entries per team
print(team_salary.min())    # Lowest salary per team
print(team_salary.max())    # Highest salary per team

Team
A    87.5
B    81.5
C    90.0
D    77.5
Name: Salary, dtype: float64
Team
A    175
B    163
C    180
D    155
Name: Salary, dtype: int64
Team
A    2
B    2
C    2
D    2
Name: Salary, dtype: int64
Team
A    85
B    78
C    88
D    75
Name: Salary, dtype: int64
Team
A    90
B    85
C    92
D    80
Name: Salary, dtype: int64


In [None]:
'''
Transform vs Aggregate vs Filter
Operation	Returns	When to Use
.aggregate()	Single value per group	Summary (like mean)
.transform()	Same shape as original	Add new column based on group
.filter()	Subset of rows	Keep/discard whole groups


.transform() Example:
df["Team Avg"] = df.groupby("Team")["Salary"].transform("mean")

Now each row gets its team average — great for comparisons!

.filter() Example:
df.groupby("Team").filter(lambda x: x["Salary"].mean() > 80)

Only keeps the rows of teams whose average salary is above 80.

Summary
.groupby() helps you summarize large datasets by category
Use mean(), sum(), count() for single built-in summaries, and .agg() to compute several or custom metrics at once
.transform() adds values back to original rows
.filter() keeps only groups that meet conditions'''