In [1]:
from IPython.core.interactiveshell import InteractiveShell
# Display the result of every bare expression in a cell, not only the last one.
# Cells below that show several values (e.g. `before` and `after`) rely on this.
InteractiveShell.ast_node_interactivity = "all"
import pandas as pd
import numpy as np

In [2]:
#========
# Setup
#========
# Small ticket table used as the starting point for every case below.
# Each list is one column; position i across the lists describes ticket i.
ticket_ids = ["T1", "T2", "T3", "T4", "T5", "T6"]
priorities = ["High", "Low", "Medium", "High", "Low", "Medium"]
statuses = ["Open", "In Progress", "Closed", "Open", "Closed", "Open"]
hours_spent = [5.0, 1.0, 3.5, 6.0, 0.5, 2.0]

df = pd.DataFrame(
    dict(
        ticket_id=ticket_ids,
        priority=priorities,
        status=statuses,
        hours=hours_spent,
    )
)
df

Unnamed: 0,ticket_id,priority,status,hours
0,T1,High,Open,5.0
1,T2,Low,In Progress,1.0
2,T3,Medium,Closed,3.5
3,T4,High,Open,6.0
4,T5,Low,Closed,0.5
5,T6,Medium,Open,2.0


In [3]:
#==============================
# Case 1) Convert to category
#==============================
# astype returns a new frame, so `df` itself keeps its original dtypes.
df1 = df.astype({"priority": "category"})
df1.dtypes

ticket_id      object
priority     category
status         object
hours         float64
dtype: object

In [11]:
#============================
# Case 2) Inspect categories
#============================
# The `.cat` accessor exposes the category labels of a categorical column.
priority_accessor = df1["priority"].cat
case2 = priority_accessor.categories
case2

Index(['High', 'Low', 'Medium'], dtype='object')

In [13]:
#============================
# Case 3) Ordered categories
#============================
# Explicit ranking, lowest first; sorting follows this order, not the alphabet.
order = ["Low", "Medium", "High"]
priority_dtype = pd.CategoricalDtype(categories=order, ordered=True)
df3 = df.astype({"priority": priority_dtype})

shown_columns = ["ticket_id", "priority", "hours"]
case3_sorted = df3.sort_values("priority")[shown_columns]
case3_sorted

Unnamed: 0,ticket_id,priority,hours
1,T2,Low,1.0
4,T5,Low,0.5
2,T3,Medium,3.5
5,T6,Medium,2.0
0,T1,High,5.0
3,T4,High,6.0


In [14]:
#============================
# Case 4) Rename categories
#============================
# Old label -> new label; only the labels change, order stays as in df3.
label_map = {"Low": "LOW", "Medium": "MED", "High": "HIGH"}
df4 = df3.assign(priority=df3["priority"].cat.rename_categories(label_map))
df4["priority"].cat.categories

Index(['LOW', 'MED', 'HIGH'], dtype='object')

In [15]:
#====================================
# Case 5) Remove unused categories
#====================================
# Filtering rows does not shrink the category list: "Medium" is still a
# declared category even though no remaining row uses it.
keep_mask = df3["priority"].isin(["Low", "High"])
df5 = df3.loc[keep_mask].copy()

before = df5["priority"].cat.categories
before

# Drop categories that no longer appear in the data.
df5["priority"] = df5["priority"].cat.remove_unused_categories()
after = df5["priority"].cat.categories
after

Index(['Low', 'Medium', 'High'], dtype='object')

Index(['Low', 'High'], dtype='object')

In [16]:
#========================================
# Case 6) Groupby with categorical keys
#========================================
df6 = df3.copy()
# observed=True: only categories that actually occur in the data form groups.
hours_by_priority = df6.groupby("priority", observed=True)["hours"]
case6 = hours_by_priority.agg(["count", "mean", "sum"])
case6

Unnamed: 0_level_0,count,mean,sum
priority,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Low,2,0.75,1.5
Medium,2,2.75,5.5
High,2,5.5,11.0


In [19]:
#===============================================================
# Case 7) New/unknown values handling (must add category first)
#===============================================================
# Register "Critical" as a category before assigning it to any row.
df7 = df3.assign(priority=df3["priority"].cat.add_categories(["Critical"]))
is_t2 = df7["ticket_id"].eq("T2")
df7.loc[is_t2, "priority"] = "Critical"
df7.loc[:, ["ticket_id", "priority"]]

Unnamed: 0,ticket_id,priority
0,T1,High
1,T2,Critical
2,T3,Medium
3,T4,High
4,T5,Low
5,T6,Medium
