In [59]:
from IPython.core.interactiveshell import InteractiveShell
# Show the value of every top-level expression in a cell, not only the last
# one; later cells rely on this to display two results from a single cell.
InteractiveShell.ast_node_interactivity = "all"
import pandas as pd
import numpy as np

In [60]:
#==========================================
# Setup: five sample orders, one per row
#==========================================
df = pd.DataFrame(
    [
        ("A101", "East", 120, 5, 0.10),
        ("A102", "West", -30, 0, 0.05),
        ("A103", "East", 80, 2, 0.00),
        ("A104", "West", 200, 1, 0.15),
        ("A105", "East", -5, 3, 0.20),
    ],
    columns=["order_id", "region", "sales", "returns", "discount"],
)
df

Unnamed: 0,order_id,region,sales,returns,discount
0,A101,East,120,5,0.1
1,A102,West,-30,0,0.05
2,A103,East,80,2,0.0
3,A104,West,200,1,0.15
4,A105,East,-5,3,0.2


In [61]:
#=====================================================================
# Case 1) Plain where(): entries failing the condition become NA
# (negative sales are masked; the column is shown as float because
#  of the inserted NaN values)
#=====================================================================
case1 = df.loc[:, ["sales"]].where(df["sales"].ge(0))
case1

Unnamed: 0,sales
0,120.0
1,
2,80.0
3,200.0
4,


In [62]:
#====================================================================
# Case 2) Scalar other=: negative sales are swapped for 0 rather
# than NA, and the result is stored next to the raw column
#====================================================================
case2 = df.assign(
    sales_clean=df["sales"].where(df["sales"].ge(0), other=0)
)
case2.loc[:, ["order_id", "sales", "sales_clean"]]

Unnamed: 0,order_id,sales,sales_clean
0,A101,120,120
1,A102,-30,0
2,A103,80,80
3,A104,200,200
4,A105,-5,0


In [63]:
#==========================================================================
# Case 3) Series as other=: each column gets its own replacement value
# (the Series is indexed by column name; axis=1 lines it up with the
#  columns so the value is reused for every row of that column)
# Note: the medians are taken over all rows, including the negative
# entries that are being replaced.
#==========================================================================
num = df.loc[:, ["sales", "returns", "discount"]]
fallback = num.median(numeric_only=True)
fallback

case3 = num.where(num.ge(0), fallback, axis=1)
case3

sales       80.0
returns      2.0
discount     0.1
dtype: float64

Unnamed: 0,sales,returns,discount
0,120,5,0.1
1,80,0,0.05
2,80,2,0.0
3,200,1,0.15
4,80,3,0.2


In [64]:
#=====================================================================
# Case 4) DataFrame condition: one boolean column per data column
# (keep sales between 0 and 150 inclusive, returns <= 3,
#  discount <= 0.15; everything else becomes NA)
#=====================================================================
cond4 = pd.DataFrame({
    "sales": (df["sales"] >= 0) & (df["sales"] <= 150),
    "returns": df["returns"] <= 3,
    "discount": df["discount"] <= 0.15,
})

case4 = df.loc[:, ["sales", "returns", "discount"]].where(cond4)
case4

Unnamed: 0,sales,returns,discount
0,120.0,,0.1
1,,0.0,0.05
2,80.0,2.0,0.0
3,,1.0,0.15
4,,3.0,


In [65]:
#====================================================================
# Case 5) callable condition: keep rows only when region == 'East'
# (callable receives the DataFrame and returns a boolean Series)
#====================================================================
# where() blanks every column of the non-East rows, so those rows end up
# entirely NA. how="all" removes only such fully masked rows; the default
# how="any" would also discard an East row that merely has one missing value.
# Note: the NaN masking leaves the integer columns displayed as float.
case5 = df.where(lambda x: x["region"].eq("East")).dropna(how = "all")
case5

Unnamed: 0,order_id,region,sales,returns,discount
0,A101,East,120.0,5.0,0.1
2,A103,East,80.0,2.0,0.0
4,A105,East,-5.0,3.0,0.2


In [66]:
#========================================================================
# Case 6) callable other: replace negative sales using a dynamic value
# Here: replace negatives with the median of non-negative sales
#========================================================================
def _nonneg_median(values):
    """Return the median of the non-negative entries of `values`."""
    return values[values >= 0].median()

# Shown for reference: the value the callable yields for the sales column.
nonneg_median = _nonneg_median(df["sales"])
nonneg_median

case6 = df.copy()
# where() calls `other` with the Series itself, so the fallback is derived
# from the data being cleaned instead of a value captured beforehand.
case6["sales_fixed"] = df["sales"].where(
    df["sales"] >= 0 ,
    other = _nonneg_median
)
case6[["order_id" , "sales" , "sales_fixed"]]

np.float64(120.0)

Unnamed: 0,order_id,sales,sales_fixed
0,A101,120,120
1,A102,-30,120
2,A103,80,80
3,A104,200,200
4,A105,-5,120
