In [42]:
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every bare expression in a cell, not only the last one.
# The cells below rely on this: they name several frames per cell and expect
# each of them to be rendered.
InteractiveShell.ast_node_interactivity = "all"
import pandas as pd
import numpy as np

In [43]:
#==============================================================
# Fixtures for Case 1 & 2: two partially overlapping frames
#==============================================================
# df1 holds rows R1, R2, R3, R6 and df2 holds rows R2, R3, R4, R5, so only
# R2 and R3 are shared. Each frame also carries its own NaN gaps.
df1 = pd.DataFrame(
    data=dict(
        price=[100, np.nan, 120, 140],
        lead_days=[5, 7, np.nan, 3],
    ),
    index=pd.Index(["R1", "R2", "R3", "R6"], name="idx"),
)
df1

df2 = pd.DataFrame(
    data=dict(
        price=[95, 125, 110, np.nan],
        lead_days=[6, np.nan, 4, np.nan],
    ),
    index=pd.Index(["R2", "R3", "R4", "R5"], name="idx"),
)
df2

Unnamed: 0_level_0,price,lead_days
idx,Unnamed: 1_level_1,Unnamed: 2_level_1
R1,100.0,5.0
R2,,7.0
R3,120.0,
R6,140.0,3.0


Unnamed: 0_level_0,price,lead_days
idx,Unnamed: 1_level_1,Unnamed: 2_level_1
R2,95.0,6.0
R3,125.0,
R4,110.0,4.0
R5,,


In [44]:
#=======================================================================================
# Case 1) Element-wise rule: keep the lower value (e.g., best price / shortest lead)
#=======================================================================================
# np.minimum propagates NaN: a cell is NaN as soon as either side is missing,
# including rows that exist in only one frame (they are NaN on the other side
# once the frames are aligned).
case1a = df1.combine(other=df2, func=np.minimum)
case1a

# np.fmin ignores NaN when the other operand is a number, so a value that is
# present on only one side survives; a cell stays NaN only if both sides are NaN.
case1b = df1.combine(other=df2, func=np.fmin)
case1b

Unnamed: 0_level_0,price,lead_days
idx,Unnamed: 1_level_1,Unnamed: 2_level_1
R1,,
R2,,6.0
R3,120.0,
R4,,
R5,,
R6,,


Unnamed: 0_level_0,price,lead_days
idx,Unnamed: 1_level_1,Unnamed: 2_level_1
R1,100.0,5.0
R2,95.0,6.0
R3,120.0,
R4,110.0,4.0
R5,,
R6,140.0,3.0


In [45]:
#================================================================================
# Case 2) Series-level decision: prefer the column with more non-missing values
#================================================================================
def prefer_more_complete(s1: pd.Series, s2: pd.Series) -> pd.Series:
    """Pick whichever of two Series has more non-missing entries.

    Parameters
    ----------
    s1, s2 : pd.Series
        The two candidate columns.

    Returns
    -------
    pd.Series
        ``s2`` only when it has strictly more non-NA values than ``s1``;
        otherwise ``s1`` (so ties go to ``s1``). The selected object is
        returned as-is, not copied.
    """
    # Series.count() is the number of non-NA entries.
    if s2.count() > s1.count():
        return s2
    return s1

# The chooser receives one pair of index-aligned columns at a time, so the
# decision is made per column rather than per cell.
case2 = df1.combine(other=df2, func=prefer_more_complete)
case2

Unnamed: 0_level_0,price,lead_days
idx,Unnamed: 1_level_1,Unnamed: 2_level_1
R1,100.0,5.0
R2,,7.0
R3,120.0,
R4,,
R5,,
R6,140.0,3.0


In [46]:
#=====================================================
# Fixtures for Case 3: single-column stock tables
#=====================================================
# df3 holds rows R1..R3 and df4 holds rows R2..R4; each has one NaN in "stock".
df3 = pd.DataFrame(
    data=dict(stock=[5, np.nan, 2]),
    index=pd.Index(["R1", "R2", "R3"], name="idx"),
)
df3

df4 = pd.DataFrame(
    data=dict(stock=[1, 4, np.nan]),
    index=pd.Index(["R2", "R3", "R4"], name="idx"),
)
df4

Unnamed: 0_level_0,stock
idx,Unnamed: 1_level_1
R1,5.0
R2,
R3,2.0


Unnamed: 0_level_0,stock
idx,Unnamed: 1_level_1
R2,1.0
R3,4.0
R4,


In [47]:
#==========================================================
# Case 3) fill_value: count missing stock as 0 when adding
#==========================================================
# fill_value replaces NaN in both aligned columns before the function runs,
# so a row missing on one side keeps the other side's value and a row with
# no value on either side comes out as 0.0 instead of NaN.
case3 = df3.combine(
    other=df4,
    func=lambda left, right: left + right,
    fill_value=0.0,
)
case3

Unnamed: 0_level_0,stock
idx,Unnamed: 1_level_1
R1,5.0
R2,1.0
R3,6.0
R4,0.0


In [48]:
#==================================================================
# Fixtures for Case 4: monthly tables with different columns/rows
#==================================================================
# Both frames have "sales"; "units" exists only in df5 and "returns" only in
# df6. The month labels overlap on "2025-11" alone.
df5 = pd.DataFrame(
    data=dict(sales=[1000, 1100], units=[50, 55]),
    index=pd.Index(["2025-10", "2025-11"], name="month"),
)
df5

df6 = pd.DataFrame(
    data=dict(sales=[1050, 1200], returns=[10, 12]),
    index=pd.Index(["2025-11", "2025-12"], name="month"),
)
df6

Unnamed: 0_level_0,sales,units
month,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-10,1000,50
2025-11,1100,55


Unnamed: 0_level_0,sales,returns
month,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-11,1050,10
2025-12,1200,12


In [49]:
#===============================================================
# Case 4) the result spans the union of both indexes and columns
#===============================================================
# No manual reindexing is needed: the frames are aligned first, which fills
# the cells one side lacks with NaN. np.maximum then returns NaN for every
# cell where either side is missing.
case4a = df5.combine(other=df6, func=np.maximum)
case4a

# np.fmax keeps the one available number instead, leaving NaN only where
# neither frame has a value.
case4b = df5.combine(other=df6, func=np.fmax)
case4b

Unnamed: 0_level_0,returns,sales,units
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-10,,,
2025-11,,1100.0,
2025-12,,,


Unnamed: 0_level_0,returns,sales,units
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-10,,1000.0,50.0
2025-11,10.0,1100.0,55.0
2025-12,12.0,1200.0,


In [50]:
#==============================================================
# Fixtures for Case 5 & 6: same regions, different column sets
#==============================================================
# df7 and df8 share the East/West index; df8 has "sales" only, df7 also has "units".
df7 = pd.DataFrame(
    data=dict(sales=[500, 700], units=[40, 60]),
    index=pd.Index(["East", "West"], name="region"),
)
df7

df8 = pd.DataFrame(
    data=dict(sales=[520, 680]),
    index=pd.Index(["East", "West"], name="region"),
)
df8

Unnamed: 0_level_0,sales,units
region,Unnamed: 1_level_1,Unnamed: 2_level_1
East,500,40
West,700,60


Unnamed: 0_level_0,sales
region,Unnamed: 1_level_1
East,520
West,680


In [51]:
#=====================================================================
# Case 5) overwrite: what happens to a column that only df7 contains
#=====================================================================
# The function always returns the second (df8) column. With overwrite=True
# that also applies to "units", which df8 lacks, so the all-NaN placeholder
# created by alignment replaces df7's values.
case5_overwrite_true = df7.combine(
    other=df8,
    func=lambda _own, incoming: incoming,
    overwrite=True,
)
case5_overwrite_true

# With overwrite=False a column that is entirely NaN on the other side is
# left as it is in df7, so "units" keeps its original values.
case5_overwrite_false = df7.combine(
    other=df8,
    func=lambda _own, incoming: incoming,
    overwrite=False,
)
case5_overwrite_false

Unnamed: 0_level_0,sales,units
region,Unnamed: 1_level_1,Unnamed: 2_level_1
East,520,
West,680,


Unnamed: 0_level_0,sales,units
region,Unnamed: 1_level_1,Unnamed: 2_level_1
East,520,40
West,680,60


In [52]:
#========================================================================
# Case 6) caller/argument order matters when shapes differ (overwrite=False)
#========================================================================
# df7 calls, function returns the df8 column: "sales" comes from df8, while
# "units" (absent from df8) is protected by overwrite=False and stays df7's.
case6a = df7.combine(
    other=df8,
    func=lambda _own, incoming: incoming,
    overwrite=False,
)
case6a

# df7 calls, function returns df7's own column: the result equals df7.
case6b = df7.combine(
    other=df8,
    func=lambda own, _incoming: own,
    overwrite=False,
)
case6b

# df8 calls, function returns the df7 column: both "sales" and "units" come
# from df7, because df7 really has a "units" column to hand over.
case6c = df8.combine(
    other=df7,
    func=lambda _own, incoming: incoming,
    overwrite=False,
)
case6c

# df8 calls, function returns df8's own column: "sales" stays df8's, but
# "units" is all NaN. df8 never had that column, and overwrite=False only
# protects the caller's column when the other side's column is entirely NaN,
# which is not the case for df7's "units".
case6d = df8.combine(
    other=df7,
    func=lambda own, _incoming: own,
    overwrite=False,
)
case6d

Unnamed: 0_level_0,sales,units
region,Unnamed: 1_level_1,Unnamed: 2_level_1
East,520,40
West,680,60


Unnamed: 0_level_0,sales,units
region,Unnamed: 1_level_1,Unnamed: 2_level_1
East,500,40
West,700,60


Unnamed: 0_level_0,sales,units
region,Unnamed: 1_level_1,Unnamed: 2_level_1
East,500,40
West,700,60


Unnamed: 0_level_0,sales,units
region,Unnamed: 1_level_1,Unnamed: 2_level_1
East,520,
West,680,
