In [1]:
# Python walkthrough following the marginaleffects "comparisons" vignette:
# https://marginaleffects.com/vignettes/comparisons.html

In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every expression statement in a cell, not only the last
# one. Later cells rely on this to show several tables from a single cell.
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import marginaleffects

In [3]:
# Read only the outcome and the three predictors used by the model formula,
# then discard every row with a missing value in any of those columns.
model_columns = ["Survived", "PClass", "SexCode", "Age"]
titanic_raw = pd.read_csv("titanic.csv", usecols=model_columns)
df = titanic_raw.dropna()

# Quick look at the first rows and the (rows, columns) size after dropping NAs.
df.head()
df.shape

Unnamed: 0,PClass,Age,Survived,SexCode
0,1st,29.0,1,1
1,1st,2.0,0,1
2,1st,30.0,0,0
3,1st,25.0,0,1
4,1st,0.92,1,0


(756, 4)

In [4]:
# Binomial GLM of survival on passenger class, sex code and age, including all
# two-way and three-way interactions (`*` expands to main effects plus
# interactions). PClass and SexCode are wrapped in C() so they are treated as
# categorical.
formula = "Survived ~ C(PClass) * C(SexCode) * Age"
binomial_glm = smf.glm(formula=formula, data=df, family=sm.families.Binomial())
model = binomial_glm.fit()

# Plain-text regression table.
print(model.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:               Survived   No. Observations:                  756
Model:                            GLM   Df Residuals:                      744
Model Family:                Binomial   Df Model:                           11
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -319.82
Date:                Sat, 01 Jun 2024   Deviance:                       639.64
Time:                        20:28:35   Pearson chi2:                     807.
No. Iterations:                     6   Pseudo R-squ. (CS):             0.3998
Covariance Type:            nonrobust                                         
                                           coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------------


In [5]:
# Average comparison for a single variable: the mean change in the model's
# prediction when SexCode is switched between its two levels.
sexcode_contrast = marginaleffects.avg_comparisons(model, variables="SexCode")
sexcode_contrast.to_pandas()

Unnamed: 0,term,contrast,estimate,std_error,statistic,p_value,s_value,conf_low,conf_high
0,SexCode,mean(True) - mean(False),0.497307,0.030175,16.480768,0.0,inf,0.438165,0.556449


In [6]:
# Same calculation by hand: predict for every row with SexCode forced to 1,
# then with SexCode forced to 0, and subtract the two average predictions.
mean_pred_sexcode_1 = model.predict(df.assign(SexCode=1)).mean()
mean_pred_sexcode_0 = model.predict(df.assign(SexCode=0)).mean()
mean_pred_sexcode_1 - mean_pred_sexcode_0

0.4973067378607241

In [7]:
# Without a `variables` argument, an average comparison is reported for every
# predictor in the model (Age, PClass and SexCode).
all_contrasts = marginaleffects.avg_comparisons(model)
all_contrasts.to_pandas()

Unnamed: 0,term,contrast,estimate,std_error,statistic,p_value,s_value,conf_low,conf_high
0,Age,+1,-0.006096,0.001105,-5.518069,3.427448e-08,24.79829,-0.008261,-0.00393
1,PClass,mean(2nd) - mean(1st),-0.224836,0.040536,-5.546523,2.914061e-08,25.032394,-0.304286,-0.145386
2,PClass,mean(3rd) - mean(1st),-0.395676,0.042534,-9.302478,0.0,inf,-0.479042,-0.31231
3,SexCode,mean(True) - mean(False),0.497307,0.030175,16.480768,0.0,inf,0.438165,0.556449


In [8]:
def avg_contrast_table(spec):
    """Return the average comparisons for `spec` as a pandas DataFrame.

    `spec` is a `{column: contrast specification}` dict forwarded unchanged as
    the `variables` argument of `marginaleffects.avg_comparisons`.
    """
    return marginaleffects.avg_comparisons(model, variables=spec).to_pandas()


# Numeric predictor: a contrast of size 5 in Age, then a one-standard-deviation
# contrast centred on the observed value (x - sd/2 to x + sd/2).
avg_contrast_table({"Age": 5})
avg_contrast_table({"Age": "sd"})

# Categorical predictor: each level against the previous one ("sequential"),
# every pair of levels ("pairwise"), and each level against the first level
# ("reference").
avg_contrast_table({"PClass": "sequential"})
avg_contrast_table({"PClass": "pairwise"})
avg_contrast_table({"PClass": "reference"})

Unnamed: 0,term,contrast,estimate,std_error,statistic,p_value,s_value,conf_low,conf_high
0,Age,5,-0.030508,0.005526,-5.521109,3.368672e-08,24.823245,-0.041338,-0.019678


Unnamed: 0,term,contrast,estimate,std_error,statistic,p_value,s_value,conf_low,conf_high
0,Age,mean((x+sd/2)) - mean((x-sd/2)),-0.087538,0.0158,-5.540519,3.015761e-08,24.982903,-0.118505,-0.056571


Unnamed: 0,term,contrast,estimate,std_error,statistic,p_value,s_value,conf_low,conf_high
0,PClass,mean(2nd) - mean(1st),-0.224836,0.040536,-5.546523,2.914061e-08,25.032394,-0.304286,-0.145386
1,PClass,mean(3rd) - mean(2nd),-0.17084,0.033114,-5.159218,2.47983e-07,21.943255,-0.235741,-0.105939


Unnamed: 0,term,contrast,estimate,std_error,statistic,p_value,s_value,conf_low,conf_high
0,PClass,mean(2nd) - mean(1st),-0.224836,0.040536,-5.546523,2.914061e-08,25.032394,-0.304286,-0.145386
1,PClass,mean(3rd) - mean(1st),-0.395676,0.042534,-9.302478,0.0,inf,-0.479042,-0.31231
2,PClass,mean(3rd) - mean(2nd),-0.17084,0.033114,-5.159218,2.47983e-07,21.943255,-0.235741,-0.105939


Unnamed: 0,term,contrast,estimate,std_error,statistic,p_value,s_value,conf_low,conf_high
0,PClass,mean(2nd) - mean(1st),-0.224836,0.040536,-5.546523,2.914061e-08,25.032394,-0.304286,-0.145386
1,PClass,mean(3rd) - mean(1st),-0.395676,0.042534,-9.302478,0.0,inf,-0.479042,-0.31231


# Hypothetical Passenger

In [9]:
# One-row frame describing a single hypothetical passenger: third class,
# age 20, SexCode 0.
# NOTE(review): "Survived" holds the string "NA" as a placeholder, presumably
# only so the formula's outcome column exists in the new data. Confirm before
# changing it.
passenger_profile = {"PClass": "3rd", "Age": 20, "SexCode": 0, "Survived": "NA"}
passenger = pd.DataFrame(passenger_profile, index=[0])

In [10]:
# Comparisons evaluated at the single hypothetical passenger instead of being
# averaged over the sample: first as differences in predictions, then as
# ratios of predictions.
difference_contrasts = marginaleffects.comparisons(
    model, newdata=passenger, comparison="difference"
)
difference_contrasts.to_pandas()

ratio_contrasts = marginaleffects.comparisons(
    model, newdata=passenger, comparison="ratio"
)
ratio_contrasts.to_pandas()

Unnamed: 0,term,contrast,estimate,std_error,statistic,p_value,s_value,conf_low,conf_high
0,Age,+1,-0.007468,0.003391,-2.202124,0.027657,5.176234,-0.014115,-0.000821
1,PClass,2nd - 1st,-0.362817,0.097579,-3.718205,0.000201,12.283079,-0.554067,-0.171567
2,PClass,3rd - 1st,-0.423756,0.087102,-4.865078,1e-06,19.737331,-0.594472,-0.25304
3,SexCode,True - False,0.271698,0.058791,4.621387,4e-06,18.001082,0.156469,0.386927


Unnamed: 0,term,contrast,estimate,std_error,statistic,p_value,s_value,conf_low,conf_high
0,Age,+1,0.959306,0.015529,61.776648,0.0,inf,0.928871,0.989742
1,PClass,2nd / 1st,0.398823,0.103462,3.854787,0.0001158303,13.0757,0.196042,0.601605
2,PClass,3rd / 1st,0.29785,0.064036,4.651263,3.29908e-06,18.209505,0.172341,0.423358
3,SexCode,True / False,2.511484,0.504844,4.974772,6.532457e-07,20.545871,1.522008,3.50096


## More details

In [11]:
# Cross-contrasts (cross=True) are not supported by the Python marginaleffects
# package, so the call below is left commented out.
# marginaleffects.comparisons(model, variables={"SexCode": [0,1], "Age": 5}, cross=True, newdata=passenger).to_pandas()

In [12]:
# Effect of changing passenger class on the predicted probability of survival
# for a 50-year-old man and a 50-year-old woman.
# The grid fixes Age at 50 and takes both SexCode values; columns not named
# here are filled in by datagrid (the output shows PClass="3rd", Survived=0).
newdata = marginaleffects.datagrid(Age=50, SexCode=[0, 1])
pclass_contrasts = marginaleffects.comparisons(
    model, newdata=newdata, variables="PClass"
)
pclass_contrasts.to_pandas()

Unnamed: 0,Age,SexCode,rowid,term,contrast,estimate,std_error,statistic,p_value,s_value,conf_low,conf_high,predicted,predicted_lo,predicted_hi,PClass,Survived
0,50,0,0,PClass,2nd - 1st,-0.225975,0.047271,-4.780413,2e-06,19.124742,-0.318624,-0.133325,0.04576,0.230189,0.004214,3rd,0
1,50,1,1,PClass,2nd - 1st,-0.156229,0.103442,-1.510311,0.130964,2.932758,-0.358971,0.046513,0.446338,0.957648,0.801419,3rd,0
2,50,0,0,PClass,3rd - 1st,-0.184429,0.053478,-3.448686,0.000563,10.793752,-0.289244,-0.079614,0.04576,0.230189,0.04576,3rd,0
3,50,1,1,PClass,3rd - 1st,-0.51131,0.124199,-4.116866,3.8e-05,14.668313,-0.754735,-0.267885,0.446338,0.957648,0.446338,3rd,0
