# Summarize multiple model fitting results. 
We explore the model of "vote ~ logpopul + income" stratified by age for illustration purpose.

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
df = sm.datasets.anes96.load_pandas().data

In [3]:
dfM = df.copy()
inc = "income"
df[inc] = (df[inc]
          .mask(df[inc] <= 24, "17-24")
          .mask(df[inc] <= 16, "9-16")
          .mask(df[inc] <= 8, "0-8")
         )

Model1: "vote ~ logpopul"  
Model2: "vote ~ logpopul + income"  
Model3: "vote ~ logpopul" for 60 years old or under  
Model4: "vote ~ logpopul + income" for 60 years old or under.  

In [4]:
res1 = smf.glm(formula="vote ~ logpopul" ,data=df, family = sm.families.Binomial()).fit()
res2 = smf.glm(formula="vote ~ logpopul + income" ,data=df, family = sm.families.Binomial()).fit()

df_60 = df.loc[df["age"].le(60)]
res3 = smf.glm(formula="vote ~ logpopul" ,data=df_60, family = sm.families.Binomial()).fit()
res4 = smf.glm(formula="vote ~ logpopul + income" ,data=df_60, family = sm.families.Binomial()).fit()

In [5]:
results = [res1, res2, res3, res4]
results = [sm.add_params_summary(res, func=np.exp, alpha=0.05) for res in results]

In [6]:
results[1].params_summary

Unnamed: 0,coef,std err,t,P>|t|,f(coef),[0.025,0.975],str
Intercept,-0.756182,0.21124,-3.57972,0.0003439629,0.469456,0.310303,0.710236,"0.469 (0.310, 0.710)"
income[T.17-24],0.958841,0.223063,4.298526,1.719374e-05,2.608672,1.684799,4.039158,"2.609 (1.685, 4.039)"
income[T.9-16],0.398255,0.240439,1.656366,0.09764772,1.489224,0.929604,2.385735,"1.489 (0.930, 2.386)"
logpopul,-0.107381,0.021515,-4.990984,6.007257e-07,0.898183,0.861096,0.936869,"0.898 (0.861, 0.937)"


In [7]:
df_res = sm.multi_model_summary(
    results, 
    accessor=lambda x : x.params_summary["str"], 
    columns=None, 
    index = ["logpopul", 'income[T.0-8]','income[T.9-16]','income[T.17-24]'], 
    fill_value="Ref.",
    )

With index specification, although "income[T.0-8]" is not existed, insert "Ref." to this row. 

In [8]:
df_res

Unnamed: 0,Model1,Model2,Model3,Model4
logpopul,"0.895 (0.859, 0.933)","0.898 (0.861, 0.937)","0.874 (0.834, 0.916)","0.880 (0.839, 0.922)"
income[T.0-8],Ref.,Ref.,Ref.,Ref.
income[T.9-16],,"1.489 (0.930, 2.386)",,"1.239 (0.690, 2.225)"
income[T.17-24],,"2.609 (1.685, 4.039)",,"2.376 (1.402, 4.028)"


After slight modification, you can obtain a table encountered in epidemiological papers. 

In [9]:
df_res[["Model1", "Model3"]] = df_res[["Model1","Model3"]].replace("Ref.",np.nan)
df_res

Unnamed: 0,Model1,Model2,Model3,Model4
logpopul,"0.895 (0.859, 0.933)","0.898 (0.861, 0.937)","0.874 (0.834, 0.916)","0.880 (0.839, 0.922)"
income[T.0-8],,Ref.,,Ref.
income[T.9-16],,"1.489 (0.930, 2.386)",,"1.239 (0.690, 2.225)"
income[T.17-24],,"2.609 (1.685, 4.039)",,"2.376 (1.402, 4.028)"


# Mosaic model summary
Customly layout multiple model fitting results.

In [10]:
mosaic = [[0,1],
          [2,3]]
sm.mosaic_model_summary(results, mosaic=mosaic )

Unnamed: 0_level_0,Unnamed: 1_level_0,Model1,Model2
Row,index,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Intercept,-0.073026,-0.756182
0,logpopul,-0.110967,-0.107381
0,income[T.17-24],,0.958841
0,income[T.9-16],,0.398255
1,Intercept,-0.071608,-0.698724
1,logpopul,-0.135041,-0.128286
1,income[T.17-24],,0.865501
1,income[T.9-16],,0.214079


In [11]:
sm.mosaic_model_summary(results, mosaic=mosaic, accessor=lambda x: x.params_summary["str"], 
                         columns=["model1", "model2"], rows=["All","Age<=60"])

Unnamed: 0_level_0,Unnamed: 1_level_0,model1,model2
Row,index,Unnamed: 2_level_1,Unnamed: 3_level_1
All,Intercept,"0.930 (0.790, 1.094)","0.469 (0.310, 0.710)"
All,logpopul,"0.895 (0.859, 0.933)","0.898 (0.861, 0.937)"
All,income[T.17-24],,"2.609 (1.685, 4.039)"
All,income[T.9-16],,"1.489 (0.930, 2.386)"
Age<=60,Intercept,"0.931 (0.775, 1.118)","0.497 (0.299, 0.826)"
Age<=60,logpopul,"0.874 (0.834, 0.916)","0.880 (0.839, 0.922)"
Age<=60,income[T.17-24],,"2.376 (1.402, 4.028)"
Age<=60,income[T.9-16],,"1.239 (0.690, 2.225)"


If specify accessor to access aic, it enables you to check aic values over models.

In [12]:
sm.mosaic_model_summary(results, accessor=lambda x : x.aic, columns=None, mosaic=mosaic)

Unnamed: 0_level_0,Unnamed: 1_level_0,Model1,Model2
Row,index,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,1258.025051,1234.873163
1,0,952.794948,937.145104


In [13]:
mosaic = [[0,None],
          [1,None],
          [2,3]]
sm.mosaic_model_summary(
    results, 
    mosaic=mosaic, 
    columns=["Case1", "Case2"], 
    rows=["All","Age<65", "All2"],
    )

Unnamed: 0_level_0,Unnamed: 1_level_0,Case1,Case2
Row,index,Unnamed: 2_level_1,Unnamed: 3_level_1
All,Intercept,-0.073026,
All,logpopul,-0.110967,
All,income[T.17-24],,
All,income[T.9-16],,
Age<65,Intercept,-0.756182,
Age<65,logpopul,-0.107381,
Age<65,income[T.17-24],0.958841,
Age<65,income[T.9-16],0.398255,
All2,Intercept,-0.071608,-0.698724
All2,logpopul,-0.135041,-0.128286
