# Multilevel Modeling 
### Used when data have a nested or hierarchical structure

In [55]:
import pandas as pd
import numpy as np
import statsmodels.api as sm


# Read the exam-score dataset and preview its first five rows.
# No manual pd.get_dummies() step is applied: the formula-based models
# further down receive StudentGender as a raw categorical column.
df = pd.read_csv(filepath_or_buffer="../data/mlm2.csv")
df.head(n=5)

Unnamed: 0,SchoolID,StudentID,SchoolQuality,StudentGender,StudentAge,ExamScore
0,0,0,2.561318,Female,15.178315,61.971248
1,0,1,2.561318,Male,16.548217,58.067884
2,0,2,2.561318,Female,15.809929,65.20505
3,0,3,2.561318,Male,11.623108,51.965632
4,0,4,2.561318,Female,14.484574,53.004251


In [67]:
# Baseline: a single pooled OLS regression, fitted without any school grouping.
model = sm.OLS.from_formula(
    formula="ExamScore ~ SchoolQuality + StudentGender + StudentAge",
    data=df,
)
results = model.fit()
results.summary()

0,1,2,3
Dep. Variable:,ExamScore,R-squared:,0.071
Model:,OLS,Adj. R-squared:,0.066
Method:,Least Squares,F-statistic:,15.17
Date:,"Wed, 05 Jun 2024",Prob (F-statistic):,1.58e-09
Time:,12:44:13,Log-Likelihood:,-2237.2
No. Observations:,600,AIC:,4482.0
Df Residuals:,596,BIC:,4500.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,55.3006,4.682,11.811,0.000,46.105,64.496
StudentGender[T.Male],-0.3864,0.829,-0.466,0.642,-2.015,1.243
SchoolQuality,4.4861,0.713,6.293,0.000,3.086,5.886
StudentAge,-0.7448,0.285,-2.617,0.009,-1.304,-0.186

0,1,2,3
Omnibus:,1.158,Durbin-Watson:,2.118
Prob(Omnibus):,0.561,Jarque-Bera (JB):,1.198
Skew:,-0.048,Prob(JB):,0.549
Kurtosis:,2.804,Cond. No.,174.0


In [79]:
# Multilevel model - random intercept for each school (students grouped by SchoolID).
# No re_formula is passed, so only the intercept varies by group; the slopes of
# SchoolQuality, StudentGender and StudentAge are fixed effects.
# MixedLM is reached through `sm`, the same way the OLS baseline uses
# sm.OLS.from_formula; the `smf` alias is not among the notebook's imports and
# would raise NameError on a fresh kernel.
model = sm.MixedLM.from_formula(
    "ExamScore ~ SchoolQuality + StudentGender + StudentAge",
    df,
    groups=df["SchoolID"],
)
results = model.fit()
results.summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,ExamScore
No. Observations:,600,Method:,REML
No. Groups:,20,Scale:,101.7589
Min. group size:,30,Log-Likelihood:,-2236.1366
Max. group size:,30,Converged:,Yes
Mean group size:,30.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,55.239,4.734,11.668,0.000,45.961,64.518
StudentGender[T.Male],-0.404,0.831,-0.486,0.627,-2.033,1.225
SchoolQuality,4.487,0.752,5.968,0.000,3.013,5.960
StudentAge,-0.740,0.285,-2.598,0.009,-1.299,-0.182
Group Var,0.395,0.127,,,,


In [69]:
# View the estimated random effect for each group (one entry per SchoolID).
# Must read from `results`, the fitted mixed model assigned in the preceding
# cell; the name `result` is never defined and raises NameError on a fresh kernel.
results.random_effects

{0: Group   -0.089827
 dtype: float64,
 1: Group    0.119917
 dtype: float64,
 2: Group   -0.189105
 dtype: float64,
 3: Group    0.272012
 dtype: float64,
 4: Group   -0.100611
 dtype: float64,
 5: Group   -0.032418
 dtype: float64,
 6: Group    0.319981
 dtype: float64,
 7: Group   -0.156517
 dtype: float64,
 8: Group    0.122116
 dtype: float64,
 9: Group   -0.054634
 dtype: float64,
 10: Group   -0.197735
 dtype: float64,
 11: Group    0.005091
 dtype: float64,
 12: Group    0.02378
 dtype: float64,
 13: Group   -0.242026
 dtype: float64,
 14: Group    0.164078
 dtype: float64,
 15: Group   -0.099376
 dtype: float64,
 16: Group    0.022056
 dtype: float64,
 17: Group   -0.047202
 dtype: float64,
 18: Group    0.079186
 dtype: float64,
 19: Group    0.081236
 dtype: float64}

In [71]:
# Random intercepts for grouping by school and random slopes for the gender variable.
# re_formula="~StudentGender" adds a per-school random slope for StudentGender
# alongside the per-school random intercept.
# MixedLM is reached through `sm`, the same way the OLS baseline uses
# sm.OLS.from_formula; the `smf` alias is not among the notebook's imports and
# would raise NameError on a fresh kernel.
# NOTE(review): the recorded summary for this cell reports "Converged: No" -
# treat the variance/covariance estimates with caution and consider trying a
# different optimizer in fit() before interpreting them.
model = sm.MixedLM.from_formula(
    "ExamScore ~ SchoolQuality + StudentGender + StudentAge",
    df,
    groups=df["SchoolID"],
    re_formula="~StudentGender",
)
results = model.fit()
results.summary()



0,1,2,3
Model:,MixedLM,Dependent Variable:,ExamScore
No. Observations:,600,Method:,REML
No. Groups:,20,Scale:,100.3594
Min. group size:,30,Log-Likelihood:,-2237.1657
Max. group size:,30,Converged:,No
Mean group size:,30.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,55.335,5.075,10.904,0.000,45.389,65.282
StudentGender[T.Male],-0.432,0.876,-0.494,0.622,-2.149,1.284
SchoolQuality,4.314,1.026,4.206,0.000,2.304,6.325
StudentAge,-0.714,0.287,-2.491,0.013,-1.276,-0.152
Group Var,1.230,0.413,,,,
Group x StudentGender[T.Male] Cov,1.208,0.449,,,,
StudentGender[T.Male] Var,1.456,0.819,,,,


In [77]:
# Show the per-group random effects (intercept and gender slope) under a heading.
# A single print call with a newline separator emits the heading line followed
# by the dictionary of random effects.
print(
    "Random Effects (Intercepts and Slopes):",
    results.random_effects,
    sep="\n",
)

Random Effects (Intercepts and Slopes):
{0: Group                   -0.349715
StudentGender[T.Male]   -0.446320
dtype: float64, 1: Group                    0.526130
StudentGender[T.Male]    0.530042
dtype: float64, 2: Group                   -0.627812
StudentGender[T.Male]   -0.704971
dtype: float64, 3: Group                    1.170392
StudentGender[T.Male]    1.248360
dtype: float64, 4: Group                   -0.489985
StudentGender[T.Male]   -0.566750
dtype: float64, 5: Group                   -0.072265
StudentGender[T.Male]   -0.112791
dtype: float64, 6: Group                    1.078677
StudentGender[T.Male]    1.128138
dtype: float64, 7: Group                   -0.292535
StudentGender[T.Male]   -0.247124
dtype: float64, 8: Group                    0.394415
StudentGender[T.Male]    0.405798
dtype: float64, 9: Group                    0.10351
StudentGender[T.Male]    0.14965
dtype: float64, 10: Group                   -0.796562
StudentGender[T.Male]   -0.873995
dtype: float64, 11: