## HEXAD vs outcomes and regularity vs outcomes

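Impact of HEXAD and regularity on outcomes: OLS models of the standardized final `test` score, controlling for `pre_test`, with (1) the six HEXAD scores (main effects and selected interactions) and (2) regularity measures (entropy, Gini, burstiness) computed per day and per week as predictors.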

In [80]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("..")

import pandas as pd
from statsmodels.stats.diagnostic import het_breuschpagan
import src.modeling.ols_model as ols_models

# Columns passed to the standardizer, grouped by role
score_cols = ['pre_test', 'test']
regularity_cols = [
    'entropy_day', 'entropy_week',
    'gini_day', 'gini_week',
    'burstiness_day', 'burstiness_week',
]
hexad_cols = ['HEXAD_P', 'HEXAD_S', 'HEXAD_F', 'HEXAD_A', 'HEXAD_D', 'HEXAD_R']
scale_cols = [*score_cols, *regularity_cols, *hexad_cols]

# Read the preprocessed feature table and standardize the listed columns in one chain
df = (
    pd.read_csv("../data/preprocessed/student_time_features_2021_2024.csv")
    .pipe(ols_models.standardize_columns, scale_cols)
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Final outcomes - HEXAD main effects

In [81]:
# HEXAD main-effects specification; the formula string is looked up by name
model_name = "hexad_outcomes_main"
formula = ols_models.get_ols_formula_by_name(model_name)
model = ols_models.fit_ols_model(df, formula)

# Breusch-Pagan heteroscedasticity test on the residuals vs. the design matrix.
# The result tuple is (LM statistic, LM p-value, F statistic, F p-value).
bp_test = het_breuschpagan(model.resid, model.model.exog)
bp_stat, bp_pval = bp_test[:2]
f_stat, f_pval = bp_test[2:]

print("Breusch-Pagan test statistic:", bp_stat)
print("p-value:", bp_pval)

# Heteroscedasticity is likely present, so inference uses HC1 robust standard errors
robust_model = model.get_robustcov_results(cov_type='HC1')


Breusch-Pagan test statistic: 27.15852151688184
p-value: 0.00031207754034122187


In [82]:
# Display the formula string that was resolved for `model_name`
formula

'test ~ pre_test + HEXAD_P + HEXAD_S + HEXAD_F + HEXAD_A + HEXAD_D + HEXAD_R'

In [83]:
# Regression table from the HC1-robust results object (not the plain OLS fit)
robust_model.summary()

0,1,2,3
Dep. Variable:,test,R-squared:,0.175
Model:,OLS,Adj. R-squared:,0.169
Method:,Least Squares,F-statistic:,26.25
Date:,"Fri, 23 May 2025",Prob (F-statistic):,1.0400000000000001e-32
Time:,17:10:22,Log-Likelihood:,-1151.9
No. Observations:,871,AIC:,2320.0
Df Residuals:,863,BIC:,2358.0
Df Model:,7,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.16e-16,0.031,6.99e-15,1.000,-0.061,0.061
pre_test,0.4086,0.032,12.862,0.000,0.346,0.471
HEXAD_P,-0.0359,0.042,-0.854,0.393,-0.118,0.047
HEXAD_S,-0.0177,0.038,-0.470,0.639,-0.092,0.056
HEXAD_F,0.0874,0.041,2.115,0.035,0.006,0.168
HEXAD_A,0.0019,0.046,0.042,0.966,-0.089,0.093
HEXAD_D,-0.0603,0.035,-1.725,0.085,-0.129,0.008
HEXAD_R,0.0831,0.042,1.981,0.048,0.001,0.166

0,1,2,3
Omnibus:,17.43,Durbin-Watson:,1.871
Prob(Omnibus):,0.0,Jarque-Bera (JB):,17.821
Skew:,-0.333,Prob(JB):,0.000135
Kurtosis:,2.781,Cond. No.,3.49


## Final outcomes - HEXAD interactions

In [84]:
# HEXAD specification with pairwise interaction terms; formula is looked up by name
model_name = "hexad_outcomes_interactions"
formula = ols_models.get_ols_formula_by_name(model_name)
model = ols_models.fit_ols_model(df, formula)

# Breusch-Pagan heteroscedasticity test on the residuals vs. the design matrix.
# The result tuple is (LM statistic, LM p-value, F statistic, F p-value).
bp_test = het_breuschpagan(model.resid, model.model.exog)
bp_stat, bp_pval = bp_test[:2]
f_stat, f_pval = bp_test[2:]

print("Breusch-Pagan test statistic:", bp_stat)
print("p-value:", bp_pval)

# Heteroscedasticity is likely present, so inference uses HC1 robust standard errors
robust_model = model.get_robustcov_results(cov_type='HC1')


Breusch-Pagan test statistic: 33.39049394158351
p-value: 0.00023413087860081554


In [85]:
# print() rather than a bare expression: this formula spans several lines,
# and printing renders the line breaks instead of escaped "\n" characters.
print(formula)


            test ~ pre_test
            + HEXAD_P + HEXAD_S + HEXAD_F + HEXAD_A + HEXAD_D + HEXAD_R
            + HEXAD_F:HEXAD_D + HEXAD_S:HEXAD_P + HEXAD_A:HEXAD_R
            


In [86]:
# Regression table from the HC1-robust results object (not the plain OLS fit)
robust_model.summary()

0,1,2,3
Dep. Variable:,test,R-squared:,0.186
Model:,OLS,Adj. R-squared:,0.176
Method:,Least Squares,F-statistic:,19.76
Date:,"Fri, 23 May 2025",Prob (F-statistic):,4.32e-33
Time:,17:10:22,Log-Likelihood:,-1146.4
No. Observations:,871,AIC:,2315.0
Df Residuals:,860,BIC:,2367.0
Df Model:,10,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0093,0.036,-0.258,0.797,-0.080,0.062
pre_test,0.4126,0.032,13.008,0.000,0.350,0.475
HEXAD_P,-0.0319,0.043,-0.745,0.457,-0.116,0.052
HEXAD_S,-0.0014,0.038,-0.036,0.972,-0.076,0.073
HEXAD_F,0.0850,0.041,2.049,0.041,0.004,0.166
HEXAD_A,-0.0172,0.047,-0.366,0.714,-0.109,0.075
HEXAD_D,-0.0794,0.036,-2.235,0.026,-0.149,-0.010
HEXAD_R,0.0838,0.042,1.978,0.048,0.001,0.167
HEXAD_F:HEXAD_D,0.0838,0.027,3.094,0.002,0.031,0.137

0,1,2,3
Omnibus:,14.619,Durbin-Watson:,1.866
Prob(Omnibus):,0.001,Jarque-Bera (JB):,15.017
Skew:,-0.309,Prob(JB):,0.000549
Kurtosis:,2.825,Cond. No.,4.03


## Regularity - days

In [87]:
# Day-level regularity specification; the formula string is looked up by name
model_name = "regularity_day_main"
formula = ols_models.get_ols_formula_by_name(model_name)
# NOTE(review): unlike the HEXAD cells, no Breusch-Pagan test or robust covariance
# is applied to this fit - confirm that the default standard errors are intended.
model = ols_models.fit_ols_model(df, formula)

In [88]:
# Display the formula string that was resolved for `model_name`
formula

'test ~ pre_test + entropy_day + burstiness_day + gini_day'

In [89]:
# Regression table from the plain OLS fit (default, non-robust covariance)
model.summary()

0,1,2,3
Dep. Variable:,test,R-squared:,0.218
Model:,OLS,Adj. R-squared:,0.214
Method:,Least Squares,F-statistic:,53.88
Date:,"Fri, 23 May 2025",Prob (F-statistic):,4.61e-40
Time:,17:10:22,Log-Likelihood:,-1007.8
No. Observations:,776,AIC:,2026.0
Df Residuals:,771,BIC:,2049.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0388,0.032,1.210,0.227,-0.024,0.102
pre_test,0.4983,0.035,14.403,0.000,0.430,0.566
entropy_day,0.2545,0.041,6.184,0.000,0.174,0.335
burstiness_day,0.0004,0.163,0.003,0.998,-0.319,0.320
gini_day,-0.0159,0.159,-0.100,0.921,-0.328,0.296

0,1,2,3
Omnibus:,9.414,Durbin-Watson:,1.891
Prob(Omnibus):,0.009,Jarque-Bera (JB):,9.367
Skew:,-0.245,Prob(JB):,0.00925
Kurtosis:,2.777,Cond. No.,11.3


## Regularity - weeks

In [90]:
# Week-level regularity specification; the formula string is looked up by name
model_name = "regularity_week_main"
formula = ols_models.get_ols_formula_by_name(model_name)
# NOTE(review): unlike the HEXAD cells, no Breusch-Pagan test or robust covariance
# is applied to this fit - confirm that the default standard errors are intended.
model = ols_models.fit_ols_model(df, formula)

In [91]:
# Display the formula string that was resolved for `model_name`
formula

'test ~ pre_test + entropy_week + burstiness_week + gini_week'

In [92]:
# Regression table from the plain OLS fit (default, non-robust covariance)
model.summary()

0,1,2,3
Dep. Variable:,test,R-squared:,0.214
Model:,OLS,Adj. R-squared:,0.21
Method:,Least Squares,F-statistic:,52.43
Date:,"Fri, 23 May 2025",Prob (F-statistic):,4.34e-39
Time:,17:10:23,Log-Likelihood:,-1010.1
No. Observations:,776,AIC:,2030.0
Df Residuals:,771,BIC:,2053.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0367,0.032,1.142,0.254,-0.026,0.100
pre_test,0.4702,0.034,13.847,0.000,0.404,0.537
entropy_week,0.2523,0.046,5.534,0.000,0.163,0.342
burstiness_week,0.0298,0.049,0.611,0.541,-0.066,0.125
gini_week,-0.0447,0.053,-0.840,0.401,-0.149,0.060

0,1,2,3
Omnibus:,9.672,Durbin-Watson:,1.888
Prob(Omnibus):,0.008,Jarque-Bera (JB):,9.546
Skew:,-0.244,Prob(JB):,0.00846
Kurtosis:,2.762,Cond. No.,3.15
