## HEXAD vs outcomes and regularity vs outcomes

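This notebook fits OLS models to estimate the impact of HEXAD scores and of regularity measures (entropy, Gini, burstiness; per day and per week) on outcomes.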

In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("..")

import pandas as pd
from statsmodels.stats.diagnostic import het_breuschpagan
import src.modeling.ols_model as ols_models

# Read the preprocessed student time-feature table, then hand the columns
# listed below (test scores, regularity metrics, HEXAD scores) to
# ols_models.standardize_columns before any model is fitted.
df = pd.read_csv("../data/preprocessed/student_time_features_2021_2024.csv")

score_cols = ['pre_test', 'test']
regularity_cols = [
    'entropy_day', 'entropy_week',
    'gini_day', 'gini_week',
    'burstiness_day', 'burstiness_week',
]
hexad_cols = ['HEXAD_P', 'HEXAD_S', 'HEXAD_F', 'HEXAD_A', 'HEXAD_D', 'HEXAD_R']

# Concatenate the groups in this order: scores, regularity, HEXAD.
scale_cols = score_cols + regularity_cols + hexad_cols
df = ols_models.standardize_columns(df, scale_cols)

## Final test - HEXAD main effects

In [None]:
# Look up the formula stored under this model name and fit it on the
# standardized data frame.
model_name = "hexad_outcomes_main"
formula = ols_models.get_ols_formula_by_name(model_name)
model = ols_models.fit_ols_model(df, formula)

# Breusch-Pagan test on the fitted residuals against the model's own design
# matrix. statsmodels returns a 4-tuple:
# (LM statistic, LM p-value, F statistic, F p-value).
# The original analysis notes that heteroscedasticity is likely present.
residuals = model.resid
design_matrix = model.model.exog
bp_test = het_breuschpagan(residuals, design_matrix)
bp_stat, bp_pval, f_stat, f_pval = bp_test

print("Breusch-Pagan test statistic:", bp_stat)
print("p-value:", bp_pval)

# Because of that, inference is reported with HC1 heteroscedasticity-robust
# standard errors rather than the covariance of the plain fit.
robust_model = model.get_robustcov_results(cov_type='HC1')


In [None]:
# Echo the formula that was passed to fit_ols_model for the main HEXAD model
# (the notebook renders the value of a cell's last expression).
formula

In [None]:
# Regression table for the results re-estimated with cov_type='HC1'.
robust_model.summary()

## Final outcomes - HEXAD interactions

In [None]:
# Fetch the interaction-model formula by name and fit it on the same
# standardized data frame.
model_name = "hexad_outcomes_interactions"
formula = ols_models.get_ols_formula_by_name(model_name)
model = ols_models.fit_ols_model(df, formula)

# Breusch-Pagan test on this model's residuals and design matrix. The result
# unpacks as (LM statistic, LM p-value, F statistic, F p-value).
# The original analysis notes that heteroscedasticity is likely present.
residuals = model.resid
design_matrix = model.model.exog
bp_test = het_breuschpagan(residuals, design_matrix)
bp_stat, bp_pval, f_stat, f_pval = bp_test

print("Breusch-Pagan test statistic:", bp_stat)
print("p-value:", bp_pval)

# Re-estimate the covariance with the HC1 heteroscedasticity-robust estimator
# so the reported standard errors do not rely on constant error variance.
robust_model = model.get_robustcov_results(cov_type='HC1')


In [None]:
# Print the formula that was passed to fit_ols_model for the interaction model.
print(formula)

In [None]:
# Regression table for the interaction model re-estimated with cov_type='HC1'.
robust_model.summary()

## Regularity - days

In [None]:
# Fit the day-level regularity model: look up its formula by name, then fit
# it on the standardized data frame.
# NOTE(review): unlike the HEXAD sections, this section runs no
# Breusch-Pagan test and requests no HC1 robust covariance — confirm that
# this is intentional.
model_name = "regularity_day_main"
formula = ols_models.get_ols_formula_by_name(model_name)
model = ols_models.fit_ols_model(df, formula)

In [None]:
# Echo the formula that was passed to fit_ols_model for the day-level
# regularity model.
formula

In [None]:
# Regression table taken directly from `model`, i.e. with whatever covariance
# fit_ols_model produced; get_robustcov_results is not called in this section.
model.summary()

## Regularity - weeks

In [None]:
# Fit the week-level regularity model: look up its formula by name, then fit
# it on the standardized data frame.
# NOTE(review): unlike the HEXAD sections, this section runs no
# Breusch-Pagan test and requests no HC1 robust covariance — confirm that
# this is intentional.
model_name = "regularity_week_main"
formula = ols_models.get_ols_formula_by_name(model_name)
model = ols_models.fit_ols_model(df, formula)

In [None]:
# Echo the formula that was passed to fit_ols_model for the week-level
# regularity model.
formula

In [None]:
# Regression table taken directly from `model`, i.e. with whatever covariance
# fit_ols_model produced; get_robustcov_results is not called in this section.
model.summary()