## Explore the impact of regularity

In [93]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("..")

import pandas as pd
import src.modeling.ols_model as ols_models

# Load the preprocessed student time-feature table and scale the modelling columns.
# The path is relative, so the notebook must be run from its own directory.
csv_path = "../data/preprocessed/student_time_features_2021_2024.csv"

# Columns passed to standardize_columns, grouped by family (order matches the
# original flat list).
test_cols = ['pre_test', 'test']
regularity_cols = [
    'entropy_day', 'entropy_week',
    'gini_day', 'gini_week',
    'burstiness_day', 'burstiness_week',
]
hexad_cols = ['HEXAD_P', 'HEXAD_S', 'HEXAD_F', 'HEXAD_A', 'HEXAD_D', 'HEXAD_R']
scale_cols = test_cols + regularity_cols + hexad_cols

# `df` is the frame every model cell below fits on.
df = ols_models.standardize_columns(pd.read_csv(csv_path), scale_cols)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Entropy (week) - HEXAD

This model examines how students’ HEXAD traits and prior knowledge relate to the distribution (entropy) of their activity across weeks. It helps identify which traits are associated with more consistent versus more irregular weekly engagement patterns.

In [94]:
# Fit the main weekly-entropy model: resolve its formula by name, then fit OLS on `df`
# (the frame produced by the load-and-scale cell).
# NOTE: `formula` and `model` are notebook-global and are rebound by each later section,
# so run this cell immediately before the display cells that follow it.
model_name = "entropy_week_main"
formula = ols_models.get_ols_formula_by_name(model_name)
model = ols_models.fit_ols_model(df, formula)

In [95]:
# Echo the formula string resolved for `model_name` so the exact specification is
# recorded in the notebook output next to the results.
formula

'entropy_week ~ pre_test + HEXAD_P + HEXAD_S + HEXAD_F + HEXAD_A + HEXAD_D + HEXAD_R'

In [96]:
# Render the fitted model's summary tables (fit statistics, coefficient table,
# residual diagnostics) for the weekly-entropy model.
model.summary()

0,1,2,3
Dep. Variable:,entropy_week,R-squared:,0.101
Model:,OLS,Adj. R-squared:,0.092
Method:,Least Squares,F-statistic:,12.28
Date:,"Fri, 23 May 2025",Prob (F-statistic):,6.08e-15
Time:,17:10:29,Log-Likelihood:,-1059.9
No. Observations:,776,AIC:,2136.0
Df Residuals:,768,BIC:,2173.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0333,0.034,-0.966,0.334,-0.101,0.034
pre_test,-0.2559,0.035,-7.348,0.000,-0.324,-0.188
HEXAD_P,-0.0228,0.051,-0.449,0.653,-0.122,0.077
HEXAD_S,0.0185,0.044,0.415,0.678,-0.069,0.106
HEXAD_F,-0.0082,0.048,-0.169,0.866,-0.103,0.087
HEXAD_A,0.0711,0.054,1.316,0.189,-0.035,0.177
HEXAD_D,-0.1092,0.039,-2.788,0.005,-0.186,-0.032
HEXAD_R,0.1066,0.048,2.209,0.028,0.012,0.201

0,1,2,3
Omnibus:,62.165,Durbin-Watson:,2.04
Prob(Omnibus):,0.0,Jarque-Bera (JB):,30.891
Skew:,-0.311,Prob(JB):,1.96e-07
Kurtosis:,2.247,Cond. No.,3.52


## Gini (week) - HEXAD

This model examines how students’ HEXAD traits and prior knowledge relate to the concentration (Gini index) of their activity across weeks. It helps identify which traits are associated with more uneven or more balanced weekly engagement.

In [97]:
# Fit the main weekly-Gini model: resolve its formula by name, then fit OLS on `df`
# (the frame produced by the load-and-scale cell).
# NOTE: this rebinds the notebook-global `model_name`, `formula` and `model`; any
# display cell run after this point shows the Gini model, not an earlier one.
model_name = "gini_week_main"
formula = ols_models.get_ols_formula_by_name(model_name)
model = ols_models.fit_ols_model(df, formula)

In [98]:
# Echo the formula string resolved for the weekly-Gini model so the specification
# is recorded in the notebook output.
formula

'gini_week ~ pre_test + HEXAD_P + HEXAD_S + HEXAD_F + HEXAD_A + HEXAD_D + HEXAD_R'

In [99]:
# Render the fitted model's summary tables (fit statistics, coefficient table,
# residual diagnostics) for the weekly-Gini model.
model.summary()

0,1,2,3
Dep. Variable:,gini_week,R-squared:,0.076
Model:,OLS,Adj. R-squared:,0.068
Method:,Least Squares,F-statistic:,9.071
Date:,"Fri, 23 May 2025",Prob (F-statistic):,8.66e-11
Time:,17:10:29,Log-Likelihood:,-1070.3
No. Observations:,776,AIC:,2157.0
Df Residuals:,768,BIC:,2194.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0268,0.035,-0.768,0.443,-0.095,0.042
pre_test,-0.2478,0.035,-7.023,0.000,-0.317,-0.179
HEXAD_P,0.0206,0.051,0.401,0.688,-0.080,0.121
HEXAD_S,0.1026,0.045,2.279,0.023,0.014,0.191
HEXAD_F,-0.0144,0.049,-0.295,0.768,-0.111,0.082
HEXAD_A,0.0388,0.055,0.708,0.479,-0.069,0.146
HEXAD_D,-0.0450,0.040,-1.135,0.257,-0.123,0.033
HEXAD_R,-0.0129,0.049,-0.264,0.792,-0.109,0.083

0,1,2,3
Omnibus:,32.876,Durbin-Watson:,2.029
Prob(Omnibus):,0.0,Jarque-Bera (JB):,34.43
Skew:,-0.49,Prob(JB):,3.34e-08
Kurtosis:,2.677,Cond. No.,3.52


## Burstiness (week) - HEXAD

This model examines how students’ HEXAD traits and prior knowledge relate to the burstiness of their weekly activity. It helps identify which traits are associated with more irregular or clustered patterns of engagement over time.

In [100]:
# Fit the main weekly-burstiness model: resolve its formula by name, then fit OLS on
# `df` (the frame produced by the load-and-scale cell).
# NOTE: this rebinds the notebook-global `model_name`, `formula` and `model`; any
# display cell run after this point shows the burstiness model, not an earlier one.
model_name = "burstiness_week_main"
formula = ols_models.get_ols_formula_by_name(model_name)
model = ols_models.fit_ols_model(df, formula)

In [101]:
# Echo the formula string resolved for the weekly-burstiness model so the
# specification is recorded in the notebook output.
formula

'burstiness_week ~ pre_test + HEXAD_P + HEXAD_S + HEXAD_F + HEXAD_A + HEXAD_D + HEXAD_R'

In [102]:
# Render the fitted model's summary tables (fit statistics, coefficient table,
# residual diagnostics) for the weekly-burstiness model.
# NOTE(review): in the saved run the overall F-test is not significant at 0.05
# (Prob (F-statistic) = 0.0827), the covariance type is "nonrobust", and the residuals
# are strongly skewed and heavy-tailed (skew -1.483, kurtosis 6.801). Read individual
# coefficients with caution; heteroskedasticity-robust standard errors may be worth
# considering -- TODO confirm whether fit_ols_model exposes a covariance option.
model.summary()

0,1,2,3
Dep. Variable:,burstiness_week,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,1.808
Date:,"Fri, 23 May 2025",Prob (F-statistic):,0.0827
Time:,17:10:30,Log-Likelihood:,-1094.8
No. Observations:,776,AIC:,2206.0
Df Residuals:,768,BIC:,2243.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0063,0.036,-0.174,0.862,-0.077,0.064
pre_test,-0.0919,0.036,-2.524,0.012,-0.163,-0.020
HEXAD_P,0.0494,0.053,0.932,0.351,-0.055,0.154
HEXAD_S,0.0584,0.046,1.258,0.209,-0.033,0.150
HEXAD_F,-0.0353,0.051,-0.699,0.485,-0.135,0.064
HEXAD_A,0.0364,0.057,0.645,0.519,-0.075,0.147
HEXAD_D,0.0232,0.041,0.566,0.571,-0.057,0.104
HEXAD_R,-0.0917,0.050,-1.818,0.070,-0.191,0.007

0,1,2,3
Omnibus:,238.03,Durbin-Watson:,1.963
Prob(Omnibus):,0.0,Jarque-Bera (JB):,751.626
Skew:,-1.483,Prob(JB):,6.120000000000001e-164
Kurtosis:,6.801,Cond. No.,3.52
