In [None]:
import pandas as pd
from scipy.stats import pearsonr

# Load the data set straight from the GitHub URL (needs network access);
# header=0 takes the column names from the first row of the CSV.
data = pd.read_csv(
    'https://raw.githubusercontent.com/celstark/n247data/main/data/StarkData.csv',
    header=0,
)

# Derived scores, each the difference between two columns of the raw table.
# NOTE(review): presumably bias-corrected recognition and lure-discrimination
# indices -- confirm against the data set's codebook.
data['Recog'] = data['pO_R'] - data['pO_F']
data['LDI'] = data['pS_L'] - data['pS_F']

# The Age/LDI correlation computed two ways: pandas returns only r ...
print('Age vs. LDI {0:.2f}'.format(data['LDI'].corr(data['Age'])))

# ... while scipy's pearsonr also returns the p-value.
(r, p) = pearsonr(data['Age'], data['LDI'])
# p is printed in general format (.3g) rather than fixed-point: with .4f a very
# small p-value is rounded to "0.0000", which reads as an impossible p of zero.
print(f'  r={r:.2f} p={p:.3g} r^2={r ** 2:.3f}')

Age vs. LDI -0.49
  r=-0.49 p=0.0000 r^2=0.243


In [3]:
import statsmodels.api as sm

# Array-style statsmodels API: the response and the design matrix are built by hand.
print('Standard statsmodels setup')

# Exogenous (x) side: Age plus a constant column for the intercept / offset term.
# prepend=False places the constant after Age rather than in front of it.
smLM_exog = sm.add_constant(data['Age'], prepend=False)

# Endogenous (y) side: the value being predicted.
smLM_endog = data['LDI']

# Ordinary least squares of LDI on Age and the constant.
smLM_model = sm.OLS(endog=smLM_endog, exog=smLM_exog)
smLM_results = smLM_model.fit()
print(smLM_results.summary())

Standard statsmodels setup
                            OLS Regression Results                            
Dep. Variable:                    LDI   R-squared:                       0.243
Model:                            OLS   Adj. R-squared:                  0.235
Method:                 Least Squares   F-statistic:                     30.78
Date:                Tue, 20 May 2025   Prob (F-statistic):           2.55e-07
Time:                        09:25:01   Log-Likelihood:                 31.244
No. Observations:                  98   AIC:                            -58.49
Df Residuals:                      96   BIC:                            -53.32
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Age           -0.0050    

In [4]:
import statsmodels.formula.api as smf

# Formula-style statsmodels API: the model is described by an R-like formula
# string whose names are looked up as columns of `data`.
print('R-style statsmodels setup')
smfLM_model = smf.ols(
    formula='LDI ~ Age',
    data=data,
)
smfLM_results = smfLM_model.fit()

# Print the full regression table for the fitted model.
print(smfLM_results.summary())

R-style statsmodels setup
                            OLS Regression Results                            
Dep. Variable:                    LDI   R-squared:                       0.243
Model:                            OLS   Adj. R-squared:                  0.235
Method:                 Least Squares   F-statistic:                     30.78
Date:                Tue, 20 May 2025   Prob (F-statistic):           2.55e-07
Time:                        09:25:01   Log-Likelihood:                 31.244
No. Observations:                  98   AIC:                            -58.49
Df Residuals:                      96   BIC:                            -53.32
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.4801     

In [6]:
# A richer formula model of LDI. In formula syntax `Age*Recog` stands for both
# main effects plus their interaction (Age + Recog + Age:Recog), and `C(Set)`
# marks Set as a categorical predictor.
print('Fancier model')
smfLM_model2 = smf.ols(
    formula='LDI ~ Age*Recog + C(Set)',
    data=data,
)
smfLM_results2 = smfLM_model2.fit()

# Print the full regression table for the fitted model.
print(smfLM_results2.summary())

Fancier model
                            OLS Regression Results                            
Dep. Variable:                    LDI   R-squared:                       0.267
Model:                            OLS   Adj. R-squared:                  0.236
Method:                 Least Squares   F-statistic:                     8.476
Date:                Tue, 20 May 2025   Prob (F-statistic):           7.09e-06
Time:                        09:25:43   Log-Likelihood:                 32.847
No. Observations:                  98   AIC:                            -55.69
Df Residuals:                      93   BIC:                            -42.77
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept       0.1370      0.423   