In [2]:
# imports
import statsmodels.api as sm
import pandas as pd 
import numpy as np
import warnings
# NOTE(review): simplefilter('ignore') installs a catch-all filter, so every
# warning category from every library is hidden for the rest of the kernel
# session -- including any numerical or deprecation warnings raised while
# fitting the models below. Consider narrowing this to specific categories.
warnings.simplefilter('ignore') # ignore warnings

# Formula-based OLS constructor, plotting helpers, and the ANOVA table builder.
# Only `ols` and `anova_lm` are called in the cells visible in this section.
from statsmodels.formula.api import ols
from statsmodels.graphics.api import interaction_plot, abline_plot
from statsmodels.stats.anova import anova_lm

In [3]:
# Read the raw job-test CSV (path is relative to the notebook's working
# directory) and preview the first rows; head() returns 5 rows by default.
jobtest = pd.read_csv(filepath_or_buffer='../data/raw/jobtest.csv')
jobtest.head()

Unnamed: 0,TEST,MINORITY,JPERF
0,0.28,1,1.83
1,0.97,1,4.59
2,1.25,1,2.97
3,2.46,1,8.14
4,2.51,1,8.0


In [4]:
# Regress job performance (JPERF) on minority status alone.
# C(...) tells the formula parser to treat MINORITY as a categorical factor.
ols_minority = ols(
    formula='JPERF ~ C(MINORITY)',
    data=jobtest,
).fit()
print(ols_minority.summary())

                            OLS Regression Results                            
Dep. Variable:                  JPERF   R-squared:                       0.085
Model:                            OLS   Adj. R-squared:                  0.035
Method:                 Least Squares   F-statistic:                     1.681
Date:                Thu, 25 Jun 2020   Prob (F-statistic):              0.211
Time:                        16:02:02   Log-Likelihood:                -42.993
No. Observations:                  20   AIC:                             89.99
Df Residuals:                      18   BIC:                             91.98
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept            3.8740      0.692  

In [21]:
# Regress job performance (JPERF) on the test score alone.
ols_test = ols(
    formula='JPERF ~ TEST',
    data=jobtest,
).fit()
print(ols_test.summary())

                            OLS Regression Results                            
Dep. Variable:                  JPERF   R-squared:                       0.517
Model:                            OLS   Adj. R-squared:                  0.490
Method:                 Least Squares   F-statistic:                     19.25
Date:                Tue, 23 Jun 2020   Prob (F-statistic):           0.000356
Time:                        18:28:05   Log-Likelihood:                -36.614
No. Observations:                  20   AIC:                             77.23
Df Residuals:                      18   BIC:                             79.22
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.0350      0.868      1.192      0.2

In [11]:
# ANOVA table for the JPERF ~ C(MINORITY) fit.
# typ=1 (sequential sums of squares) is anova_lm's default, spelled out here.
anova_minority = anova_lm(ols_minority, typ=1)
anova_minority

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(MINORITY),1.0,8.051805,8.051805,1.680582,0.211222
Residual,18.0,86.23945,4.791081,,


In [12]:
# ANOVA table for the JPERF ~ TEST fit.
# typ=1 (sequential sums of squares) is anova_lm's default, spelled out here.
anova_test = anova_lm(ols_test, typ=1)
anova_test

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
TEST,1.0,48.722958,48.722958,19.246127,0.000356
Residual,18.0,45.568297,2.531572,,


In [14]:
# Regress job performance (JPERF) on test score and minority status together
# (additive model: main effects only, no interaction term).
ols_testminority = ols(
    formula='JPERF ~ TEST + MINORITY',
    data=jobtest,
).fit()
print(ols_testminority.summary())

                            OLS Regression Results                            
Dep. Variable:                  JPERF   R-squared:                       0.572
Model:                            OLS   Adj. R-squared:                  0.522
Method:                 Least Squares   F-statistic:                     11.38
Date:                Tue, 23 Jun 2020   Prob (F-statistic):           0.000731
Time:                        18:19:40   Log-Likelihood:                -35.390
No. Observations:                  20   AIC:                             76.78
Df Residuals:                      17   BIC:                             79.77
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.6120      0.887      0.690      0.5

In [6]:
# Regress job performance (JPERF) on test score, minority status, and their
# interaction. The TEST:MINORITY term lets the slope of TEST differ by
# minority status instead of forcing one common slope.
ols_testminority_interaction = ols('JPERF ~ TEST + MINORITY + TEST:MINORITY',data=jobtest).fit()

# In a formula, `a*b` is shorthand for `a + b + a:b`, so this specifies the
# same model more compactly.
ols_testminority_interaction_v2 = ols('JPERF ~ TEST*MINORITY',data=jobtest).fit()

# Verify the equivalence instead of relying on comparing two summaries by eye:
# the fitted coefficients must carry the same labels and the same values.
# Raises AssertionError if the two specifications ever diverge.
pd.testing.assert_series_equal(
    ols_testminority_interaction.params,
    ols_testminority_interaction_v2.params,
)

# Both summaries are still printed, as before.
print(ols_testminority_interaction.summary())
print(ols_testminority_interaction_v2.summary())

                            OLS Regression Results                            
Dep. Variable:                  JPERF   R-squared:                       0.664
Model:                            OLS   Adj. R-squared:                  0.601
Method:                 Least Squares   F-statistic:                     10.55
Date:                Thu, 25 Jun 2020   Prob (F-statistic):           0.000451
Time:                        16:07:01   Log-Likelihood:                -32.971
No. Observations:                  20   AIC:                             73.94
Df Residuals:                      16   BIC:                             77.92
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept         2.0103      1.050      1.914

In [18]:
# ANOVA table for the additive JPERF ~ TEST + MINORITY fit.
# typ=1 is anova_lm's default: sums of squares are sequential, so each term
# is assessed after the terms listed before it in the formula.
anova_lm(ols_testminority, typ=1)

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
TEST,1.0,48.722958,48.722958,20.542126,0.000295
MINORITY,1.0,5.246751,5.246751,2.212087,0.155246
Residual,17.0,40.321546,2.371856,,


In [19]:
# ANOVA table for the interaction fit (JPERF ~ TEST + MINORITY + TEST:MINORITY).
# typ=1 is anova_lm's default: sums of squares are sequential, so each term
# is assessed after the terms listed before it in the formula.
anova_lm(ols_testminority_interaction, typ=1)

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
TEST,1.0,48.722958,48.722958,24.626621,0.000141
MINORITY,1.0,5.246751,5.246751,2.651927,0.122952
TEST:MINORITY,1.0,8.666073,8.666073,4.380196,0.05265
Residual,16.0,31.655473,1.978467,,
