In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

### Air Traffic Controller Stress Test Example

### Testing using Completely Randomized Design (CRD)

In [2]:
# Load the stress-test workbook: one column per system design
# (System A / B / C), one row per observation.
data = pd.read_excel(io='Week-6-Files/RBD.xlsx')
data

Unnamed: 0,System A,System B,System C
0,15,15,18
1,14,14,14
2,10,11,15
3,13,12,17
4,16,13,16
5,13,13,13


In [5]:
# Reshape wide -> long so every row holds a single measurement.
# reset_index() exposes the original row number as an 'index' column, which
# melt keeps as the identifier; the system name lands in 'variable' and the
# measurement in 'value'.
data_new = data.reset_index().melt(
    id_vars='index',
    value_vars=['System A', 'System B', 'System C'],
)
data_new

Unnamed: 0,index,variable,value
0,0,System A,15
1,1,System A,14
2,2,System A,10
3,3,System A,13
4,4,System A,16
5,5,System A,13
6,0,System B,15
7,1,System B,14
8,2,System B,11
9,3,System B,12


In [6]:
# CRD: one-way model with the system design as the only (categorical) factor.
model = ols(
    formula='value ~ C(variable)',
    data=data_new,
).fit()
model.summary()



0,1,2,3
Dep. Variable:,value,R-squared:,0.3
Model:,OLS,Adj. R-squared:,0.207
Method:,Least Squares,F-statistic:,3.214
Date:,"Mon, 21 Mar 2022",Prob (F-statistic):,0.0689
Time:,14:29:02,Log-Likelihood:,-34.554
No. Observations:,18,AIC:,75.11
Df Residuals:,15,BIC:,77.78
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,13.5000,0.738,18.296,0.000,11.927,15.073
C(variable)[T.System B],-0.5000,1.043,-0.479,0.639,-2.724,1.724
C(variable)[T.System C],2.0000,1.043,1.917,0.075,-0.224,4.224

0,1,2,3
Omnibus:,0.471,Durbin-Watson:,2.031
Prob(Omnibus):,0.79,Jarque-Bera (JB):,0.558
Skew:,-0.297,Prob(JB):,0.757
Kurtosis:,2.375,Cond. No.,3.73


In [7]:
# Type I ANOVA table for the fitted CRD model.
anova_table = sm.stats.anova_lm(model, typ=1)
anova_table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(variable),2.0,21.0,10.5,3.214286,0.068903
Residual,15.0,49.0,3.266667,,


At alpha = 0.05, the table above tells us that we fail to reject the null hypothesis (p ≈ 0.069 > 0.05), i.e. under the CRD analysis the new system designs have **NO SIGNIFICANT EFFECT on Stress Levels of the Air Traffic Controllers**

### Testing using Randomized Block Design (RBD)

In [8]:
# Re-read the same workbook so the RBD section starts from the untouched
# wide table (rows will serve as blocks, columns are the systems).
data = pd.read_excel(io='Week-6-Files/RBD.xlsx')
data

Unnamed: 0,System A,System B,System C
0,15,15,18
1,14,14,14
2,10,11,15
3,13,12,17
4,16,13,16
5,13,13,13


In [10]:
# Wide -> long, then relabel the three resulting columns so the original row
# number is explicitly the blocking factor.
data_new = (
    data.reset_index()
    .melt(id_vars='index', value_vars=['System A', 'System B', 'System C'])
    .set_axis(['blocks', 'variable', 'value'], axis=1)
)
data_new

Unnamed: 0,blocks,variable,value
0,0,System A,15
1,1,System A,14
2,2,System A,10
3,3,System A,13
4,4,System A,16
5,5,System A,13
6,0,System B,15
7,1,System B,14
8,2,System B,11
9,3,System B,12


In [12]:
# RBD: treatment (system) plus the blocking factor, both treated as categorical.
model = ols(
    formula='value ~ C(variable) + C(blocks)',
    data=data_new,
).fit()

# Type I ANOVA table for the blocked model.
anova_table = sm.stats.anova_lm(model, typ=1)
anova_table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(variable),2.0,21.0,10.5,5.526316,0.024181
C(blocks),5.0,30.0,6.0,3.157895,0.057399
Residual,10.0,19.0,1.9,,


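Note: the blocking factor, C(blocks), is **always included in the model for an RBD**; wrapping it in C(...) makes the numeric block labels categorical. Accounting for block-to-block variation reduces the residual mean square from about 3.27 (CRD) to 1.9, so at alpha = 0.05 the effect of the system design is now significant (p ≈ 0.024).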

#### RBD Example 2

In [13]:
# Load the second RBD workbook: one column per chemical (chem1..chem4),
# one row per block.
data = pd.read_excel(io='Week-6-Files/RBD2.xlsx')
data

Unnamed: 0,chem1,chem2,chem3,chem4
0,1.3,2.2,1.8,3.9
1,1.6,2.4,1.7,4.4
2,0.5,0.4,0.6,2.0
3,1.2,2.0,1.5,4.1
4,1.1,1.8,1.3,3.4


In [16]:
# Wide -> long: one row per (block, chemical) measurement, with the original
# row number relabelled as the blocking factor.
data_new = (
    data.reset_index()
    .melt(id_vars='index', value_vars=['chem1', 'chem2', 'chem3', 'chem4'])
    .set_axis(['blocks', 'variable', 'value'], axis=1)
)
data_new

Unnamed: 0,blocks,variable,value
0,0,chem1,1.3
1,1,chem1,1.6
2,2,chem1,0.5
3,3,chem1,1.2
4,4,chem1,1.1
5,0,chem2,2.2
6,1,chem2,2.4
7,2,chem2,0.4
8,3,chem2,2.0
9,4,chem2,1.8


In [19]:
# RBD: chemical (treatment) plus the blocking factor, both categorical.
model = ols(
    formula='value ~ C(variable) + C(blocks)',
    data=data_new,
).fit()

# Type I ANOVA table for the blocked model.
anova_table = sm.stats.anova_lm(model, typ=1)
anova_table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(variable),3.0,18.044,6.014667,75.894848,4.51831e-08
C(blocks),4.0,6.693,1.67325,21.113565,2.318913e-05
Residual,12.0,0.951,0.07925,,


At alpha = 0.01, the Null Hypothesis is convincingly rejected i.e. the 4 chemicals have significantly different effects on the strength of the fabric.