In [1]:
from scipy.stats import f_oneway
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [2]:
# Teaching Methodology Example - One Way ANOVA
# Independent variable (factor) - Teaching Methodology
# Dependent variable (response) - Student Performance
# a, b, c hold one list of scores per group being compared
a = [4,3,2]
b = [2,4,6]
c = [2,1,3]
# Returns the f_statistic and p_value for F-test
f_oneway(a,b,c)

F_onewayResult(statistic=1.5, pvalue=0.2962962962962962)

In [15]:
# Read the teaching-methodology sheet and label its columns by teaching method
teaching_data = (
    pd.read_excel('Week-5-Files/Teaching_methodology.xlsx')
    .set_axis(['Black Board', 'Case Presentation', 'PPT'], axis = 'columns')
)
teaching_data

Unnamed: 0,Black Board,Case Presentation,PPT
0,4,2,2
1,3,4,1
2,2,6,3


In [17]:
# Preview only: reset_index() moves the row labels into an 'index' column.
# The result is displayed, not assigned, so teaching_data is not rebound here.
teaching_data.reset_index()

Unnamed: 0,index,Black Board,Case Presentation,PPT
0,0,4,2,2
1,1,3,4,1
2,2,2,6,3


In [19]:
# Reshape wide -> long with melt(): one row per (index, teaching method) pair,
# with the method name in 'variable' and the score in 'value'.
# Reference: https://www.geeksforgeeks.org/python-pandas-melt/
data_new = teaching_data.reset_index().melt(
    id_vars = ['index'],
    value_vars = ['Black Board', 'Case Presentation', 'PPT'],
)
data_new

Unnamed: 0,index,variable,value
0,0,Black Board,4
1,1,Black Board,3
2,2,Black Board,2
3,0,Case Presentation,2
4,1,Case Presentation,4
5,2,Case Presentation,6
6,0,PPT,2
7,1,PPT,1
8,2,PPT,3


In [25]:
# Specify the OLS regression model for the one-way layout; it is not fitted here.
# C(variable) tells the formula parser to treat 'variable' as a categorical factor.
# Reference: https://www.geeksforgeeks.org/ordinary-least-squares-ols-using-statsmodels/
model = ols(
    formula = 'value ~ C(variable)',
    data = data_new,
)
model

<statsmodels.regression.linear_model.OLS at 0x1d724d2adc0>

In [30]:
# Fit the regression line to the data.
# The model is built and fitted in a single expression (the same pattern the
# other ANOVA cells in this notebook use) instead of calling model.fit() on the
# existing name: that form rebinds `model` from the OLS object to its fitted
# results, so running the cell a second time would call .fit() on a results object.
model = ols('value ~ C(variable)', data = data_new).fit()

In [31]:
# ANOVA table for the fitted one-way model; typ selects the sum-of-squares type (1 here).
# The F statistic and p-value agree with the f_oneway result shown earlier
# (F = 1.5, p ~ 0.296 in both outputs).
anova_table = sm.stats.anova_lm(model,typ = 1)
anova_table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(variable),2.0,6.0,3.0,1.5,0.296296
Residual,6.0,12.0,2.0,,


## Two-way ANOVA

In [2]:
# Load the two-way ANOVA data set. As displayed below, it has a numeric 'Value'
# column and two categorical columns, 'prep_pro' and 'college'.
cat_data = pd.read_excel('Week-6-Files/two-way-anova.xlsx')
cat_data

Unnamed: 0,Value,prep_pro,college
0,500,three_hr,Business
1,580,three_hr,Business
2,540,three_hr,Engineering
3,460,three_hr,Engineering
4,480,three_hr,Artsandscience
5,400,three_hr,Artsandscience
6,460,One-day,Business
7,540,One-day,Business
8,560,One-day,Engineering
9,620,One-day,Engineering


In [4]:
# Two-way ANOVA model: main effect of each factor plus their interaction term.
model = ols(
    formula = 'Value ~ C(prep_pro) + C(college) + C(college):C(prep_pro)',
    data = cat_data,
).fit()
model

<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1ba80094c10>

In [5]:
# Type 2 ANOVA table for the two-way model: one row for each main effect,
# one for the interaction, and one for the residual.
anova_table = sm.stats.anova_lm(model, typ = 2)
anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(prep_pro),6100.0,2.0,1.382872,0.299436
C(college),45300.0,2.0,10.269521,0.004757
C(college):C(prep_pro),11200.0,4.0,1.269521,0.350328
Residual,19850.0,9.0,,


Conclusions:

1. At alpha = 0.05, we fail to reject the null hypothesis (p = 0.299), i.e. the type of preparation program shows **no significant effect** on CAT scores.
2. At alpha = 0.05, the null hypothesis is rejected i.e. undergraduate background **affects** CAT scores.
3. At alpha = 0.05, we fail to reject the null hypothesis (p = 0.350), i.e. there is no significant interaction between undergraduate background and type of preparation program in their effect on CAT scores.

### Week 6 Assignment Question

### CRD

In [5]:
# Assignment data: six observations (rows) for each of three schedules (columns).
# Written row by row so the literal reads like the displayed table.
data = pd.DataFrame(
    [
        [75, 76, 78],
        [74, 74, 74],
        [70, 71, 75],
        [73, 72, 77],
        [76, 73, 76],
        [73, 73, 73],
    ],
    columns = ['Schedule 1', 'Schedule 2', 'Schedule 3'],
)
data

Unnamed: 0,Schedule 1,Schedule 2,Schedule 3
0,75,76,78
1,74,74,74
2,70,71,75
3,73,72,77
4,76,73,76
5,73,73,73


In [6]:
# CRD layout: reshape wide -> long so each row is one observation, with the
# schedule name in 'variable' and the measurement in 'value'.
data_copy = data.reset_index().melt(
    id_vars = ['index'],
    value_vars = ['Schedule 1', 'Schedule 2', 'Schedule 3'],
)
data_copy

Unnamed: 0,index,variable,value
0,0,Schedule 1,75
1,1,Schedule 1,74
2,2,Schedule 1,70
3,3,Schedule 1,73
4,4,Schedule 1,76
5,5,Schedule 1,73
6,0,Schedule 2,76
7,1,Schedule 2,74
8,2,Schedule 2,71
9,3,Schedule 2,72


In [7]:
# CRD: one-way model with the schedule as the only (categorical) factor.
model = ols(
    formula = 'value ~ C(variable)',
    data = data_copy,
).fit()
model.summary()



0,1,2,3
Dep. Variable:,value,R-squared:,0.262
Model:,OLS,Adj. R-squared:,0.164
Method:,Least Squares,F-statistic:,2.663
Date:,"Wed, 06 Apr 2022",Prob (F-statistic):,0.102
Time:,00:06:27,Log-Likelihood:,-35.401
No. Observations:,18,AIC:,76.8
Df Residuals:,15,BIC:,79.47
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,73.5000,0.773,95.035,0.000,71.852,75.148
C(variable)[T.Schedule 2],-0.3333,1.094,-0.305,0.765,-2.665,1.998
C(variable)[T.Schedule 3],2.0000,1.094,1.829,0.087,-0.331,4.331

0,1,2,3
Omnibus:,0.223,Durbin-Watson:,2.01
Prob(Omnibus):,0.894,Jarque-Bera (JB):,0.413
Skew:,-0.134,Prob(JB):,0.813
Kurtosis:,2.308,Cond. No.,3.73


In [8]:
# ANOVA table for the CRD model.
# The keyword is `typ` (not `type`): anova_lm takes its options through **kwargs
# and silently ignores a misspelled keyword, so the intended sum-of-squares type
# was never actually passed. typ = 1 matches the other one-factor cells.
anova_table = sm.stats.anova_lm(model,typ = 1)
anova_table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(variable),2.0,19.111111,9.555556,2.662539,0.102434
Residual,15.0,53.833333,3.588889,,


**We fail to reject the null hypothesis (p = 0.102 > 0.05), i.e. the data give no evidence against mu1 = mu2 = mu3**

### RBD

In [9]:
# RBD layout: same long format as the CRD cell, but the original row position
# is kept as the blocking factor, so the 'index' column is renamed to 'blocks'.
data_copy = (
    data.reset_index()
    .melt(
        id_vars = ['index'],
        value_vars = ['Schedule 1', 'Schedule 2', 'Schedule 3'],
    )
    .rename(columns = {'index': 'blocks'})
)
data_copy

Unnamed: 0,blocks,variable,value
0,0,Schedule 1,75
1,1,Schedule 1,74
2,2,Schedule 1,70
3,3,Schedule 1,73
4,4,Schedule 1,76
5,5,Schedule 1,73
6,0,Schedule 2,76
7,1,Schedule 2,74
8,2,Schedule 2,71
9,3,Schedule 2,72


In [10]:
# RBD: schedule (treatment) and block both enter as categorical factors.
model = ols(
    formula = 'value ~ C(variable) + C(blocks)',
    data = data_copy,
).fit()
model.summary()



0,1,2,3
Dep. Variable:,value,R-squared:,0.732
Model:,OLS,Adj. R-squared:,0.544
Method:,Least Squares,F-statistic:,3.9
Date:,"Wed, 06 Apr 2022",Prob (F-statistic):,0.026
Time:,00:18:39,Log-Likelihood:,-26.287
No. Observations:,18,AIC:,68.57
Df Residuals:,10,BIC:,75.7
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,75.7778,0.932,81.283,0.000,73.701,77.855
C(variable)[T.Schedule 2],-0.3333,0.807,-0.413,0.688,-2.132,1.466
C(variable)[T.Schedule 3],2.0000,0.807,2.477,0.033,0.201,3.799
C(blocks)[T.1],-2.3333,1.142,-2.044,0.068,-4.877,0.211
C(blocks)[T.2],-4.3333,1.142,-3.795,0.004,-6.877,-1.789
C(blocks)[T.3],-2.3333,1.142,-2.044,0.068,-4.877,0.211
C(blocks)[T.4],-1.3333,1.142,-1.168,0.270,-3.877,1.211
C(blocks)[T.5],-3.3333,1.142,-2.919,0.015,-5.877,-0.789

0,1,2,3
Omnibus:,3.755,Durbin-Watson:,1.795
Prob(Omnibus):,0.153,Jarque-Bera (JB):,1.315
Skew:,0.039,Prob(JB):,0.518
Kurtosis:,1.678,Cond. No.,7.57


In [12]:
# Type 1 ANOVA table for the RBD model: one row for the schedules (treatment),
# one for the blocks, and one for the residual.
anova_table = sm.stats.anova_lm(model,typ = 1)
anova_table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(variable),2.0,19.111111,9.555556,4.886364,0.033088
C(blocks),5.0,34.277778,6.855556,3.505682,0.04329
Residual,10.0,19.555556,1.955556,,


**Null hypothesis is rejected (p = 0.033 < 0.05), i.e. mu1, mu2 and mu3 are not all equal (at least one schedule mean differs)**