# ANOVA Example #
##### Source: https://pythonfordatascience.org/anova-python/

In [4]:
import pandas as pd
import scipy.stats as stats
import researchpy as rp
import statsmodels.api as sm
from statsmodels.formula.api import ols
    
import matplotlib.pyplot as plt

In [5]:
# Loading data; the 'person' identifier column is dropped because it is not
# used anywhere in the analysis below.
df = pd.read_csv("https://raw.githubusercontent.com/Opensourcefordatascience/Data-sets/master/difficile.csv")
df = df.drop(columns='person')

# Recoding value from numeric to string.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on the df['dose'] selection: with pandas
# Copy-on-Write that selection is a temporary object, so an in-place update
# on it would not reach df.
df['dose'] = df['dose'].replace({1: 'placebo', 2: 'low', 3: 'high'})
    
# Getting summary statistics
rp.summary_cont(df['libido'])





Unnamed: 0,Variable,N,Mean,SD,SE,95% Conf.,Interval
0,libido,15.0,3.466667,1.76743,0.456349,2.487896,4.445437


In [6]:
# Summary statistics of libido within each dose group
rp.summary_cont(df.groupby('dose')['libido'])





Unnamed: 0_level_0,N,Mean,SD,SE,95% Conf.,Interval
dose,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
high,5,5.0,1.581139,0.707107,3.450484,6.549516
low,5,3.2,1.30384,0.583095,1.922236,4.477764
placebo,5,2.2,1.30384,0.583095,0.922236,3.477764


#### Approach 1 | scipy.stats

In [7]:
# One-way ANOVA on libido, with one sample per dose level
stats.f_oneway(*(
    df.loc[df['dose'] == level, 'libido']
    for level in ('high', 'low', 'placebo')
))

F_onewayResult(statistic=5.11864406779661, pvalue=0.024694289538222603)

#### Approach 2 | statsmodels.api

In [8]:
# Fit an OLS model of libido with dose entered as a categorical predictor
results = ols(formula='libido ~ C(dose)', data=df).fit()
results.summary()

  "anyway, n=%i" % int(n))


0,1,2,3
Dep. Variable:,libido,R-squared:,0.46
Model:,OLS,Adj. R-squared:,0.37
Method:,Least Squares,F-statistic:,5.119
Date:,"Mon, 14 Jan 2019",Prob (F-statistic):,0.0247
Time:,16:02:30,Log-Likelihood:,-24.683
No. Observations:,15,AIC:,55.37
Df Residuals:,12,BIC:,57.49
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.0000,0.627,7.972,0.000,3.634,6.366
C(dose)[T.low],-1.8000,0.887,-2.029,0.065,-3.732,0.132
C(dose)[T.placebo],-2.8000,0.887,-3.157,0.008,-4.732,-0.868

0,1,2,3
Omnibus:,2.517,Durbin-Watson:,2.408
Prob(Omnibus):,0.284,Jarque-Bera (JB):,1.108
Skew:,0.195,Prob(JB):,0.575
Kurtosis:,1.727,Cond. No.,3.73


In [9]:
# ANOVA table (typ=2) computed from the fitted OLS results above
aov_table = sm.stats.anova_lm(results, typ=2)
aov_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(dose),20.133333,2.0,5.118644,0.024694
Residual,23.6,12.0,,


In [10]:
def anova_table(aov):
    """Extend an ANOVA table with mean squares and effect sizes.

    Parameters
    ----------
    aov : pandas.DataFrame
        ANOVA table with ``sum_sq``, ``df``, ``F`` and ``PR(>F)`` columns.
        The last row is treated as the residual (error) term; every row
        before it is treated as an effect.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``sum_sq``, ``df``, ``mean_sq``, ``F``,
        ``PR(>F)``, ``eta_sq`` and ``omega_sq``. The effect sizes are NaN on
        the residual row. The input frame is left unmodified.
    """
    # Work on a copy so the caller's table does not silently gain columns.
    aov = aov.copy()

    aov['mean_sq'] = aov['sum_sq'] / aov['df']

    # Every row except the last (residual) one is an effect.
    effects = aov.iloc[:-1]
    ss_total = aov['sum_sq'].sum()
    # Use .iloc for the positional lookup: the index holds term labels, so
    # plain [-1] would depend on deprecated integer-position fallback.
    ms_error = aov['mean_sq'].iloc[-1]

    # Assignment aligns on the index, so the residual row is filled with NaN.
    aov['eta_sq'] = effects['sum_sq'] / ss_total
    aov['omega_sq'] = (
        (effects['sum_sq'] - effects['df'] * ms_error) / (ss_total + ms_error)
    )

    cols = ['sum_sq', 'df', 'mean_sq', 'F', 'PR(>F)', 'eta_sq', 'omega_sq']
    return aov[cols]

# Display the ANOVA table extended with mean squares and effect sizes
anova_table(aov_table)

Unnamed: 0,sum_sq,df,mean_sq,F,PR(>F),eta_sq,omega_sq
C(dose),20.133333,2.0,10.066667,5.118644,0.024694,0.460366,0.354486
Residual,23.6,12.0,1.966667,,,,
