In [13]:
import numpy as np
import pandas as pd
import scikit_posthocs as sp
from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Statistical analysis of nitrogen removal

First we import all relevant libraries. Then we import our nitrogen removal data (Nitrogen_removal.xlsx); the dataframe is displayed below. 

Since we have more than two groups that differ in one independent variable (temperature) and want to compare them, we use a one-way ANOVA. Before doing so, we need to check that the assumptions of the one-way ANOVA are not violated. The assumptions are normality and equal variance, which we check with the Shapiro-Wilk test and Levene's test, respectively. 

In [14]:
# Read the nitrogen removal measurements from the Excel workbook into a dataframe
df1 = pd.read_excel(io='Nitrogen_removal.xlsx')
# Render the full table as the cell output
display(df1)

Unnamed: 0,SBWW_10,SBWW_15,SBWW_20,SBWW_25
0,66.16,76.56,68.78,62.24
1,66.98,74.94,71.12,65.66
2,63.54,80.978,66.56,71.22


In [15]:
# Turn every SBWW_* column of the dataframe into a plain Python list,
# one list per group, bound to a variable of the same name as the column.
SBWW_10, SBWW_15, SBWW_20, SBWW_25 = (
    df1[column_name].values.tolist()
    for column_name in ('SBWW_10', 'SBWW_15', 'SBWW_20', 'SBWW_25')
)
# Keep the four groups together as a list of lists as well
data = [SBWW_10, SBWW_15, SBWW_20, SBWW_25]

In [16]:
# Quick check: each inner list should hold the values of one SBWW_* column
print(data)

[[66.16, 66.98, 63.54], [76.56, 74.94, 80.97800000000001], [68.78, 71.12, 66.56], [62.24, 65.66, 71.22]]


In [17]:
# Levene's test for equal variances across the four groups.
# center='median' selects the median-centred variant of the test; `proportiontocut`
# is only consulted when center='trimmed', so it is not passed here.
stats.levene(SBWW_10, SBWW_15, SBWW_20, SBWW_25, center='median')

LeveneResult(statistic=0.4681723586141781, pvalue=0.7126236139315524)

In [18]:
# Shapiro-Wilk normality test, applied to each group separately; the four
# results are printed on one line in group order (10, 15, 20, 25).
print(*map(stats.shapiro, (SBWW_10, SBWW_15, SBWW_20, SBWW_25)))

ShapiroResult(statistic=0.9163665771484375, pvalue=0.439674437046051) ShapiroResult(statistic=0.9332016110420227, pvalue=0.5007210969924927) ShapiroResult(statistic=0.9997692704200745, pvalue=0.9709845185279846) ShapiroResult(statistic=0.981421709060669, pvalue=0.7388676404953003)


# Normality and equal variance

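Neither the Shapiro-Wilk tests nor Levene's test gives a significant result (all p > 0.05), so we have no evidence against normality or equal variance. Note that with only three measurements per group these tests have limited power, so this is a lack of evidence against the assumptions rather than proof that they hold. We therefore proceed with a one-way ANOVA test.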

In [19]:
# One-way ANOVA across the four SBWW_* groups; the F statistic and p-value are shown as the cell output
stats.f_oneway(SBWW_10, SBWW_15, SBWW_20, SBWW_25)

F_onewayResult(statistic=9.255317153517922, pvalue=0.005580374910646481)

# Since p < 0.05 we have evidence of a significant difference in nitrogen removal between temperatures

The one-way ANOVA gives a p-value of approx. 0.006, so we reject $H_0$, the hypothesis that the mean nitrogen removal is the same at temperatures 10, 15, 20, and 25. The ANOVA only tells us that at least one group differs, not which one, so we proceed with a post hoc test.

In [20]:
# Post hoc test: Tukey's HSD on every pair of groups.
# Reshape the wide table loaded from Excel (one column per group) into long
# format (one row per measurement) instead of retyping the values by hand, so
# the post hoc test always runs on the same data as the ANOVA.
group_columns = ['SBWW_10', 'SBWW_15', 'SBWW_20', 'SBWW_25']
df2 = (
    df1[group_columns]
    .melt(var_name='group', value_name='score')  # rows ordered column by column
    [['score', 'group']]
)

tukey = pairwise_tukeyhsd(endog=df2['score'],    # measured values
                          groups=df2['group'],   # group label of each value
                          alpha=0.05)            # family-wise error rate

print(tukey)

  Multiple Comparison of Means - Tukey HSD, FWER=0.05  
 group1  group2 meandiff p-adj   lower    upper  reject
-------------------------------------------------------
SBWW_10 SBWW_15  11.9327 0.0067    3.795 20.0703   True
SBWW_10 SBWW_20     3.26 0.5914  -4.8776 11.3976  False
SBWW_10 SBWW_25   0.8133    0.9  -7.3243   8.951  False
SBWW_15 SBWW_20  -8.6727 0.0372 -16.8103  -0.535   True
SBWW_15 SBWW_25 -11.1193 0.0101  -19.257 -2.9817   True
SBWW_20 SBWW_25  -2.4467 0.7551 -10.5843   5.691  False
-------------------------------------------------------
