# Import libraries

In [None]:
import os
import pandas as pd
import matplotlib

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats 
import scikit_posthocs as sp
import statsmodels.api as sm
import pylab

In [None]:
from scipy.stats import kruskal
from scipy.stats import mannwhitneyu
from scipy.stats import ttest_ind

# Functions

In [None]:
# Notebook-wide matplotlib defaults, applied once so every figure below shares them.
style_param = {
    # figure geometry
    'figure.figsize': (10,6),
    # font sizes
    'legend.fontsize': '20',
    'axes.labelsize': '26',
    'axes.titlesize':'26',
    'xtick.labelsize':'20',
    'ytick.labelsize':'20',
    # tick direction / placement
    'xtick.direction' : 'in',
    'ytick.direction' : 'in',
    'xtick.top' : True,
    # padding between the ticks and their labels
    'xtick.major.pad':'16',
    'ytick.major.pad':'16',
}

plt.rcParams.update(style_param)

In [None]:
def distrib_q(df):
    """Plot the density histogram of ``df.q`` and print its summary statistics.

    A new 10x6 figure is created. The ``q`` column is drawn as a 200-bin
    normalised histogram, a dashed vertical line marks the reference value
    0.434 (labelled "Wedenberg"), and the x-axis is limited to [0, 0.6].
    The mean, standard deviation, median and the 2.5 % / 97.5 % quantiles of
    ``q`` are printed to stdout.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame exposing a numeric column accessible as ``df.q``.

    Returns
    -------
    None
    """
    _, ax = plt.subplots(figsize=(10, 6))
    q = df.q

    # Draw explicitly on the axes created above instead of relying on
    # pyplot's implicit "current axes".
    q.hist(bins=200, density=True, color='gray', ax=ax)

    # The label is a fixed string; the former ``.format(mean)`` call had no
    # placeholder to fill and was a no-op.
    ax.axvline(x=0.434, label='Wedenberg q =0.434', c='k', ls='--', lw=1.2)

    # Single x-range; an earlier (0, 1) limit was immediately overridden.
    ax.set_xlim(0, 0.6)

    ax.set_xlabel(r'$q [Gy\times \mu m \times keV^{-1}]$')
    ax.set_ylabel("Rozkład gęstości \nprawdopodobieństwa [-]\n")
    ax.legend()

    # Major grid, plus a faint dotted minor grid.
    ax.grid()
    ax.minorticks_on()
    ax.grid(which='minor', linestyle=':', linewidth='0.2', color='k')

    print ("mean: ",q.mean(), "\nstd: ",q.std(), "\nmedian: ",q.median(),"\nquantile 2.5%: ", q.quantile(0.025),"\nquantile 97.5%: ", q.quantile(0.975))

# Read data

In [None]:
# All six samples are stored in one HDF5 file under the keys data_1 ... data_6.
open_fname = os.path.join('tmp','distrib_q.h5')
df1, df2, df3, df4, df5, df6 = (
    pd.read_hdf(open_fname, f'data_{sample_no}') for sample_no in range(1, 7)
)

In [None]:
# Histogram and summary statistics of q for sample 1.
distrib_q(df=df1)

In [None]:
# Histogram and summary statistics of q for sample 3.
distrib_q(df=df3)

In [None]:
# Normal Q-Q plot of q for sample 3.
# fit=True makes statsmodels fit loc/scale and standardise the sample before
# computing quantiles; without it the raw q values are compared against
# standard-normal quantiles and the 45-degree reference line is meaningless.
sm.qqplot(df3.q.values, line='45', fit=True, color='k')
pylab.show()

In [None]:
# Mann-Whitney U test on the q values of samples 1 and 2.
res = mannwhitneyu(x=df1.q.values, y=df2.q.values)
print(res)

In [None]:
# Two-sample t-test on samples 1 and 2 without assuming equal variances
# (equal_var=False).
res = ttest_ind(a=df1.q.values, b=df2.q.values, equal_var=False)
print(res)

In [None]:
# Medians of samples 1 and 2, followed by their standard deviations.
medians = [np.median(sample.q.values) for sample in (df1, df2)]
spreads = [np.std(sample.q.values) for sample in (df1, df2)]
print(*medians, *spreads)

In [None]:
# Gather the q values of all six samples in the layouts needed further down:
#   wyniki - one array of q values per sample (input for kruskal)
#   ids    - sample number (1..6) repeated once per observation
#   values - every q value, flattened in the same order as ids
wyniki = []
ids = []
values = []

for i, data in enumerate([df1,df2,df3,df4,df5,df6], start=1):
    sample_q = data.q.values
    wyniki.append(sample_q)

    ids.extend(np.ones(sample_q.shape[0],dtype=np.uint8)*i)
    values.extend(sample_q)

# Per-sample median and standard deviation.
for w in wyniki:
    print(np.median(w),np.std(w))


# ANOVA post-hoc

https://scikit-posthocs.readthedocs.io/en/latest/tutorial/

If normality and other assumptions are violated, one can use a non-parametric Kruskal-Wallis H test (one-way non-parametric ANOVA) to test if samples came from the same distribution.

In [None]:
# Kruskal-Wallis H test across all collected samples.
kw_result = kruskal(*wyniki)
H, p = kw_result
print(H, p)

Hipoteza zerowa została odrzucona, więc wykonujemy test post-hoc Conovera.

The p-value tells us that we may reject the null hypothesis that the population medians of all the groups are equal. To learn which groups (samples) differ in their medians, we need to run post hoc tests.

Podobnie jak w przypadku analizy wariancji, istotny statystycznie wynik testu Kruskala-Wallisa mówi nam tylko o tym, że co najmniej jedna z grup różni się od innej grupy. 

To learn which groups (samples) differ in their medians, we need to run post hoc tests. scikit-posthocs provides many of the non-parametric tests mentioned above. Let’s choose Conover’s test.

In [None]:
# Long-format frame: one row per observation ('Val') with its sample number ('Id').
mydf = pd.DataFrame(dict(Val=values, Id=ids))

# Pairwise Conover test with Holm-adjusted p-values; as the last expression
# of the cell, the resulting table is displayed.
sp.posthoc_conover(
    mydf,
    val_col='Val',
    group_col='Id',
    p_adjust='holm',
)

Pairwise comparisons show that we may reject the null hypothesis (p < 0.01) for each pair of samples and conclude that all groups (samples) differ in their q values.

In [None]:
# Significance heat map of the pairwise Conover p-values.
# The Holm adjustment is applied here too, so the plot shows the same
# multiplicity-corrected p-values as the Conover table computed with
# p_adjust='holm' (previously the plot used unadjusted p-values).
pc = sp.posthoc_conover(mydf, val_col='Val', group_col='Id', p_adjust='holm')
heatmap_args = {'linewidths': 0.25, 'linecolor': '0.5', 'clip_on': False, 'square': True, 'cbar_ax_bbox': [0.80, 0.35, 0.04, 0.3]}
sp.sign_plot(pc, **heatmap_args)

In [None]:
# Box plot of q for the six samples (outliers hidden via showfliers=False).
data = [sample.q.values for sample in (df1, df2, df3, df4, df5, df6)]

fig = plt.figure(figsize=(10, 7))
# Axes rectangle [left, bottom, width, height] in figure fractions: the whole canvas.
ax = fig.add_axes([0, 0, 1, 1])

bp = ax.boxplot(data, showfliers=False)

ax.set_xlabel('\nNumer próbki')
ax.set_ylabel(r'$q\; [Gy\cdot \mu m \cdot keV^{-1}]$'+'\n')

# Major grid, plus a faint dotted minor grid.
ax.grid()
ax.minorticks_on()
ax.grid(which='minor', linestyle=':', linewidth='0.2', color='k')

In [None]:
# Export the data behind the box plot together with the figure itself.
# The target directory is created if it does not exist yet.
os.makedirs("results", exist_ok=True)

# One row per observation: the 1-based sample number and its q value.
# The previous zip(data) construction stored one whole array per row, which
# to_csv serialised as a (truncated) array repr instead of the actual numbers.
tmp = pd.DataFrame(
    [(sample_no, q)
     for sample_no, sample_q in enumerate(data, start=1)
     for q in sample_q],
    columns=['sample', 'q'],
)
tmp.to_csv("results/figure_5_8", index=False)

# bbox_inches='tight': the axes occupy the full figure canvas, so without it
# the tick and axis labels fall outside the saved image.
# NOTE(review): fname has no extension, so matplotlib is expected to append
# its default format extension (normally ".png") - confirm.
fig.savefig(fname="results/figure_5_8", dpi=700, bbox_inches='tight')