# ANOVA - Analysis of variance

1. One-way ANOVA
2. Two-way ANOVA
3. ANCOVA - Analysis of covariance

---

## Import Packages
- Visual Python: Data Analysis > Import

In [None]:
# Visual Python: Data Analysis > Import
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Plot text settings applied to every figure drawn below:
#   - 'New Gulim' is presumably a Hangul-capable font for the Korean labels
#     (confirm it is installed on the machine running the notebook);
#   - axes.unicode_minus=False draws minus signs as an ASCII hyphen.
from matplotlib import rcParams
rcParams.update({'font.family': 'New Gulim', 'axes.unicode_minus': False})

## 1 One-way ANOVA

In [None]:
# Visual Python: Data Analysis > File
# Load the sample data for section 1; the file name translates to
# "one-way analysis of variance". The path is relative to the working directory.
df1 = pd.read_csv('./data/09_1_일원분산분석.csv')
df1  # bare last expression: renders the loaded frame as the cell output

In [None]:
# Visual Python: ANOVA
from scipy import stats
def vp_confidence_interval(var, confidence_level=0.95):
    """Return the two-sided Student-t confidence interval for the mean of ``var``.

    Parameters
    ----------
    var : array-like
        One-dimensional sample. Missing values are skipped by the pandas
        reductions used here (``count``, ``mean``, ``std``).
    confidence_level : float, default 0.95
        Coverage probability of the interval.

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds from ``scipy.stats.t.interval`` with
        ``count - 1`` degrees of freedom, centred on the sample mean and
        scaled by the standard error of the mean.
        A scalar ``np.nan`` is returned instead when ``var`` cannot be
        converted to a ``pandas.Series``.
    """
    try:
        sr = pd.Series(var)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt / SystemExit
        # must still propagate out of a running notebook cell.
        return np.nan
    n_obs = sr.count()  # number of non-missing observations
    std_err = sr.std() / np.sqrt(n_obs)
    # The level is passed positionally because SciPy renamed this keyword
    # (``alpha`` -> ``confidence``) between releases.
    return stats.t.interval(confidence_level, df=n_obs - 1, loc=sr.mean(), scale=std_err)
def vp_sem(var):
    """Return the standard error of the mean of ``var``.

    Parameters
    ----------
    var : array-like
        One-dimensional sample. Missing values are skipped by the pandas
        reductions used here (``count``, ``std``).

    Returns
    -------
    float
        Sample standard deviation divided by the square root of the number
        of non-missing observations. ``np.nan`` is returned when ``var``
        cannot be converted to a ``pandas.Series``.
    """
    try:
        sr = pd.Series(var)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt / SystemExit
        # must still propagate out of a running notebook cell.
        return np.nan
    return sr.std() / np.sqrt(sr.count())

In [None]:
# Visual Python: ANOVA
# One-way ANOVA
# Needs `df1` plus the helpers `vp_sem` / `vp_confidence_interval` defined in earlier cells.
import warnings

import seaborn as sns
import statsmodels.formula.api as smf
from IPython.display import display, Markdown
from scipy import stats
from statsmodels.stats.anova import anova_lm
# Public import path; the `statsmodels.sandbox` package is the experimental location of the same class.
from statsmodels.stats.multicomp import MultiComparison

vp_df = df1.dropna().copy()

# Wide layout: one column per level of 교육방법 holding that group's 판매실적 values.
# Each group is re-indexed from 0 so groups of different sizes line up row-wise
# (shorter groups are NaN-padded). Built with one concat instead of growing a
# frame inside a loop.
_df = pd.concat(
    {k: v.reset_index(drop=True) for k, v in vp_df.groupby('교육방법')['판매실적']},
    axis=1,
)

# Statistics
# The interval is computed once; after the transpose, column 0 holds the lower
# bounds and column 1 the upper bounds, indexed by group.
_ci = _df.apply(vp_confidence_interval).T
display(Markdown('### Statistics'))
display(pd.DataFrame(data={
    'Count': _df.count(),
    'Mean': _df.mean(numeric_only=True),
    'Std. Deviation': _df.std(numeric_only=True),
    'Min': _df.min(),
    'Max': _df.max(),
    'Std. Error Mean': _df.apply(vp_sem),
    'Confidence interval': 0.95,  # matches the default confidence_level of vp_confidence_interval
    'Lower': _ci[0],
    'Upper': _ci[1],
}))

# Boxplot
with warnings.catch_warnings():
    warnings.simplefilter(action='ignore', category=Warning)
    sns.boxplot(data=_df)
    plt.show()

# Equal Variance test (Levene)
# Collect the groups with a plain comprehension. Appending from inside
# DataFrame.apply relies on the callback running exactly once per column,
# which older pandas releases did not guarantee for the first column.
_lst = [_df[col].dropna() for col in _df.columns]
_res = stats.levene(*_lst, center='mean')
display(Markdown('### Equal Variance test (Levene)'))
display(pd.DataFrame(data={'Statistic':_res.statistic,'p-value':_res.pvalue}, index=['Equal Variance test (Levene)']))

# One-way ANOVA
_model  = smf.ols('판매실적 ~ C(교육방법)', vp_df)
_result = _model.fit()
_dfr = anova_lm(_result)
# Append a 'Total' row; only df and sum_sq are filled, the other cells stay NaN.
_dfr.loc['Total','df'] = _dfr['df'].sum()
_dfr.loc['Total','sum_sq'] = _dfr['sum_sq'].sum()
display(Markdown('### One-way ANOVA'))
display(_dfr)

# Post-hoc: Tukey HSD
_res = MultiComparison(vp_df['판매실적'], vp_df['교육방법']).tukeyhsd(alpha=0.05)
display(Markdown('### Post-hoc: Tukey HSD'))
display(_res.summary())

## 2 Two-way ANOVA

In [None]:
# Visual Python: Data Analysis > File
# Load the sample data for section 2; the file name translates to
# "two-way analysis of variance". The path is relative to the working directory.
df2 = pd.read_csv('./data/09_2_이원분산분석.csv')
df2  # bare last expression: renders the loaded frame as the cell output

In [None]:
# Visual Python: ANOVA
# Two-way ANOVA
# Needs `df2` plus the helpers `vp_sem` / `vp_confidence_interval` defined in earlier cells.
import warnings

import seaborn as sns
import statsmodels.formula.api as smf
from IPython.display import display, Markdown
from scipy import stats
from statsmodels.graphics.factorplots import interaction_plot
from statsmodels.stats.anova import anova_lm
# Public import path; the `statsmodels.sandbox` package is the experimental location of the same class.
from statsmodels.stats.multicomp import MultiComparison

vp_df = df2.dropna().copy()

# Wide layout: one column per (성별, 여행빈도) combination holding that cell's
# 해외여행선호도 values. Each group is re-indexed from 0 so groups of different
# sizes line up row-wise (shorter groups are NaN-padded). Built with one concat
# instead of growing a frame inside a loop.
_df = pd.concat(
    {k: v.reset_index(drop=True) for k, v in vp_df.groupby(['성별','여행빈도'])['해외여행선호도']},
    axis=1,
)
# Two-level column index (level 0 = 성별, level 1 = 여행빈도); assigned once
# here rather than on every loop iteration.
_df.columns = [[x[0] for x in _df.columns],[x[1] for x in _df.columns]]

# Statistics
# The interval is computed once; after the transpose, column 0 holds the lower
# bounds and column 1 the upper bounds, indexed by group.
_ci = _df.apply(vp_confidence_interval).T
display(Markdown('### Statistics'))
display(pd.DataFrame(data={
    'Count': _df.count(),
    'Mean': _df.mean(numeric_only=True),
    'Std. Deviation': _df.std(numeric_only=True),
    'Min': _df.min(),
    'Max': _df.max(),
    'Std. Error Mean': _df.apply(vp_sem),
    'Confidence interval': 0.95,  # matches the default confidence_level of vp_confidence_interval
    'Lower': _ci[0],
    'Upper': _ci[1],
}))

# Boxplot
with warnings.catch_warnings():
    warnings.simplefilter(action='ignore', category=Warning)
    sns.boxplot(data=_df)
    plt.show()

# Equal Variance test (Levene)
# Collect the groups with a plain comprehension. Appending from inside
# DataFrame.apply relies on the callback running exactly once per column,
# which older pandas releases did not guarantee for the first column.
_lst = [_df[col].dropna() for col in _df.columns]
_res = stats.levene(*_lst, center='mean')
display(Markdown('### Equal Variance test (Levene)'))
display(pd.DataFrame(data={'Statistic':_res.statistic,'p-value':_res.pvalue}, index=['Equal Variance test (Levene)']))

# Two-way ANOVA
# Both main effects plus their interaction term.
_model  = smf.ols('해외여행선호도 ~ C(여행빈도) + C(성별) + C(여행빈도):C(성별)', vp_df)
_result = _model.fit()
_dfr = anova_lm(_result)
# Append a 'Total' row; only df and sum_sq are filled, the other cells stay NaN.
_dfr.loc['Total','df'] = _dfr['df'].sum()
_dfr.loc['Total','sum_sq'] = _dfr['sum_sq'].sum()
display(Markdown('### Two-way ANOVA'))
display(_dfr)

# Interaction plot
with warnings.catch_warnings():
    warnings.simplefilter(action='ignore', category=Warning)
    fig = interaction_plot(x=vp_df['여행빈도'], trace=vp_df['성별'], response=vp_df['해외여행선호도'])
    plt.show()

# Post-hoc: Tukey HSD
# Pairwise comparison over the 여행빈도 factor only.
_res = MultiComparison(vp_df['해외여행선호도'], vp_df['여행빈도']).tukeyhsd(alpha=0.05)
display(Markdown('### Post-hoc: Tukey HSD'))
display(_res.summary())

## 3 ANCOVA - Analysis of covariance

In [None]:
# Visual Python: Data Analysis > File
# Load the sample data for section 3; the file name translates to
# "analysis of covariance". The path is relative to the working directory.
df3 = pd.read_csv('./data/09_3_공분산분석.csv')
df3  # bare last expression: renders the loaded frame as the cell output

In [None]:
# Visual Python: ANOVA
# ANCOVA - Analysis of covariance
# Needs `df3` plus the helpers `vp_sem` / `vp_confidence_interval` defined in earlier cells.
import warnings

import pingouin as pg
import seaborn as sns
from IPython.display import display, Markdown

vp_df = df3.dropna().copy()

# Wide layout: one column per level of 광고 holding that group's 제품태도 values.
# Each group is re-indexed from 0 so groups of different sizes line up row-wise
# (shorter groups are NaN-padded). Built with one concat instead of growing a
# frame inside a loop.
_df = pd.concat(
    {k: v.reset_index(drop=True) for k, v in vp_df.groupby('광고')['제품태도']},
    axis=1,
)

# Statistics
# The interval is computed once; after the transpose, column 0 holds the lower
# bounds and column 1 the upper bounds, indexed by group.
_ci = _df.apply(vp_confidence_interval).T
display(Markdown('### Statistics'))
display(pd.DataFrame(data={
    'Count': _df.count(),
    'Mean': _df.mean(numeric_only=True),
    'Std. Deviation': _df.std(numeric_only=True),
    'Min': _df.min(),
    'Max': _df.max(),
    'Std. Error Mean': _df.apply(vp_sem),
    'Confidence interval': 0.95,  # matches the default confidence_level of vp_confidence_interval
    'Lower': _ci[0],
    'Upper': _ci[1],
}))

# Boxplot
with warnings.catch_warnings():
    warnings.simplefilter(action='ignore', category=Warning)
    sns.boxplot(data=_df)
    plt.show()

# ANCOVA - Analysis of covariance
# 제품태도 is the dependent variable, 광고 the between-subject factor and
# 사전태도 the covariate.
display(Markdown('### ANCOVA - Analysis of covariance'))
display(pg.ancova(data=vp_df, dv='제품태도', between='광고', covar='사전태도'))

---

In [None]:
# End of file