## 기본 데이터, 라이브러리 로드

In [None]:
import numpy as np
import pandas as pd

# Read the x_train / y_train tables (both EUC-KR encoded), join them on the
# shared `cust_id` column, and replace every missing value with 0.
df_x = pd.read_csv('data/x_train.csv', encoding='euc-kr')
df_y = pd.read_csv('data/y_train.csv', encoding='euc-kr')
df = df_x.merge(df_y, on='cust_id')
df = df.fillna(0)

In [None]:
# Split the '내점일수' column into two samples by gender code:
# rows with gender == 1 go to `man`, rows with gender == 0 go to `woman`.
man = df.loc[df['gender'].eq(1), '내점일수']
woman = df.loc[df['gender'].eq(0), '내점일수']

## 1. T 검정

- 단일 표본

In [None]:
from scipy.stats import ttest_1samp

# One-sample t-test: is the mean of the `woman` sample equal to 10?
# The result (statistic and p-value) is displayed as the cell output.
ttest_1samp(a=woman, popmean=10)

- 독립표본

In [None]:
from scipy.stats import ttest_ind

# Independent two-sample t-test comparing the means of `man` and `woman`,
# using the pooled-variance (equal variances assumed) form of the test.
ttest_ind(a=man, b=woman, equal_var=True)

- 대응표본

In [None]:
from scipy.stats import ttest_rel
# Paired (related-samples) t-test between the `man` and `woman` samples.
# NOTE(review): ttest_rel expects paired observations of equal length, but
# `man` and `woman` are disjoint row subsets of df (gender == 1 vs gender == 0).
# This call presumably fails on unequal lengths, or pairs unrelated rows if the
# lengths happen to match - confirm and replace with genuinely paired data
# (e.g. before/after measurements for the same customers).
ttest_rel(man, woman)

## 2. 분산분석(ANOVA)

### 1) 일원분산분석(One-way ANOVA)
종속변수 1개, 독립변수 1개

- scipy.stats 이용

In [None]:
import scipy.stats as stats

# One-way ANOVA on '내점일수' across the gender groups.
# The original call passed `group1` / `group2`, which are never defined in this
# notebook and raised NameError; the per-gender samples `man` and `woman`
# built in the data-preparation cell are the groups being compared (the same
# factor as C(gender) in the statsmodels version of this analysis).
F_statistic, pVal = stats.f_oneway(man, woman)

# Report the F statistic (1 decimal place) and the p-value (5 decimal places).
print('F={0:.1f}, p={1:.5f}'.format(F_statistic, pVal))

- statsmodels 이용

In [None]:
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

# Fit an OLS model of '내점일수' on gender treated as a categorical factor,
# then print the ANOVA table computed from the fitted model.
model = ols(formula='내점일수 ~ C(gender)', data=df).fit()
print(anova_lm(model))

- 사후검정

In [None]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Post-hoc Tukey HSD: pairwise comparison of '내점일수' means between the
# groups defined by the `gender` column; the summary table is printed.
print(pairwise_tukeyhsd(endog=df['내점일수'], groups=df['gender']))