In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
%matplotlib inline
import numpy as np
import scipy as sp
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import IPython as ip
mpl.style.use('ggplot')
mpl.rc('font', family='Noto Sans CJK TC')
ip.display.set_matplotlib_formats('svg')

In [3]:
np.random.seed(20180701)

## Welch’s T-Test

In [4]:
group_ctl = pd.Series(name='height', data=sp.stats.norm.rvs(loc=170, scale=5, size=100))
group_exp = pd.Series(name='height', data=sp.stats.norm.rvs(loc=170, scale=5, size=3))  # same with lesser sample size

In [5]:
sp.stats.ttest_ind(group_ctl, group_exp)

Ttest_indResult(statistic=-1.4825003632205942, pvalue=0.1413207618623851)

In [6]:
sp.stats.ttest_ind(group_ctl, group_exp, equal_var=False)  # === Welch’s t-test

Ttest_indResult(statistic=-1.474079944377262, pvalue=0.2716372989009761)

Welch’s t-test is better to resist unequal variances and unequal sample sizes.

## Paired Student's t-test

In [7]:
group_ctl = pd.Series(name='height', data=sp.stats.norm.rvs(loc=170, scale=5, size=100))
group_exp = group_ctl + sp.stats.norm.rvs(loc=1, scale=1, size=100)  # different

In [8]:
sp.stats.ttest_ind(group_ctl, group_exp)

Ttest_indResult(statistic=-1.4784029362607924, pvalue=0.14088899541937028)

In [9]:
sp.stats.ttest_rel(group_ctl, group_exp)

Ttest_relResult(statistic=-9.62612703334583, pvalue=7.157064207762357e-16)

The independent Student's t-test is even wrong.

## Mann–Whitney U test

In [10]:
group_ctl = [22, 33, 44, 55, 66, 77]
group_exp = [22, 33, 44, 55, 66, 7700]  # same with an outliner

In [11]:
sp.stats.ttest_ind(group_ctl, group_exp)

Ttest_indResult(statistic=-0.9956557420895134, pvalue=0.3428989847457683)

In [12]:
sp.stats.mannwhitneyu(group_ctl, group_exp)

MannwhitneyuResult(statistic=17.5, pvalue=0.5)

Mann–Whitney U test is better to resist outliners.

## Paired Wilcoxon signed-rank test

In [13]:
group_ctl = np.array([22, 33, 44, 55, 66, 77])
group_exp = group_ctl + sp.stats.norm.rvs(loc=1, scale=1, size=6)  # different

In [14]:
sp.stats.mannwhitneyu(group_ctl, group_exp)

MannwhitneyuResult(statistic=16.0, pvalue=0.40509061820523684)

In [15]:
sp.stats.wilcoxon(group_ctl, group_exp)

WilcoxonResult(statistic=1.0, pvalue=0.046399461870904594)

The Mann–Whitney U test is even wrong.

## Fisher Exact Test

In [16]:
sp.stats.fisher_exact([
    # men, women
    [1, 7],  # studying
    [6, 4],  # not-studying
])

(0.09523809523809523, 0.06561085972850683)

In [17]:
# odds ratio
(1/6) / (7/4)

0.09523809523809523

In [18]:
sp.stats.fisher_exact([
    # men, women
    [43, 44],  # right-handed
    [ 9,  4],  # left-handed
])

(0.43434343434343436, 0.23915695682224267)

In [19]:
(43/9) / (44/4)

0.4343434343434343

## Chi-Square Test of Independence

In [20]:
sp.stats.chi2_contingency([
    # men, women
    [1, 7],  # studying
    [6, 4],  # not-studying
])

(2.457467532467532, 0.11696722987709807, 1, array([[3.11111111, 4.88888889],
        [3.88888889, 6.11111111]]))

In [21]:
# expected frequency
(1+6) * (1+7)/(1+7+6+4)

3.111111111111111

The chi-square test is not suitable for small sample sizes.

In [22]:
sp.stats.chi2_contingency([
    # men, women
    [43, 44],  # right-handed
    [ 9,  4],  # left-handed
], lambda_ = 'log-likelihood')

(1.0909220569849305, 0.296267001141625, 1, array([[45.24, 41.76],
        [ 6.76,  6.24]]))

In [23]:
(43+9) * (43+44)/(43+44+9+4)

45.24