In [None]:
import numpy as np
import pandas as pd
from scipy import stats

# IPython magic: display floating-point cell outputs with 3 decimal places.
%precision 3
# Seed NumPy's global random state so the simulation cells further down are
# repeatable on a fresh kernel run.
np.random.seed(1111)

In [None]:
# Load the data set and keep its '重さ' (weight) column as a NumPy array.
df = pd.read_csv('./data/ch11_potato.csv')
sample = np.array(df['重さ'])
sample

In [None]:
# Sample mean; the following cells test it against a hypothesised mean of 130.
s_mean = np.mean(sample)
s_mean

In [None]:
# Distribution of the sample mean under a mean of 130: N(130, 9/14).
# NOTE(review): 9 looks like the assumed population variance and 14 the sample
# size -- confirm 14 == len(sample).
rv = stats.norm(130, np.sqrt(9/14))
# isf(0.95) is the point with upper-tail probability 0.95, i.e. the lower 5% point.
rv.isf(0.95)

In [None]:
# Same lower 5% point when the mean is 129 instead of 130.
rv = stats.norm(129, np.sqrt(9/14))
rv.isf(0.95)

In [None]:
# Standardised test statistic for the observed sample mean.
z = (s_mean -130) / np.sqrt(9/14)
z

In [None]:
# `rv` is rebound to the standard normal; its lower 5% point is the critical
# value that z is compared with.
rv = stats.norm()
rv.isf(0.95)

In [None]:
# Lower-tail probability of z under the standard normal (one-sided p-value).
rv.cdf(z)

In [None]:
# Central 95% interval of the standard normal (two-sided acceptance region).
rv.interval(0.95)

In [None]:
# Two-sided p-value obtained by doubling the lower tail.
# NOTE(review): doubling the lower tail is only appropriate when z is negative.
rv.cdf(z) * 2

In [None]:
# `rv` is rebound again: distribution of a single observation, N(130, 9),
# used by the simulation in the next cell.
rv = stats.norm(130, np.sqrt(9))

In [None]:
# Monte Carlo estimate of the rejection rate when samples are drawn from `rv`
# as defined in the preceding cell (mean 130, the same mean the statistic is
# centred on): the fraction of simulated samples whose z falls below the
# lower 5% point of the standard normal.
c = stats.norm.isf(0.95)
n_samples = 10000
cnt = 0

for _ in range(n_samples):
    # 14 draws per simulated sample, rounded to 2 decimals.
    sample_ = np.round(rv.rvs(14), 2)
    s_mean_ = np.mean(sample_)
    # Trailing underscore (like sample_ / s_mean_) so the loop does not
    # overwrite the notebook-level `z` computed from the real sample.
    z_ = (s_mean_ - 130) / np.sqrt(9/14)
    if z_ < c:
        cnt += 1

cnt / n_samples

In [None]:
# Rebind `rv` to N(128, 9): the next cell draws samples from a population whose
# mean (128) differs from the hypothesised 130.
rv = stats.norm(128, np.sqrt(9))

In [None]:
# Monte Carlo estimate of how often the test fails to reject when samples are
# drawn from `rv` as defined in the preceding cell (mean 128) while the
# statistic is still centred on 130: the fraction of simulated samples whose z
# is at or above the lower 5% point of the standard normal.
c = stats.norm.isf(0.95)
n_samples = 10000
cnt = 0

for _ in range(n_samples):
    # 14 draws per simulated sample, rounded to 2 decimals.
    sample_ = np.round(rv.rvs(14), 2)
    s_mean_ = np.mean(sample_)
    # Trailing underscore (like sample_ / s_mean_) so the loop does not
    # overwrite the notebook-level `z` computed from the real sample.
    z_ = (s_mean_ - 130) / np.sqrt(9/14)
    if z_ >= c:
        cnt += 1

cnt / n_samples

In [None]:
def pmean_test(sample, mean0, p_var, alpha=0.05):
    """Two-sided z-test for a population mean with known population variance.

    Prints the decision at significance level ``alpha`` ('帰無仮説を採択' =
    the null hypothesis is accepted, '帰無仮説を棄却' = it is rejected),
    followed by the two-sided p-value rounded to three decimals.

    Parameters
    ----------
    sample : sequence of numbers
        Observations; must support ``len()`` and ``np.mean``.
    mean0 : float
        Population mean under the null hypothesis.
    p_var : float
        Known population variance.
    alpha : float, optional
        Significance level (default 0.05).

    Returns
    -------
    None
        Results are printed, not returned.
    """
    s_mean = np.mean(sample)
    n = len(sample)
    rv = stats.norm()
    lower, upper = rv.interval(1 - alpha)

    z = (s_mean - mean0) / np.sqrt(p_var / n)
    if lower <= z <= upper:
        print('帰無仮説を採択')
    else:
        print('帰無仮説を棄却')
    # The standard normal is symmetric, so the two-sided p-value is twice the
    # upper tail at |z|. sf() is used instead of 1 - cdf() because the
    # subtraction loses precision for large z.
    p = 2 * rv.sf(abs(z))
    # '.3f' = three decimals (the previous ' 3f' meant sign-space, width 3,
    # default six decimals).
    print(f'p値は{p:.3f}')

In [None]:
# z-test of the loaded sample: hypothesised mean 130, population variance 9.
pmean_test(sample, 130, 9)

In [None]:
def pvar_test(sample, var0, alpha=0.05):
    """Two-sided chi-square test for a population variance.

    Prints the decision at significance level ``alpha`` ('帰無仮説を採択' =
    the null hypothesis is accepted, '帰無仮説を棄却' = it is rejected),
    followed by the two-sided p-value rounded to three decimals.

    Parameters
    ----------
    sample : sequence of numbers
        Observations; must support ``len()`` and ``np.var``.
    var0 : float
        Population variance under the null hypothesis.
    alpha : float, optional
        Significance level (default 0.05).

    Returns
    -------
    None
        Results are printed, not returned.
    """
    u_var = np.var(sample, ddof=1)  # unbiased sample variance
    n = len(sample)
    rv = stats.chi2(df=n-1)
    lower, upper = rv.interval(1 - alpha)

    y = (n - 1) * u_var / var0
    if lower <= y <= upper:
        print('帰無仮説を採択')
    else:
        print('帰無仮説を棄却')
    # Double the smaller tail. Taking min(cdf, sf) picks the lower tail when y
    # is below the median and the upper tail otherwise; sf() avoids the
    # precision loss of 1 - cdf() for large y.
    p = 2 * min(rv.cdf(y), rv.sf(y))
    # '.3f' = three decimals (the previous ' 3f' meant sign-space, width 3,
    # default six decimals).
    print(f'p値は{p:.3f}')

In [None]:
# Chi-square test of the loaded sample against a hypothesised variance of 9.
pvar_test(sample, 9)

In [None]:
def pmean_test2(sample, mean0, alpha=0.05):
    """Two-sided one-sample t-test for a population mean (variance unknown).

    Prints the decision at significance level ``alpha`` ('帰無仮説を採択' =
    the null hypothesis is accepted, '帰無仮説を棄却' = it is rejected),
    followed by the two-sided p-value rounded to three decimals.

    Parameters
    ----------
    sample : sequence of numbers
        Observations; must support ``len()``, ``np.mean`` and ``np.var``.
    mean0 : float
        Population mean under the null hypothesis.
    alpha : float, optional
        Significance level (default 0.05).

    Returns
    -------
    None
        Results are printed, not returned.
    """
    s_mean = np.mean(sample)
    u_var = np.var(sample, ddof=1)  # unbiased sample variance
    n = len(sample)
    rv = stats.t(df=n-1)
    lower, upper = rv.interval(1 - alpha)

    t = (s_mean - mean0) / np.sqrt(u_var / n)
    if lower <= t <= upper:
        print('帰無仮説を採択')
    else:
        print('帰無仮説を棄却')
    # Student's t is symmetric, so the two-sided p-value is twice the upper
    # tail at |t|. sf() is used instead of 1 - cdf() because the subtraction
    # loses precision for large t.
    p = 2 * rv.sf(abs(t))
    # '.3f' = three decimals (the previous ' 3f' meant sign-space, width 3,
    # default six decimals).
    print(f'p値は{p:.3f}')

In [None]:
# t-test of the loaded sample against a hypothesised mean of 130.
pmean_test2(sample, 130)

In [None]:
# Cross-check with SciPy's built-in one-sample t-test; note that this rebinds
# the notebook-level names `t` and `p`.
t, p = stats.ttest_1samp(sample, 130)
t, p

In [None]:
# Load the data set with '前' (before) and '後' (after) columns.
training_rel = pd.read_csv('./data/ch11_training_rel.csv')
print(training_rel.shape)
training_rel.head()

In [None]:
# Per-row difference after - before, stored in a new '差' (difference) column.
# This mutates training_rel in place.
training_rel['差'] = training_rel['後'] - training_rel['前']
training_rel.head()

In [None]:
# One-sample t-test of the differences against a mean of 0.
t, p = stats.ttest_1samp(training_rel['差'], 0)
p

In [None]:
# The same hypothesis expressed as a paired t-test on the two columns.
t, p = stats.ttest_rel(training_rel['後'], training_rel['前'])
p

In [None]:
# Load the data set with columns 'A' and 'B', compared below as two
# independent samples.
training_ind = pd.read_csv('./data/ch11_training_ind.csv')
print(training_ind.shape)
training_ind.head()

In [None]:
# Two-sample t-test; equal_var=False selects Welch's test, which does not
# assume equal population variances.
t, p = stats.ttest_ind(training_ind['A'], training_ind['B'], equal_var=False)
p

In [None]:
# Re-read the CSV, which rebinds training_rel to a fresh frame without the '差'
# column added earlier, and take a copy of the first six rows as a toy example.
training_rel = pd.read_csv('./data/ch11_training_rel.csv')
toy_df = training_rel[:6].copy()
toy_df

In [None]:
# Difference after - before for each toy row.
diff = toy_df['後'] - toy_df['前']
toy_df['差'] = diff
toy_df

In [None]:
# Rank the absolute differences and store them in a '順位' (rank) column.
# NOTE(review): rankdata assigns average (possibly fractional) ranks to ties by
# default, and astype(int) would truncate them -- confirm these rows have no
# tied absolute differences.
rank = stats.rankdata(abs(diff)).astype(int)
toy_df['順位'] = rank
toy_df

In [None]:
# Boolean mask of the rows whose difference is negative.
diff < 0

In [None]:
# Rank sums of the negative and of the positive differences (the boolean mask
# zeroes out the other ranks).
r_minus = np.sum((diff < 0) * rank)
r_plus = np.sum((diff > 0) * rank)

r_minus, r_plus

In [None]:
# Variant where 'after' is 'before' plus 1..6, so every difference is positive.
toy2_df = training_rel[:6].copy()
toy2_df['後'] = toy2_df['前'] + np.arange(1, 7)
diff2 = toy2_df['後'] - toy2_df['前']
rank2 = stats.rankdata(abs(diff2)).astype(int)
toy2_df['差'] = diff2
toy2_df['順位'] = rank2
toy2_df

In [None]:
# With no negative differences the negative rank sum is 0.
r_minus2 = np.sum((diff2 < 0) * rank2)
r_plus2 = np.sum((diff2 > 0) * rank2)

r_minus2, r_plus2

In [None]:
# Variant with mixed signs: the same magnitudes 1..6, three of them negative.
toy3_df = training_rel[:6].copy()
toy3_df['後'] = toy3_df['前'] + [1, -2, -3, 4, 5, -6]
diff3 = toy3_df['後'] - toy3_df['前']
rank3 = stats.rankdata(abs(diff3)).astype(int)
toy3_df['差'] = diff3
toy3_df['順位'] = rank3
toy3_df

In [None]:
# Rank sums of the negative and of the positive differences.
r_minus3 = np.sum((diff3 < 0) * rank3)
r_plus3 = np.sum((diff3 > 0) * rank3)

r_minus3, r_plus3

In [None]:
# Wilcoxon signed-rank test on the full data, passing the two paired columns.
T, p = stats.wilcoxon(training_rel['前'], training_rel['後'])
p                    

In [None]:
# Same test, passing the per-row differences directly.
T, p = stats.wilcoxon(training_rel['後'] - training_rel['前'])
p    

In [None]:
# Simulate n samples of 20 differences each, drawn from a normal distribution
# with mean 3 and standard deviation 4 and rounded to whole numbers.
# Note: `n` is rebound later in the notebook to len(ad_df).
n = 10000
diffs = np.round(stats.norm(3, 4).rvs(size=(n, 20)))

In [None]:
# Fraction of the simulated samples (rows of `diffs`) for which the one-sample
# t-test against 0 rejects at level alpha.
alpha = 0.05
# A single call with axis=1 tests every row at once, replacing a Python loop of
# n separate calls; it also avoids rebinding the notebook-level `diff` that the
# toy-example cells define. `t` and `p` are arrays with one entry per row.
t, p = stats.ttest_1samp(diffs, 0, axis=1)
cnt = int(np.sum(p < alpha))
cnt / n

In [None]:
# Fraction of the simulated samples (rows of `diffs`) for which the Wilcoxon
# signed-rank test rejects at level alpha.
cnt = 0
alpha = 0.05
# The loop variable is named diff_ so it does not rebind the notebook-level
# `diff` Series that the toy-example cells define.
for diff_ in diffs:
    T, p = stats.wilcoxon(diff_)
    if p < alpha:
        cnt += 1
cnt / n

In [None]:
# Load the data set with '広告' (ad) and '購入' (purchase) columns.
# Note: this rebinds `n`, previously the number of simulated samples.
ad_df = pd.read_csv('./data/ch11_ad.csv')
n = len(ad_df)
print(n)
ad_df.head()

In [None]:
# Contingency table: rows = ad, columns = purchase outcome.
ad_cross = pd.crosstab(ad_df['広告'], ad_df['購入'])
ad_cross

In [None]:
# Share of 'した' (purchased) within each ad row.
ad_cross['した'] / (ad_cross['した'] + ad_cross['しなかった'])

In [None]:
# Column totals. NOTE(review): the unpacking assumes exactly two columns in the
# order 'した', 'しなかった' -- confirm against the displayed table.
n_yes, n_not = ad_cross.sum()
n_yes, n_not

In [None]:
# Row totals. NOTE(review): the unpacking assumes exactly two rows in the order
# A, B -- confirm against the displayed table.
n_adA, n_adB = ad_cross.sum(axis=1)
n_adA, n_adB

In [None]:
# Expected frequencies under independence: (row total * column total) / n for
# each cell, laid out row by row with the same labels as the contingency table.
ad_ef = pd.DataFrame(
    [
        [n_adA * n_yes / n, n_adA * n_not / n],
        [n_adB * n_yes / n, n_adB * n_not / n],
    ],
    index=['A', 'B'],
    columns=['した', 'しなかった'],
)
ad_ef

In [None]:
# Chi-square statistic: sum over all cells of (observed - expected)^2 / expected.
y = ((ad_cross - ad_ef)**2 / ad_ef).sum().sum()
y

In [None]:
# The same sum written out with literal numbers.
# NOTE(review): presumably the observed and expected counts from the two tables
# above -- confirm against ad_cross and ad_ef.
(49 - 40)**2 / 40 + (51 - 60)**2 / 60 + (351 - 360)**2 / 360 + (549 - 540)**2 / 540

In [None]:
# p-value: upper-tail probability of y under a chi-square distribution with
# 1 degree of freedom.
rv = stats.chi2(1)
1 - rv.cdf(y)

In [None]:
# Cross-check with SciPy; correction=False turns off Yates' continuity
# correction so the statistic matches the manual calculation above.
chi2, p, dof, ef = stats.chi2_contingency(ad_cross, correction=False)
chi2, p, dof

In [None]:
# Expected frequencies as returned by chi2_contingency.
ef