# 数理統計学 9章解答 written by Nakamura

In [2]:
import numpy as np
from scipy import stats

In [35]:
# 9.1
# Confidence intervals from one sample: the mean with a given variance,
# the mean with an estimated variance, and the variance itself.
samples = [12.7, 6.6, 5.6, 14.3, 11.4, 10.8, 13.8, 11.2, 10.0, 12.8, 7.1, 14.0]
q = 0.95  # confidence level
sigma2 = 7  # variance used as given in part (1)
n = len(samples)  # sample size
X_bar = np.mean(samples)  # sample mean
s2 = np.var(samples, ddof=1)  # unbiased sample variance

# (1) normal interval centred on the sample mean, scale sqrt(sigma2/n)
se_known = np.sqrt(sigma2/n)
print('Answer of 9.1 (1) is:', stats.norm.interval(q, loc=X_bar, scale=se_known))

# (2) t interval with n-1 degrees of freedom, scale sqrt(s2/n)
se_est = np.sqrt(s2/n)
print('Answer of 9.1 (2) is:', stats.t.interval(q, n-1, loc=X_bar, scale=se_est))

# (3) chi-square interval for the variance
tail = (1-q)/2  # probability left in each tail
chi_left = stats.chi2.ppf(tail, n-1)  # ppf is the inverse of the distribution function (cdf)
chi_right = stats.chi2.ppf(q+tail, n-1)
ss = (n-1)*s2  # (n-1) times the unbiased variance
print('Answer of 9.1 (3) is:', (ss/chi_right, ss/chi_left))

Answer of 9.1 (1) is: (9.361386113576325, 12.355280553090344)
Answer of 9.1 (2) is: (8.956183398904026, 12.760483267762643)
Answer of 9.1 (3) is: (4.497670853412747, 25.837440037846203)


In [37]:
# 9.2
def bernouli_interval(X_bar, q, n):
    '''
    Confidence interval for the success probability of a Bernoulli
    distribution (p159 in Inagaki's textbook).

    With z the upper endpoint of the central q-interval of the standard
    normal distribution, the returned endpoints are

        (X_bar + z^2/(2n) -/+ z/sqrt(n) * sqrt(X_bar(1-X_bar) + z^2/(4n)))
        / (1 + z^2/n)

    input:
        X_bar: average of samples (sample proportion)
        q: confidence level
        n: sample size, must be positive
    output:
        numpy array [lower, upper]
    raises:
        ValueError: if n is not positive
    '''
    # Without this guard a zero n only yields inf/nan plus a runtime warning.
    if np.any(np.asarray(n) <= 0):
        raise ValueError('n must be a positive sample size')

    z_a = stats.norm.interval(q)[1]
    center = X_bar + z_a**2/(2*n)
    half_width = z_a/np.sqrt(n)*np.sqrt(X_bar*(1-X_bar) + z_a**2/(4*n))
    denominator = 1 + z_a**2/n
    # Build the array first and then divide, so the division never depends on
    # numpy's reflected operator turning a plain Python list into an array.
    return np.array([center - half_width, center + half_width])/denominator


# Sample proportion 180 out of n = 300, confidence level 0.95.
q = 0.95
n = 300
X_bar = 180/n
print('Answer of 9.2 is:', bernouli_interval(X_bar, q, n))

Answer of 9.2 is: [0.5436366  0.65383481]


In [62]:
# 9.3
# Two samples A and B: intervals for each mean, for each variance, and for
# the difference of the two means.
sample_A = [1293, 1385, 1614, 1497, 1340, 1643, 1466, 1094, 1270, 1028, 1711, 1627]
sample_B = [1061, 1065, 1383, 1090, 1021, 1138, 1070, 1143]
q = 0.95  # confidence level
n_A, n_B = len(sample_A), len(sample_B)
X_barA, X_barB = np.mean(sample_A), np.mean(sample_B)
s2_A, s2_B = np.var(sample_A, ddof=1), np.var(sample_B, ddof=1)  # unbiased variances

# (1) t interval for each mean
se_A = np.sqrt(s2_A/n_A)
se_B = np.sqrt(s2_B/n_B)
print('Answer of 9.3 (1) mu_1 is:', stats.t.interval(q, n_A-1, loc=X_barA, scale=se_A))
print('Answer of 9.3 (1) mu_2 is:', stats.t.interval(q, n_B-1, loc=X_barB, scale=se_B))

# (2) chi-square interval for each variance
lower_p = (1-q)/2
upper_p = q+(1-q)/2
chi_leftA = stats.chi2.ppf(lower_p, n_A-1)  # ppf is the inverse of the distribution function (cdf)
chi_rightA = stats.chi2.ppf(upper_p, n_A-1)
chi_leftB = stats.chi2.ppf(lower_p, n_B-1)
chi_rightB = stats.chi2.ppf(upper_p, n_B-1)
ss_A = (n_A-1)*s2_A
ss_B = (n_B-1)*s2_B
print('Answer of 9.3 (2) sigma_1 is:', (ss_A/chi_rightA, ss_A/chi_leftA))
print('Answer of 9.3 (2) sigma_2 is:', (ss_B/chi_rightB, ss_B/chi_leftB))

# (3) t interval for the difference of the means, using the pooled variance
X_diff = X_barA - X_barB
dof = n_A+n_B-2
s2_diff = (s2_A*(n_A-1) + s2_B*(n_B-1))/dof
se_diff = np.sqrt(s2_diff)*np.sqrt(1/n_A+1/n_B)
print('Answer of 9.3 (3) is:', stats.t.interval(q, dof, loc=X_diff, scale=se_diff))

Answer of 9.3 (1) mu_1 is: (1274.7049686545536, 1553.2950313454464)
Answer of 9.3 (1) mu_2 is: (1026.7299437799406, 1216.0200562200594)
Answer of 9.3 (2) sigma_1 is: (24119.56258420238, 138557.88302862243)
Answer of 9.3 (2) sigma_2 is: (5602.6475792279625, 53089.24266199098)
Answer of 9.3 (3) is: (114.8816420259761, 470.36835797402387)


In [51]:
# 9.4
# t interval for the mean of paired differences (first value minus second
# value of each pair).
first_values = [65, 74, 71, 73, 74, 68, 75, 65, 68, 69, 67, 70, 71, 70, 74]
second_values = [74, 80, 86, 95, 92, 98, 74, 77, 89, 87, 95, 97, 85, 83, 73]
samples = [a - b for a, b in zip(first_values, second_values)]
q = 0.95  # confidence level
n = len(samples)
X_bar = np.mean(samples)
s2 = np.var(samples, ddof=1)  # unbiased variance of the differences
std_err = np.sqrt(s2/n)
print('Answer of 9.4 is:', stats.t.interval(q, n-1, loc=X_bar, scale=std_err))

Answer of 9.4 is: (-20.698472778537997, -10.101527221462003)


In [68]:
# 9.5
# Two samples (after / pre): t interval for the difference of the means
# (after minus pre), using the pooled variance.
samples_after = [65, 74, 71, 73, 74, 68, 75, 65, 68, 69, 67, 70, 71, 70, 74]
samples_pre = [74, 80, 86, 95, 92, 98, 74, 77, 89, 87, 95, 97, 85, 83, 73]
q = 0.95  # confidence level
n_pre = len(samples_pre)
n_after = len(samples_after)
X_pre = np.mean(samples_pre)
X_after = np.mean(samples_after)
s2_pre = np.var(samples_pre, ddof=1)  # unbiased variances
s2_after = np.var(samples_after, ddof=1)
X_diff = X_after-X_pre
s2_diff = (s2_pre*(n_pre-1) + s2_after*(n_after-1))/(n_pre+n_after-2)
# 1/n_pre + 1/n_after instead of 2/n_pre: identical when the two sizes are
# equal (as here), and still correct if they ever differ.
se_diff = np.sqrt(s2_diff)*np.sqrt(1/n_pre + 1/n_after)
print('Answer of 9.5 is:', stats.t.interval(q, n_pre+n_after-2, X_diff, se_diff))

# Compared with 9.4, the confidence interval of 9.5 is narrower. The author's
# explanation: 9.5 uses the s2 of both after and pre, so 9.5 is the better
# interval estimation method.
# NOTE(review): the formula above is the pooled-variance form for two separate
# samples; confirm that it suits this data before relying on that conclusion.

Answer of 9.5 is: (-20.309398634009472, -10.490601365990539)


In [69]:
# 9.6
# F-distribution interval for the variance ratio s2_pre/s2_after.
# s2_pre, s2_after, n_pre and n_after must already be defined (the 9.5 cell sets them).
q = 0.90  # confidence level
r_hat = s2_pre/s2_after
dof_pre, dof_after = n_pre-1, n_after-1
f_left = stats.f.ppf((1-q)/2, dof_pre, dof_after)
f_right = stats.f.ppf(q+(1-q)/2, dof_pre, dof_after)
print('Answer of 9.6 is:', (r_hat/f_right, r_hat/f_left))

Answer of 9.6 is: (2.8151455509691474, 17.366333251793176)


In [77]:
# 9.7
# Normal-approximation interval for the difference of two sample proportions.
q = 0.95  # confidence level
n_man = 100
n_wo = 60
X_man = 55/n_man
X_wo = 48/n_wo
X_diff = X_man-X_wo
var_man = X_man*(1-X_man)/n_man
var_wo = X_wo*(1-X_wo)/n_wo
print('Answer for 9.7 is:', stats.norm.interval(q, loc=X_diff, scale=np.sqrt(var_man + var_wo)))

Answer for 9.7 is: (-0.39054003262321957, -0.1094599673767804)


In [81]:
# 9.8
# Interval for each of two sample proportions, then a normal-approximation
# interval for their difference.
q = 0.95  # confidence level
n_1 = 220
n_2 = 200
X_1 = 160/n_1
X_2 = 160/n_2
# bernouli_interval is defined in the 9.2 cell
print('The first answer is:', bernouli_interval(X_1, q, n_1))
print('The second answer is:', bernouli_interval(X_2, q, n_2))

X_diff = X_1-X_2
se_diff = np.sqrt(X_1*(1-X_1)/n_1 + X_2*(1-X_2)/n_2)
print('The third answer is:', stats.norm.interval(q, loc=X_diff, scale=se_diff))

The first answer is: [0.66489889 0.78184588]
The second answer is: [0.73914481 0.84954799]
The third answer is: (-0.153576149648384, 0.00812160419383852)
