<a href="https://colab.research.google.com/github/no-akatsu/training/blob/main/240909_t_%E6%A4%9C%E5%AE%9A.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# import

In [None]:
import numpy as np
import seaborn as sns
from scipy import stats
from scipy.stats import t

# 性別によるチップ額の差

## データをロード

In [None]:
# Load the "tips" example dataset through seaborn and display the resulting table.
tips = sns.load_dataset('tips')
tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


## 性別ごとのチップ額

In [None]:
# Select the 'tip' column for each value of 'sex' in a single .loc lookup
# (row mask and column label together) instead of chained indexing.
male_tips = tips.loc[tips['sex'] == 'Male', 'tip']
female_tips = tips.loc[tips['sex'] == 'Female', 'tip']

In [None]:
# Display the tips of the rows where sex == 'Male'.
male_tips

Unnamed: 0,tip
1,1.66
2,3.50
3,3.31
5,4.71
6,2.00
...,...
236,1.00
237,1.17
239,5.92
241,2.00


In [None]:
# Display the tips of the rows where sex == 'Female'.
female_tips

Unnamed: 0,tip
0,1.01
4,3.61
11,5.00
14,3.02
16,1.67
...,...
226,2.00
229,2.88
238,4.67
240,2.00


In [None]:
# Check the mean tip of each group before testing the difference.
print(f'Male tips mean：{male_tips.mean()}')
print(f'Female tips mean：{female_tips.mean()}')

Male tips mean：3.0896178343949043
Female tips mean：2.8334482758620685


## 独立t検定：t値 = グループ間の平均値の差 / 標準誤差

In [None]:
# Welch's t-test (equal_var=False): does not assume equal group variances, so
# it uses the same unpooled standard error sqrt(s1^2/n1 + s2^2/n2) as the
# from-scratch calculation later in this notebook. The scipy default
# (equal_var=True) is Student's pooled-variance test, which yields a different
# t statistic and p-value and therefore cannot be reproduced by that formula.
t_statistic, p_value = stats.ttest_ind(male_tips, female_tips, equal_var=False)

In [None]:
# Report the t statistic and p-value currently held in t_statistic / p_value.
print(f't値：{t_statistic}')
print(f'p値：{p_value}')

t値：1.387859705421269
p値：0.16645623503456755


# スクラッチ：Welchのt検定
等分散を仮定しない検定で、`stats.ttest_ind(..., equal_var=False)` に相当する。
$$
t = \frac{\overline{X_1} - \overline{X_2}}{\sqrt{\frac{s_1^2}{n_1} + \frac{s_2^2}{n_2}}}
$$

In [None]:
# Sample sizes of the two groups
n_male, n_female = len(male_tips), len(female_tips)

# Group means
mean_male, mean_female = (np.mean(group) for group in (male_tips, female_tips))

# Unbiased sample variances (ddof=1 divides by n - 1 instead of n)
var_male, var_female = (np.var(group, ddof=1) for group in (male_tips, female_tips))

In [None]:
# t statistic: difference of the group means divided by the unpooled
# standard error sqrt(s1^2/n1 + s2^2/n2).
standard_error = np.sqrt((var_male / n_male) + (var_female / n_female))
t_statistic = (mean_male - mean_female) / standard_error

In [None]:
# Degrees of freedom (Welch–Satterthwaite approximation):
#   (a + b)^2 / (a^2 / (n1 - 1) + b^2 / (n2 - 1)),  with a = s1^2/n1, b = s2^2/n2
se2_male = var_male / n_male
se2_female = var_female / n_female
df = ((se2_male + se2_female) ** 2) / (
    (se2_male ** 2) / (n_male - 1) + (se2_female ** 2) / (n_female - 1)
)

In [None]:
# Two-sided p-value: twice the upper-tail probability beyond |t|.
# The survival function sf(x) = 1 - cdf(x) is evaluated directly because
# forming 1 - cdf(x) loses precision to cancellation when |t| is large
# (cdf rounds to 1.0 and the p-value collapses to exactly 0).
p_value = 2 * t.sf(np.abs(t_statistic), df)

In [None]:
# Results: from-scratch t statistic and p-value, plus the two group means.
print(f't値： {t_statistic}')
print(f'p値： {p_value}')
print(f'男性の平均チップ額： {mean_male}')
print(f'女性の平均チップ額 : {mean_female}')

t値： 1.489536377092501
p値： 0.13780683808650296
男性の平均チップ額： 3.0896178343949043
女性の平均チップ額 : 2.8334482758620685
