In [104]:
%matplotlib inline 
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import stats
import os

In [105]:
# csvデータ読み込み
base_dic_name = './data/csv/mfcc/average/'

# issei  ESFJ -> S
# kai    ESFP -> S
# katuki ISFP -> S
name_S = ['issei', 'kai', 'katuki']

# momoka ENFP -> N
# keito  ENFP -> N
name_N = ['momoka', 'keito']

# 分割数
n_list = [3, 5, 7, 9, 15, 20]

In [106]:
def display_boxplot(n, isAbs=False):

  result = pd.DataFrame([], columns=['f値', 't検定(Student)', 't検定(Welch)'])

  # ファイル名の読み込み
  files = []
  folder = base_dic_name + 'n' + str(n) + '/'
  files.extend((folder + fn) for fn in os.listdir(folder))

  # 軸によって分割
  filename_S = []
  filename_N = []
  for f in files:
    for nm_S in name_S:
      if nm_S in f:
        filename_S.append(f)
        break
    else:
      filename_N.append(f)
  
  # ファイル読み込み、結合
  data_S = pd.concat(pd.read_csv(fn) for fn in filename_S)
  data_N = pd.concat(pd.read_csv(fn) for fn in filename_N)

  if isAbs:
    data_S = data_S.abs()
    data_N = data_N.abs()

  # t検定の参考
  # https://qiita.com/suaaa7/items/745ac1ca0a8d6753cf60

  # f検定・t検定(student)
  for i in range(n):
    col_name = 'dct' + str(i)
    S_var = np.var(data_S[col_name], ddof=1)
    N_var = np.var(data_N[col_name], ddof=1)
    S_df = len(data_S[col_name])
    N_df = len(data_N[col_name])
    f = S_var / N_var
    one_sided_pval1 = stats.f.cdf(f, S_df, N_df)  # 片側検定のp値 1
    one_sided_pval2 = stats.f.sf(f, S_df, N_df)   # 片側検定のp値 2
    two_sided_pval = min(one_sided_pval1, one_sided_pval2) * 2  # 両側検定のp値

    #print('F:', round(f, 3), 'p-value: ', round(two_sided_pval, 3))

    # t検定(Welch)
    result.loc[i] = [round(f, 3), round(two_sided_pval, 3), stats.ttest_ind(data_S[col_name], data_N[col_name], equal_var=False)[1]]

  print(result)

In [107]:
for n in n_list:
  print('---------------- n={} のf検定 & t検定（絶対値なし） ----------------'.format(n))
  display_boxplot(n)
  print()

---------------- n=3 のf検定 & t検定（絶対値なし） ----------------
Ttest_indResult(statistic=0.1274783317542854, pvalue=0.8993622736729907)
Ttest_indResult(statistic=-0.6556364602053928, pvalue=0.5162817493587165)
Ttest_indResult(statistic=2.5745137866198675, pvalue=0.014521120380947548)
      f値  t検定(Student)  t検定(Welch)
0  3.321         0.013    0.899362
1  0.913         0.834    0.516282
2  0.781         0.583    0.014521

---------------- n=5 のf検定 & t検定（絶対値なし） ----------------
Ttest_indResult(statistic=0.1274783317542854, pvalue=0.8993622736729907)
Ttest_indResult(statistic=-0.6556364602053928, pvalue=0.5162817493587165)
Ttest_indResult(statistic=2.5745137866198675, pvalue=0.014521120380947548)
Ttest_indResult(statistic=-0.8932473824629519, pvalue=0.3775144200066589)
Ttest_indResult(statistic=0.9573272663250192, pvalue=0.34603205551987704)
      f値  t検定(Student)  t検定(Welch)
0  3.321         0.013    0.899362
1  0.913         0.834    0.516282
2  0.781         0.583    0.014521
3  1.233       

In [108]:
for n in n_list:
  print('---------------- n={} のf検定 & t検定（絶対値あり） ----------------'.format(n))
  display_boxplot(n, True)
  print()

---------------- n=3 のf検定 & t検定（絶対値あり） ----------------
Ttest_indResult(statistic=-0.1274783317542854, pvalue=0.8993622736729907)
Ttest_indResult(statistic=-0.6556364602053928, pvalue=0.5162817493587165)
Ttest_indResult(statistic=0.2568854817709858, pvalue=0.7987176505520193)
      f値  t検定(Student)  t検定(Welch)
0  3.321         0.013    0.899362
1  0.913         0.834    0.516282
2  1.040         0.941    0.798718

---------------- n=5 のf検定 & t検定（絶対値あり） ----------------
Ttest_indResult(statistic=-0.1274783317542854, pvalue=0.8993622736729907)
Ttest_indResult(statistic=-0.6556364602053928, pvalue=0.5162817493587165)
Ttest_indResult(statistic=0.2568854817709858, pvalue=0.7987176505520193)
Ttest_indResult(statistic=-0.8932473824629519, pvalue=0.3775144200066589)
Ttest_indResult(statistic=0.9573272663250192, pvalue=0.34603205551987704)
      f値  t検定(Student)  t検定(Welch)
0  3.321         0.013    0.899362
1  0.913         0.834    0.516282
2  1.040         0.941    0.798718
3  1.233         