<a href="https://colab.research.google.com/github/sasuraibito1125/google_colab/blob/main/%CF%872%E5%88%86%E5%B8%83%E8%A1%A8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 関数定義

In [None]:
#@title #### 信頼係数から $\alpha$ に変換する関数
#@markdown 関数名：`cf2alpha`
#@markdown * 引数：`cf`
#@markdown * 戻り値：`tuple<float, float>`
#@markdown
#@markdown * upper: $\dfrac{100 + CF}{200}$
#@markdown * lower: $\dfrac{100 - CF}{200}$
def cf2alpha(cf):
  '''Convert a confidence factor given in percent into a pair of probabilities.

  Parameters
  ----------
  cf: confidence factor in percent (for example 95).

  Returns
  -------
  tuple
    ``(1 - a, a)`` where ``a = (100 - cf) / 200``, i.e. the upper value
    first and the lower value second.
  '''
  tail = (100 - cf) / 200
  pair = (1 - tail, tail)
  return pair

In [None]:
#@title #### $\chi^2$ 分布表生成関数（％点） { vertical-output: true, display-mode: "form" }
#@markdown 関数名：`generate_chi2d_table_pp`
#@markdown * 引数：`both=False`, `alpha=None`, `v=None`, `precision=4`
#@markdown * 戻り値：`DataFrame`
#@markdown * 依存関数：`cf2alpha`
def generate_chi2d_table_pp(both=False, alpha=None, v=None, precision=4):
  '''Generate a chi-square distribution table of percent points.

  Each cell holds ``scipy.stats.chi2.ppf(alpha, v)`` for one probability
  (column) and one number of degrees of freedom (row).

  Parameters
  ----------
  both: if True, every value of the table is multiplied by 2. default False.
    NOTE(review): the doubling is kept from the original implementation;
    confirm that this is the intended meaning of "both-sided".
  alpha: probability or list/array of probabilities used as columns.
    If None or empty, the values returned by cf2alpha for the confidence
    factors 70, 80, 90, 95, 98 and 99 are used, sorted in descending order.
  v: degrees of freedom used as rows, as a 1-D sequence or an (n, 1) array.
    If None or empty, 1 to 30 and 40 to 100 in steps of 10 are used.
    For any other shape a message is printed and the default is used.
  precision: number of decimals the values are rounded to. default 4.

  Returns
  -------
  DataFrame
    table of percent points. Columns are labelled with "%.3f" of alpha,
    the index (named 'ν') with "%d" of the degrees of freedom.
  '''
  import numpy as np
  import pandas as pd
  from scipy.stats import chi2

  def is_unset(values):
    # Explicit test instead of `not values`: truth-testing a NumPy array
    # with more than one element raises ValueError.
    return values is None or np.size(values) == 0

  def generate_default_alpha():
    cf_list = [70, 80, 90, 95, 98, 99]
    return sorted({a for cf in cf_list for a in cf2alpha(cf)}, reverse=True)

  def generate_default_v():
    v1 = np.arange(1, 31, dtype=int)
    v2 = np.linspace(40, 100, 7, dtype=int)
    return np.concatenate((v1, v2)).reshape(-1, 1)

  if is_unset(alpha):
    alpha = generate_default_alpha()
  # atleast_1d lets a single scalar probability produce a one-column table.
  alpha = np.atleast_1d(np.asarray(alpha))

  if is_unset(v):
    v = generate_default_v()
  else:
    v = np.asarray(v)
    if v.ndim == 1:
      # Column vector so that ppf broadcasts rows (v) against columns (alpha).
      v = v.reshape(-1, 1)
    elif v.ndim == 2 and v.shape[1] == 1:
      pass
    else:
      print('不正な自由度のリストが指定されました。デフォルト値を使用します。')
      v = generate_default_v()

  # Flatten v before formatting: iterating the (n, 1) array would hand
  # 1-element arrays to "%d", which relies on deprecated array-to-scalar
  # conversion.
  df = pd.DataFrame(chi2.ppf(alpha, v),
                    columns=["%.3f" % a for a in alpha],
                    index=["%d" % nu for nu in v.ravel()])
  df.index.name = 'ν'
  return (df * (2 if both else 1)).round(precision)


In [None]:
#@title #### $\chi^2$ 分布表生成関数（逆生存） { vertical-output: true, display-mode: "form" }
#@markdown 関数名：`generate_chi2d_table_is`
#@markdown * 引数：`both=False`, `alpha=None`, `v=None`, `precision=4`
#@markdown * 戻り値：`DataFrame`
#@markdown * 依存関数：`cf2alpha`
def generate_chi2d_table_is(both=False, alpha=None, v=None, precision=4):
  '''Generate a chi-square distribution table of inverse survival values.

  Each cell holds ``scipy.stats.chi2.isf(alpha, v)`` for one probability
  (column) and one number of degrees of freedom (row).

  Parameters
  ----------
  both: if True, every value of the table is multiplied by 2. default False.
    NOTE(review): the doubling is kept from the original implementation;
    confirm that this is the intended meaning of "both-sided".
  alpha: probability or list/array of probabilities used as columns.
    If None or empty, the values returned by cf2alpha for the confidence
    factors 70, 80, 90, 95, 98 and 99 are used, sorted in descending order.
  v: degrees of freedom used as rows, as a 1-D sequence or an (n, 1) array.
    If None or empty, 1 to 30 and 40 to 100 in steps of 10 are used.
    For any other shape a message is printed and the default is used.
  precision: number of decimals the values are rounded to. default 4.

  Returns
  -------
  DataFrame
    table of inverse survival values. Columns are labelled with "%.3f" of
    alpha, the index (named 'ν') with "%d" of the degrees of freedom.
  '''
  import numpy as np
  import pandas as pd
  from scipy.stats import chi2

  def is_unset(values):
    # Explicit test instead of `not values`: truth-testing a NumPy array
    # with more than one element raises ValueError.
    return values is None or np.size(values) == 0

  def generate_default_alpha():
    cf_list = [70, 80, 90, 95, 98, 99]
    return sorted({a for cf in cf_list for a in cf2alpha(cf)}, reverse=True)

  def generate_default_v():
    v1 = np.arange(1, 31, dtype=int)
    v2 = np.linspace(40, 100, 7, dtype=int)
    return np.concatenate((v1, v2)).reshape(-1, 1)

  if is_unset(alpha):
    alpha = generate_default_alpha()
  # atleast_1d lets a single scalar probability produce a one-column table.
  alpha = np.atleast_1d(np.asarray(alpha))

  if is_unset(v):
    v = generate_default_v()
  else:
    v = np.asarray(v)
    if v.ndim == 1:
      # Column vector so that isf broadcasts rows (v) against columns (alpha).
      v = v.reshape(-1, 1)
    elif v.ndim == 2 and v.shape[1] == 1:
      pass
    else:
      print('不正な自由度のリストが指定されました。デフォルト値を使用します。')
      v = generate_default_v()

  # Flatten v before formatting: iterating the (n, 1) array would hand
  # 1-element arrays to "%d", which relies on deprecated array-to-scalar
  # conversion.
  df = pd.DataFrame(chi2.isf(alpha, v),
                    columns=["%.3f" % a for a in alpha],
                    index=["%d" % nu for nu in v.ravel()])
  df.index.name = 'ν'
  return (df * (2 if both else 1)).round(precision)


In [None]:
#@title #### 表の行と列を強調する関数
#@markdown 関数名：`highlight_subject`
#@markdown * 引数：`df`, `row=None`, `col=None`, `row_color='#FFD0FF'`, `col_color='#D1FDFF'`, `cross_color='#C7B5FF'`
#@markdown * 戻り値：`Styler`
def highlight_subject(df, row=None, col=None,
                      row_color='#FFD0FF',
                      col_color='#D1FDFF',
                      cross_color='#C7B5FF'):
  '''highlight row, column and cross point of DataFrame.

  Parameters
  ----------
  df: a target pandas DataFrame to be highlighted
  row: a target row to be highlighted
  col: a target column to be highlighted
  row_color: a highlight color of row
  col_color: a highlight color of column
  cross_color: a highlight color of cross point of highligh row and column

  Returns
  -------
  Styler
    pandas Styler object to be highlighted
  '''
  import pandas as pd

  idx = pd.IndexSlice
  styler = df.style
  if row:
    styler = styler.set_properties(**{'color':'black', 'background-color': row_color},
                                   subset=idx[idx[row], :])
  if col:
    styler = styler.set_properties(**{'color':'black', 'background-color': col_color},
                                   subset=idx[col])
  if col and row:
    styler = styler.set_properties(**{'color':'black', 'background-color': cross_color},
                                   subset=idx[idx[row], idx[col]])
  return styler


In [None]:
# @title #### $\chi^2$ 分布表から指定された自由度 $\nu$ と $\alpha$ で $\chi^2_{\alpha_{upper}}(\nu), \chi^2_{\alpha_{lower}}(\nu)$ を取得する関数 { vertical-output: true, display-mode: "form" }
#@markdown 関数名：`find_chi2_in_chi2d_table`
#@markdown * 引数：`c2d`, `nu`, `upper`, `lower=None`, `both=False`
#@markdown * 戻り値：`tuple<float, float | None>`
def find_chi2_in_chi2d_table(c2d, nu, upper, lower=None, both=False):
  '''Look up chi-square values in a chi-square distribution table.

  Parameters
  ----------
  c2d: pandas DataFrame of a chi-square distribution table whose index
    labels are degrees of freedom formatted with "d" and whose column
    labels are probabilities formatted with ".3f".
  nu: degrees of freedom (integer).
  upper: probability of the column to read the first value from.
  lower: probability of the column to read the second value from.
    (1 - upper) is used if it is None.
  both: if True, the value for lower is looked up as well.

  Returns
  -------
  tuple
    (value for upper, value for lower). The second element is None if
    both is False.
  '''
  row_label = f'{nu:d}'
  upper_value = c2d.at[row_label, f'{upper:.3f}']
  if not both:
    return upper_value, None
  # Compare against None so that an explicit lower of 0.0 is honoured.
  if lower is None:
    lower = 1 - upper
  return upper_value, c2d.at[row_label, f'{lower:.3f}']


In [None]:
# @title #### 母分散の信頼区間の計算 { vertical-output: true, display-mode: "form" }
#@markdown 関数名：`calc_conf_interval_of_pv`
#@markdown * 引数：`n`, `s2`, `cf=95`, `precision=5`, `c2d=None`（省略時は `generate_chi2d_table_pp` で生成）
#@markdown * 戻り値： `tuple<float, float, float, float>`
#@markdown * 依存関数：`cf2alpha`, `generate_chi2d_table_pp`, `find_chi2_in_chi2d_table`
def calc_conf_interval_of_pv(n, s2, cf=95, precision=5, c2d=None):
  '''Calculate the confidence interval of the population variance.

  Parameters
  ----------
  n: sample size (at least 2, so that the degrees of freedom n - 1 are positive).
  s2: unbiased estimate of variance.
  cf: confidence factor in percent. default is 95.
  precision: number of decimals the interval bounds are rounded to. default 5.
  c2d: pandas DataFrame of a chi-square distribution table (percent points).
    If None, a table holding just the needed degrees of freedom and
    probabilities is generated with generate_chi2d_table_pp on each call.

  Returns
  -------
  tuple
    lower bound, upper bound, chi-square value used for the lower bound and
    chi-square value used for the upper bound.

  Raises
  ------
  ValueError
    if n is smaller than 2.
  '''
  if n < 2:
    raise ValueError('n must be at least 2 to have positive degrees of freedom')
  nu = n - 1
  upper_alpha, lower_alpha = cf2alpha(cf)
  if c2d is None:
    # Built per call, not as a default argument: a default is evaluated once
    # at definition time and shared, and the default table lacks rows such
    # as nu = 35 and confidence factors other than 70, 80, 90, 95, 98, 99.
    # The set avoids duplicate column labels when both probabilities coincide.
    c2d = generate_chi2d_table_pp(
        alpha=sorted({upper_alpha, lower_alpha}, reverse=True), v=[nu])
  upper, lower = find_chi2_in_chi2d_table(c2d, nu, upper_alpha, lower_alpha, both=True)
  numerator = nu * s2
  # The larger chi-square value gives the lower bound of the interval.
  return round(numerator / upper, precision), round(numerator / lower, precision), upper, lower

In [None]:
# @title #### 適合度の統計量 $\chi^2$ の計算 { vertical-output: true, display-mode: "form" }
#@markdown 関数名：`calc_chi2_goodness`
#@markdown * 引数：`df`, `row_theoretical='理論値'`, `row_actual='実測値'`, `precision=4`
#@markdown * 戻り値： `float`
#@markdown * 依存関数：なし
def calc_chi2_goodness(df, row_theoretical='理論値', row_actual='実測値', precision=4):
  '''Compute the chi-square statistic for a goodness-of-fit test.

  Parameters
  ----------
  df: DataFrame with one row of theoretical values and one row of actual
    values, one column per category. (No totals, no percentage, and so on)
  row_theoretical: index label of the theoretical row. Default value is '理論値'.
  row_actual: index label of the actual row. Default value is '実測値'.
  precision: number of decimals of the result. Default value is 4.

  Returns
  -------
  float
    sum over the categories of (actual - theoretical) ** 2 / theoretical,
    rounded to the given precision.
  '''
  by_category = df.T
  expected = by_category[row_theoretical]
  observed = by_category[row_actual]
  deviation = (observed - expected) ** 2 / expected
  statistic = deviation.sum()
  return round(statistic, precision)


In [None]:
# @title #### 独立性の統計量 $\chi^2$ の計算 { vertical-output: true, display-mode: "form" }
#@markdown 関数名：`calc_chi2_independence`
#@markdown * 引数：`df_theoretical`, `df_actual`, `precision=3`
#@markdown * 戻り値： `float`
#@markdown * 依存関数：なし
def calc_chi2_independence(df_theoretical, df_actual, precision=3):
  '''Compute the chi-square statistic for a test of independence.

  Parameters
  ----------
  df_theoretical: DataFrame of theoretical values, target cells only. (No totals, no percentage, and so on)
  df_actual: DataFrame of actual values with the same layout as df_theoretical.
  precision: number of decimals of the result. Default value is 3.

  Returns
  -------
  float
    sum over all cells of (actual - theoretical) ** 2 / theoretical,
    rounded to the given precision.
  '''
  squared_gap = (df_actual - df_theoretical) ** 2
  contributions = squared_gap / df_theoretical
  # First sum per column, then across the column totals.
  column_totals = contributions.sum()
  return column_totals.sum().round(precision)


# 利用例

In [None]:
# @title #### 利用例：`generate_chi2d_table_pp` { display-mode: "both" }
# @markdown $\chi^2$ 分布表（％点）の表示
# Build the percent-point table with the default probabilities and degrees
# of freedom. It is kept in `c2d` because later example cells read from it.
c2d = generate_chi2d_table_pp()
# A bare expression on the last line of a cell displays the table.
c2d

Unnamed: 0_level_0,0.995,0.990,0.975,0.950,0.900,0.850,0.150,0.100,0.050,0.025,0.010,0.005
ν,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,7.8794,6.6349,5.0239,3.8415,2.7055,2.0723,0.0358,0.0158,0.0039,0.001,0.0002,0.0
2,10.5966,9.2103,7.3778,5.9915,4.6052,3.7942,0.325,0.2107,0.1026,0.0506,0.0201,0.01
3,12.8382,11.3449,9.3484,7.8147,6.2514,5.317,0.7978,0.5844,0.3518,0.2158,0.1148,0.0717
4,14.8603,13.2767,11.1433,9.4877,7.7794,6.7449,1.3665,1.0636,0.7107,0.4844,0.2971,0.207
5,16.7496,15.0863,12.8325,11.0705,9.2364,8.1152,1.9938,1.6103,1.1455,0.8312,0.5543,0.4117
6,18.5476,16.8119,14.4494,12.5916,10.6446,9.4461,2.6613,2.2041,1.6354,1.2373,0.8721,0.6757
7,20.2777,18.4753,16.0128,14.0671,12.017,10.7479,3.3583,2.8331,2.1673,1.6899,1.239,0.9893
8,21.955,20.0902,17.5345,15.5073,13.3616,12.0271,4.0782,3.4895,2.7326,2.1797,1.6465,1.3444
9,23.5894,21.666,19.0228,16.919,14.6837,13.288,4.8165,4.1682,3.3251,2.7004,2.0879,1.7349
10,25.1882,23.2093,20.4832,18.307,15.9872,14.5339,5.5701,4.8652,3.9403,3.247,2.5582,2.1559


In [None]:
# @title #### 利用例：`generate_chi2d_table_pp` { run: "auto", display-mode: "both" }
# @markdown $\chi^2$ 分布表（％点）の表示（上側$\alpha\%$点および自由度$\nu$を指定）
# @markdown
# @markdown $\alpha$ と自由度のリスト
# Colab form inputs: probabilities (table columns) and degrees of freedom
# (table rows).
alpha = [0.25, 0.1, 0.05, 0.025, 0.001] #@param {type:"raw"}
v = [1, 2, 3, 4, 5, 10, 11, 12, 14, 15, 20, 30, 40, 50, 100, 150, 200] #@param {type:"raw"}
# Display the percent-point table restricted to exactly those values.
generate_chi2d_table_pp(alpha=alpha, v=v)

Unnamed: 0_level_0,0.250,0.100,0.050,0.025,0.001
ν,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.1015,0.0158,0.0039,0.001,0.0
2,0.5754,0.2107,0.1026,0.0506,0.002
3,1.2125,0.5844,0.3518,0.2158,0.0243
4,1.9226,1.0636,0.7107,0.4844,0.0908
5,2.6746,1.6103,1.1455,0.8312,0.2102
10,6.7372,4.8652,3.9403,3.247,1.4787
11,7.5841,5.5778,4.5748,3.8157,1.8339
12,8.4384,6.3038,5.226,4.4038,2.2142
14,10.1653,7.7895,6.5706,5.6287,3.0407
15,11.0365,8.5468,7.2609,6.2621,3.4827


In [None]:
# @title #### 利用例：`generate_chi2d_table_is` { display-mode: "both" }
# @markdown $\chi^2$ 分布表（逆生存）の表示
# Display the inverse-survival table with the default probabilities and
# degrees of freedom.
generate_chi2d_table_is()

Unnamed: 0_level_0,0.995,0.990,0.975,0.950,0.900,0.850,0.150,0.100,0.050,0.025,0.010,0.005
ν,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,0.0,0.0002,0.001,0.0039,0.0158,0.0358,2.0723,2.7055,3.8415,5.0239,6.6349,7.8794
2,0.01,0.0201,0.0506,0.1026,0.2107,0.325,3.7942,4.6052,5.9915,7.3778,9.2103,10.5966
3,0.0717,0.1148,0.2158,0.3518,0.5844,0.7978,5.317,6.2514,7.8147,9.3484,11.3449,12.8382
4,0.207,0.2971,0.4844,0.7107,1.0636,1.3665,6.7449,7.7794,9.4877,11.1433,13.2767,14.8603
5,0.4117,0.5543,0.8312,1.1455,1.6103,1.9938,8.1152,9.2364,11.0705,12.8325,15.0863,16.7496
6,0.6757,0.8721,1.2373,1.6354,2.2041,2.6613,9.4461,10.6446,12.5916,14.4494,16.8119,18.5476
7,0.9893,1.239,1.6899,2.1673,2.8331,3.3583,10.7479,12.017,14.0671,16.0128,18.4753,20.2777
8,1.3444,1.6465,2.1797,2.7326,3.4895,4.0782,12.0271,13.3616,15.5073,17.5345,20.0902,21.955
9,1.7349,2.0879,2.7004,3.3251,4.1682,4.8165,13.288,14.6837,16.919,19.0228,21.666,23.5894
10,2.1559,2.5582,3.247,3.9403,4.8652,5.5701,14.5339,15.9872,18.307,20.4832,23.2093,25.1882


In [None]:
# @title #### 利用例：`highlight_subject` { run: "auto", display-mode: "both" }
# @markdown $\chi^2$ 分布表の指定された自由度と $\alpha$ （上側）を強調表示
# Colab form inputs: degrees of freedom and probability to highlight.
# Note that `v` rebinds the global used as a list in an earlier example cell.
v = 9 #@param {type:"integer"}
upper = 0.975 #@param {type:"number"}
# Highlight row `v` and the columns for `upper` and `1 - upper` in the first
# 15 rows of `c2d`. The labels are formatted with "d" / ".3f" so that they
# match the string labels of the table's index and columns.
highlight_subject(c2d.head(15), f'{v:d}', [f'{upper:.3f}', f'{1-upper:.3f}'])

Unnamed: 0_level_0,0.995,0.990,0.975,0.950,0.900,0.850,0.150,0.100,0.050,0.025,0.010,0.005
ν,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,7.8794,6.6349,5.0239,3.8415,2.7055,2.0723,0.0358,0.0158,0.0039,0.001,0.0002,0.0
2,10.5966,9.2103,7.3778,5.9915,4.6052,3.7942,0.325,0.2107,0.1026,0.0506,0.0201,0.01
3,12.8382,11.3449,9.3484,7.8147,6.2514,5.317,0.7978,0.5844,0.3518,0.2158,0.1148,0.0717
4,14.8603,13.2767,11.1433,9.4877,7.7794,6.7449,1.3665,1.0636,0.7107,0.4844,0.2971,0.207
5,16.7496,15.0863,12.8325,11.0705,9.2364,8.1152,1.9938,1.6103,1.1455,0.8312,0.5543,0.4117
6,18.5476,16.8119,14.4494,12.5916,10.6446,9.4461,2.6613,2.2041,1.6354,1.2373,0.8721,0.6757
7,20.2777,18.4753,16.0128,14.0671,12.017,10.7479,3.3583,2.8331,2.1673,1.6899,1.239,0.9893
8,21.955,20.0902,17.5345,15.5073,13.3616,12.0271,4.0782,3.4895,2.7326,2.1797,1.6465,1.3444
9,23.5894,21.666,19.0228,16.919,14.6837,13.288,4.8165,4.1682,3.3251,2.7004,2.0879,1.7349
10,25.1882,23.2093,20.4832,18.307,15.9872,14.5339,5.5701,4.8652,3.9403,3.247,2.5582,2.1559


In [None]:
# @title #### 利用例：`find_chi2_in_chi2d_table` { run: "auto", vertical-output: true, display-mode: "both" }
# @markdown $\chi^2$ 分布表から指定された自由度と $\alpha$ に対する $\chi^2$ 値を検索
# @markdown
# @markdown 自由度、$\alpha$
# Colab form inputs: degrees of freedom and probability to look up.
nu = 9 #@param {type:"integer"}
upper = 0.025 #@param {type:"number"}
# With both=True and no explicit `lower`, the second value is read from the
# column for 1 - upper.
find_chi2_in_chi2d_table(c2d, nu, upper, both=True)

(2.7004, 19.0228)

In [None]:
# @title #### 利用例：`calc_conf_interval_of_pv` { run: "auto", vertical-output: true, display-mode: "both" }
# @markdown 母分散の信頼区間の計算
# @markdown
# @markdown サンプルサイズ、不偏分散、信頼係数
# Colab form inputs: sample size, unbiased variance and confidence factor.
n = 10 #@param {type:"integer"}
s2 = 6757.3 #@param {type:"number"}
cf = 95 #@param ["90", "95", "99"] {type:"raw", allow-input: true}
# ci = (lower bound, upper bound, chi-square value, chi-square value)
ci = calc_conf_interval_of_pv(n, s2, cf)
# NOTE(review): ci[2] and ci[3] are the chi-square values returned by
# calc_conf_interval_of_pv, although the printed labels call them
# alpha_upper / alpha_lower.
print('lower:', round(ci[0], 2), ', upper:', round(ci[1], 2),
      ', alpha_upper:', round(ci[2], 2), ', alpha_lower:', round(ci[3], 2))

lower: 3196.99 , upper: 22521.0 , alpha_upper: 19.02 , alpha_lower: 2.7


In [None]:
# @title #### 利用例：`calc_chi2_goodness` { run: "auto", vertical-output: true, display-mode: "both" }
# @markdown 適合度の統計量 $\chi^2$ の計算
# @markdown
# @markdown |血液型|A型|B型|O型|AB型|
# @markdown |------|---|---|---|----|
# @markdown |理論値|40|30|20|10|
# @markdown |実測値|55|22|16|7|
import pandas as pd
# Rows are selected by their index labels ('実測値' / '理論値'), so the row
# order in this frame does not matter.
# NOTE(review): the markdown table above lists 30/22 under B型 and 20/16
# under O型, while this frame attaches those numbers to the opposite labels.
# The statistic sums over all columns, so the result is unaffected.
calc_chi2_goodness(pd.DataFrame({
    '血液型': ['実測値', '理論値'],
    'A型': [55, 40],
    'O型': [22, 30],
    'B型': [16, 20],
    'AB型': [7, 10]}).set_index('血液型'))


9.4583

In [None]:
# @title #### 利用例：`calc_chi2_independence` { run: "auto", vertical-output: true, display-mode: "both" }
# @markdown 独立性の統計量 $\chi^2$ の計算
# @markdown
# @markdown 理論値
# @markdown
# @markdown |血液型|A型|B型|O型|AB型|
# @markdown |------|---|---|---|----|
# @markdown |男性|55|22|16|7|
# @markdown |女性|40|32|24|10|
# @markdown
# @markdown 実測値
# @markdown
# @markdown |血液型|A型|B型|O型|AB型|
# @markdown |------|---|---|---|----|
# @markdown |男性|47.5|27|20|5.5|
# @markdown |女性|47.5|27|20|5.5|
import pandas as pd
# Two frames with identical index and column labels; the statistic is computed
# cell by cell and divided by the values of `df_theoretical`.
# NOTE(review): the integer counts are passed as `df_theoretical` and the
# row-identical values (47.5, 27, 20, 5.5) as `df_actual`, matching the
# markdown labels above — confirm the two roles are not swapped.
# NOTE(review): as in the markdown table, the B型 / O型 numbers appear under
# the opposite labels here; the total is unaffected.
df_theoretical = pd.DataFrame({
    '血液型': ['男性', '女性'],
    'A型': [55, 40],
    'O型': [22, 32],
    'B型': [16, 24],
    'AB型': [7, 10]}).set_index('血液型')
df_actual = pd.DataFrame({
    '血液型': ['男性', '女性'],
    'A型': [47.5, 47.5],
    'O型': [27, 27],
    'B型': [20, 20],
    'AB型': [5.5, 5.5]}).set_index('血液型')
calc_chi2_independence(df_theoretical, df_actual)

8.36