In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import glob
import re

from sklearn.linear_model import LinearRegression
import seaborn


pd.set_option('display.max_columns', 30)

### ここだけ手動で設定

In [61]:
subject_name_list = ["kumakura","kim","souma","fujii","tubota","toki"]
phase_name_list = ["rest", "practice", "boredom", "flow", "ultra", "overload"]
target_columns = ["bpm", "ibi", "lf", "hf","lf/hf","FC3","FC4","FCz","mean_3ch"]
target_phases = ["boredom", "flow","ultra","overload"]

# データセットの整形

In [3]:
pathes = glob.glob("/Users/miyakooti/repositories/arai_MATLAB_program/csv/?_*/HRV_and_PLI.csv")
pathes.sort()
pathes

['/Users/miyakooti/repositories/arai_MATLAB_program/csv/0_kumakura/HRV_and_PLI.csv',
 '/Users/miyakooti/repositories/arai_MATLAB_program/csv/1_kim/HRV_and_PLI.csv',
 '/Users/miyakooti/repositories/arai_MATLAB_program/csv/2_souma/HRV_and_PLI.csv',
 '/Users/miyakooti/repositories/arai_MATLAB_program/csv/3_fujii/HRV_and_PLI.csv',
 '/Users/miyakooti/repositories/arai_MATLAB_program/csv/4_tubota/HRV_and_PLI.csv',
 '/Users/miyakooti/repositories/arai_MATLAB_program/csv/5_toki/HRV_and_PLI.csv']

In [42]:
import statsmodels.api as sm

### フェーズごとの単回帰分析

In [69]:
for i,target_phase in enumerate(target_phases):
    
    for j,path in enumerate(pathes):
        if "kim" in path:
            continue
        df = pd.read_csv(path,index_col=0).fillna(0)
        # column
        # row
        df = df.loc[[target_phase]] # seriesとして取り出したいときはこっち
        if j == 0:
            flow_dataset = df
        else:
            flow_dataset = pd.concat([flow_dataset, df], axis=0)


    for target_column in target_columns:


        x = flow_dataset[target_column]
        y = flow_dataset[["questionnaire_average"]]

        X = sm.add_constant(x)

        model = sm.OLS(y, X)
        results = model.fit()
        print(f"{target_phase},{target_column},{results.pvalues[target_column]}")
        

boredom,bpm,0.7223031229033197
boredom,ibi,0.6647889890337049
boredom,lf,0.7911615965433659
boredom,hf,0.4005949245266795
boredom,lf/hf,0.3408735096708481
boredom,FC3,0.3145220638896371
boredom,FC4,0.24662850043006335
boredom,FCz,0.6696413964192729
boredom,mean_3ch,0.37215403888931264
flow,bpm,0.36411118872614373
flow,ibi,0.32581179140066985
flow,lf,0.02495862354075056
flow,hf,0.2580588091866477
flow,lf/hf,0.38957750600784385
flow,FC3,0.8716448617571669
flow,FC4,0.7885557062168299
flow,FCz,0.0941040952533568
flow,mean_3ch,0.5118807306602561
ultra,bpm,0.3430930189351431
ultra,ibi,0.3957077433013071
ultra,lf,0.3763195493179827
ultra,hf,0.4953918399398774
ultra,lf/hf,0.43468006769866885
ultra,FC3,0.5657737929957846
ultra,FC4,0.5732617737257457
ultra,FCz,0.4729591098520465
ultra,mean_3ch,0.5430365452369303
overload,bpm,0.6499573129333664
overload,ibi,0.6835739107081864
overload,lf,0.5556694743306541
overload,hf,0.5762391552644546
overload,lf/hf,0.6072371594298216
overload,FC3,0.01687823745

# 線形重回帰分析（マルチこに気をつけよう）

In [12]:
x = flow_dataset[["bpm", "ibi","mean_3ch"]]
y = flow_dataset[["questionnaire_average"]]

#全要素が1の列を説明変数の先頭に追加,切片をつけるために必ず必要
X = sm.add_constant(x)
 
#モデルの設定
model = sm.OLS(y, X)
 
#回帰分析の実行
results = model.fit()
 
#結果の詳細を表示
print(results.summary())

## いい結果のように思えるが、多重共線性により偽の有意性が出てしまっている

                              OLS Regression Results                             
Dep. Variable:     questionnaire_average   R-squared:                       0.977
Model:                               OLS   Adj. R-squared:                  0.942
Method:                    Least Squares   F-statistic:                     28.05
Date:                   Thu, 12 Jan 2023   Prob (F-statistic):             0.0346
Time:                           23:00:04   Log-Likelihood:                 6.7720
No. Observations:                      6   AIC:                            -5.544
Df Residuals:                          2   BIC:                            -6.377
Df Model:                              3                                         
Covariance Type:               nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        112.4892 

  warn("omni_normtest is not valid with less than 8 observations; %i "


# 非線形（２次）単回帰分析

In [70]:
import statsmodels.formula.api as smf

In [94]:
x = flow_dataset[["mean_3ch"]]
y = flow_dataset[["questionnaire_average"]]

# X = np.column_stack((x, x**2))
x = x+x**2

#全要素が1の列を説明変数の先頭に追加（絶対必要！！）
X = sm.add_constant(x)
 
#モデルの設定
model = sm.OLS(y, X)
 
#回帰分析の実行
results = model.fit()
 
#結果の詳細を表示
print(results.summary())

                              OLS Regression Results                             
Dep. Variable:     questionnaire_average   R-squared:                       0.248
Model:                               OLS   Adj. R-squared:                  0.060
Method:                    Least Squares   F-statistic:                     1.318
Date:                   Fri, 13 Jan 2023   Prob (F-statistic):              0.315
Time:                           21:13:35   Log-Likelihood:                -3.6624
No. Observations:                      6   AIC:                             11.32
Df Residuals:                          4   BIC:                             10.91
Df Model:                              1                                         
Covariance Type:               nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          7.2259 

  warn("omni_normtest is not valid with less than 8 observations; %i "


In [51]:
X

Unnamed: 0,const,lf
flow,1.0,227.00897
flow,1.0,227.00897
flow,1.0,479.902818
flow,1.0,445.662097
flow,1.0,428.892008
flow,1.0,449.168624


### 参考
- https://takacity.blog.fc2.com/blog-entry-305.html
- https://self-development.info/%E3%80%90%E5%88%9D%E5%BF%83%E8%80%85%E8%84%B1%E5%87%BA%E3%80%91statsmodels%E3%81%AB%E3%82%88%E3%82%8B%E9%87%8D%E5%9B%9E%E5%B8%B0%E5%88%86%E6%9E%90%E7%B5%90%E6%9E%9C%E3%81%AE%E8%A6%8B%E6%96%B9/
- https://teratail.com/questions/256310

In [59]:
# 決定係数
# 0.9以上	非常によい
# 0.7以上0.9未満	よい
# 0.5以上0.7未満	あまりよくない
# 0.5未満	悪い

# Dep. Variable:     questionnaire_average   R-squared:                       0.915（決定係数。説明変数が目的変数をどれくらい説明できるか）
# Model:                               OLS   Adj. R-squared:                  0.893（自由度調整済み決定係数）
# Method:                    Least Squares   F-statistic:                     42.82（F値）
# Date:                   Fri, 13 Jan 2023   Prob (F-statistic):            0.00282（F値の現れる確率）
# Time:                           12:17:03   Log-Likelihood:                 2.8630
# No. Observations:           データの行数   AIC:                            -1.726
# Df Residuals:               残差の自由度   BIC:                            -2.142
# Df Model:                   要因の自由度                                        
# Covariance Type:  nonrobust（変数間の相関関係）


#                  coef（回帰係数。傾き）    std err（標準誤差）          t      P>|t|      [0.025      0.975]
# ------------------------------------------------------------------------------
# const          7.3625      0.174     42.246      0.000       6.879       7.846
# lf         -6.729e-06   1.03e-06     -6.544      0.003   -9.58e-06   -3.87e-06
# ==============================================================================
# Omnibus:                          nan   Durbin-Watson:                   1.765
# Prob(Omnibus):                    nan   Jarque-Bera (JB):                0.156
# Skew:                          -0.179   Prob(JB):                        0.925
# Kurtosis:                       2.296   Cond. No.                     3.93e+05
# ==============================================================================

In [60]:
dir(results)

['HC0_se',
 'HC1_se',
 'HC2_se',
 'HC3_se',
 '_HCCM',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abat_diagonal',
 '_cache',
 '_data_attr',
 '_data_in_cache',
 '_get_robustcov_results',
 '_is_nested',
 '_use_t',
 '_wexog_singular_values',
 'aic',
 'bic',
 'bse',
 'centered_tss',
 'compare_f_test',
 'compare_lm_test',
 'compare_lr_test',
 'condition_number',
 'conf_int',
 'conf_int_el',
 'cov_HC0',
 'cov_HC1',
 'cov_HC2',
 'cov_HC3',
 'cov_kwds',
 'cov_params',
 'cov_type',
 'df_model',
 'df_resid',
 'eigenvals',
 'el_test',
 'ess',
 'f_pvalue',
 'f_test',
 'fittedvalues',
 'fvalue',
 'get_influence',
 'get_prediction',
 'get_robustcov_results',
 'info_criteria',
