In [1]:
import os
import random
import itertools
import re

# 基本的なライブラリ
import numpy as np
import pandas as pd
import scipy as sp
from scipy import stats

# 描画ライブラリ
import matplotlib.pyplot as plt
import seaborn as sns
from seaborn_analyzer import CustomPairPlot
import graphviz
import pydotplus
from IPython.display import Image
from IPython.display import HTML
from six import StringIO
from ipywidgets import interact, FloatSlider

# 前処理
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.feature_selection import VarianceThreshold

# 補完
from sklearn.experimental import (
    enable_iterative_imputer,
)  # IterativeImputerをimportするために必要
from sklearn.impute import SimpleImputer, IterativeImputer, KNNImputer

# エンコード
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, OrdinalEncoder

# データセット分割
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

# 特徴量選択
from sklearn.feature_selection import (
    GenericUnivariateSelect,
    f_classif,
    mutual_info_classif,
    chi2,
)
from boruta import BorutaPy

# 学習中
import optuna
from tqdm import tqdm
from sklearn.model_selection import learning_curve, cross_validate, cross_val_score

# 評価指標
from sklearn.metrics import roc_auc_score
from sklearn.metrics import make_scorer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

import warnings


# config python file
import config

SEED = config.SEED


from functions import *

fix_seed(SEED)


# 最大表示列数の指定（ここでは50列を指定）N
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 50)

%matplotlib inline

  y: pd.Series(),


# 目的
遺伝子学的分類に基づいた、予後の2値分類を実施する。  
分類はCLAUDIN_SUBTYPEに基づいて実施。  
予後は5年、10年、15年の3つの年次に分けている。Trueで死亡であることに注意すること。

# データ読み込み
読み込み元：
    config.INTERIM_PICKLE_PREPROCESSED_PROGNOSIS_CROSS_DIR + "/claudin_subtype_chi2"

サブタイプ毎のデータを使用

データの種類が多いので、辞書型で表現する  

In [2]:
# ディレクトリ構造を辞書に反映するための関数
def dir2dict(dic, path):
    for k in os.listdir(path):
        if os.path.isdir(os.path.join(path, k)):
            if not k in dic:
                dic[k] = dict()
            dir2dict(dic[k], path + "/" + k)
        else:
            if k[0] == "X" or k[0] == "y":
                dic[k.split(".")[0]] = pd.read_pickle(path + "/" + k)

In [3]:
df_dict = dict()
dir2dict(df_dict, config.INTERIM_PICKLE_PREPROCESSED_PROGNOSIS_CROSS_DIR)

# モデルのトレーニング

## データ全体のベースライン・学習

### boruta適用データのベースライン・基本学習結果

In [4]:
for year in range(5, 16, 5):  # 予後年数毎のループ

    X_train_tmp = df_dict["chi2"]["boruta"]["train"]["X{0:0=2}".format(year)]
    y_train_tmp = df_dict["chi2"]["boruta"]["train"]["y{0:0=2}".format(year)]
    X_test_tmp = df_dict["chi2"]["boruta"]["test"]["X{0:0=2}".format(year)]
    y_test_tmp = df_dict["chi2"]["boruta"]["test"]["y{0:0=2}".format(year)]
    assert X_train_tmp.shape[0] == y_train_tmp.shape[0], "train size is incorrect"
    assert X_test_tmp.shape[0] == y_test_tmp.shape[0], "test size is incorrect"

    # accuracyの表示
    print("----------" * 10)
    print("予後年数：{0:0=2}年:".format(year))
    if accuracy_score(y_train_tmp, np.zeros(len(y_train_tmp))) >= 0.5:
        score = (
            "0>1".format(year),
            round(accuracy_score(y_train_tmp, np.zeros(len(y_train_tmp))), 3),
        )
    else:
        score = (
            "0>1".format(year),
            round(accuracy_score(y_train_tmp, np.ones(len(y_train_tmp))), 3),
        )
    print("accuracyベースライン：", score)
    print("使用特徴量：", X_train_tmp.columns)
    print("学習サンプルサイズ：", X_train_tmp.shape)
    display("ラベル比率：", y_train_tmp.value_counts())
    display(compare_bcms(X_train_tmp, y_train_tmp))

----------------------------------------------------------------------------------------------------
予後年数：05年:
accuracyベースライン： ('0>1', 0.812)
使用特徴量： Index(['BCL2', 'C1orf106', 'C6orf97', 'CDCA5', 'ESR1', 'EXO1', 'FAM83D',
       'FGD3', 'FGFR4', 'HPN', 'IL6ST', 'KIF20A', 'KRT80', 'MAPT', 'PREX1',
       'SERPINA3', 'SUSD3', 'TMEM26'],
      dtype='object')
学習サンプルサイズ： (1306, 18)


'ラベル比率：'

0    1060
1     246
Name: OS_05years, dtype: int64

11it [12:16, 66.92s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.722308,0.721321,0.462233,0.457753
Quadratic Discriminant Analysis,0.783733,0.73963,0.468855,0.343321
AdaBoost,0.85741,0.799389,0.501189,0.314517
Decision Tree,0.862344,0.777962,0.560306,0.298459
Nearest Neighbors,0.841927,0.778667,0.439279,0.22447
Polynomial SVM,0.840564,0.805514,0.330344,0.162169
Logistic Regression,0.816403,0.811615,0.171174,0.128121
Random Forest,0.844309,0.810869,0.312603,0.118968
Linear SVM,0.811639,0.81165,0.0,0.0
RBF SVM,0.811639,0.81165,0.000889,0.0


----------------------------------------------------------------------------------------------------
予後年数：10年:
accuracyベースライン： ('0>1', 0.636)
使用特徴量： Index(['ATHL1', 'AURKA', 'BCL2', 'CCNB2', 'CDC20', 'CDCA5', 'CLIC6', 'FAM83D',
       'FGD3', 'FGFR4', 'GRB7', 'HIST1H4H', 'KIF20A', 'KRT80', 'LRP2', 'MAPT',
       'NAT1', 'PGR', 'PTTG1', 'SERPINA1', 'SPATA18', 'STC2', 'SUSD3',
       'TMEM26', 'TPX2', 'TROAP', 'UBE2C', 'UHRF1'],
      dtype='object')
学習サンプルサイズ： (1048, 28)


'ラベル比率：'

0    667
1    381
Name: OS_10years, dtype: int64

11it [12:53, 70.29s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.650552,0.645027,0.572135,0.561634
Quadratic Discriminant Analysis,0.74332,0.64315,0.647773,0.507165
Logistic Regression,0.687447,0.674588,0.495516,0.472252
Nearest Neighbors,0.755195,0.629771,0.636979,0.459579
RBF SVM,0.730174,0.679313,0.547958,0.459249
Polynomial SVM,0.855386,0.617344,0.789309,0.44514
Linear SVM,0.689672,0.670778,0.479678,0.442197
AdaBoost,0.779153,0.627839,0.669802,0.437913
Random Forest,0.831319,0.666941,0.729589,0.435684
Decision Tree,0.791242,0.601136,0.692874,0.412793


----------------------------------------------------------------------------------------------------
予後年数：15年:
accuracyベースライン： ('0>1', 0.533)
使用特徴量： Index(['AURKA', 'CCL19', 'CIDEC', 'CLEC3A', 'CLIC6', 'CYP4F22', 'DARC', 'FGD3',
       'HIST1H4H', 'LOC389033', 'MAPT', 'MFAP4', 'MYBPC1', 'NAT1', 'PLIN4',
       'S100P', 'SERPINA3', 'SFRP1', 'SPP1', 'SUSD3', 'TAT', 'TMEM26', 'UBE2C',
       'VTCN1'],
      dtype='object')
学習サンプルサイズ： (811, 24)


'ラベル比率：'

1    432
0    379
Name: OS_15years, dtype: int64

11it [03:37, 19.77s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Random Forest,0.80737,0.653538,0.829001,0.696584
Sigmoid SVM,0.532677,0.532746,0.695061,0.692588
RBF SVM,0.735717,0.638783,0.773667,0.690699
Logistic Regression,0.674475,0.652288,0.708195,0.682886
Naive Bayes,0.64598,0.641238,0.684,0.676557
Linear SVM,0.677215,0.63252,0.712804,0.66849
Nearest Neighbors,0.752706,0.614107,0.781027,0.659791
AdaBoost,0.798192,0.625173,0.813878,0.657457
Quadratic Discriminant Analysis,0.734622,0.611593,0.766278,0.654535
Polynomial SVM,0.843129,0.609124,0.856205,0.638766


## subtype毎のベースライン・学習

### borutaを使用した場合のベースライン・基本学習結果

In [5]:
pd.read_pickle(
    config.INTERIM_PICKLE_PREPROCESSED_PROGNOSIS_CROSS_DIR + "/df_cross.pkl"
)["CLAUDIN_SUBTYPE"].unique()

array(['claudin-low', 'LumA', 'LumB', 'Her2', 'Normal', 'Basal'],
      dtype=object)

In [6]:
for year in range(5, 16, 5):  # 予後年数毎のループ
    print("====={0:0=2}".format(year) * 10)

    for subtype in pd.read_pickle(
        config.INTERIM_PICKLE_PREPROCESSED_PROGNOSIS_CROSS_DIR + "/df_cross.pkl"
    )["CLAUDIN_SUBTYPE"].unique():
        X_train_tmp = df_dict["chi2"]["claudin_subtype"]["train"][
            "X{0:0=2}_{1}".format(year, subtype)
        ]
        y_train_tmp = df_dict["chi2"]["claudin_subtype"]["train"][
            "y{0:0=2}_{1}".format(year, subtype)
        ]
        X_test_tmp = df_dict["chi2"]["claudin_subtype"]["test"][
            "X{0:0=2}_{1}".format(year, subtype)
        ]
        y_test_tmp = df_dict["chi2"]["claudin_subtype"]["test"][
            "y{0:0=2}_{1}".format(year, subtype)
        ]
        assert X_train_tmp.shape[0] == y_train_tmp.shape[0], "train size is incorrect"
        assert X_test_tmp.shape[0] == y_test_tmp.shape[0], "test size is incorrect"

        # accuracyの表示
        print("----------" * 10)
        print("subtype: ", subtype)
        print("予後年数：{0:0=2}年:".format(year))
        if accuracy_score(y_train_tmp, np.zeros(len(y_train_tmp))) >= 0.5:
            score = (
                "0>1".format(year),
                round(accuracy_score(y_train_tmp, np.zeros(len(y_train_tmp))), 3),
            )
        else:
            score = (
                "0>1".format(year),
                round(accuracy_score(y_train_tmp, np.ones(len(y_train_tmp))), 3),
            )
        print("accuracyベースライン：", score)
        print("使用特徴量：", X_train_tmp.columns)
        print("学習サンプルサイズ：", X_train_tmp.shape)
        display("ラベル比率：", y_train_tmp.value_counts())
        display(compare_bcms(X_train_tmp, y_train_tmp))

=====05=====05=====05=====05=====05=====05=====05=====05=====05=====05
----------------------------------------------------------------------------------------------------
subtype:  claudin-low
予後年数：05年:
accuracyベースライン： ('0>1', 0.812)
使用特徴量： Index(['SOX11', 'S100A9', 'SNAR-A3', 'SFRP1', 'C4orf7', 'GABRP', 'CLCA2',
       'CT45A6'],
      dtype='object')
学習サンプルサイズ： (138, 8)


'ラベル比率：'

0    112
1     26
Name: OS_05years, dtype: int64

11it [00:18,  1.70s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.847826,0.825824,0.554174,0.446667
AdaBoost,1.0,0.81978,1.0,0.339048
Quadratic Discriminant Analysis,0.875206,0.818132,0.614816,0.303333
Nearest Neighbors,0.864748,0.812088,0.516862,0.273333
Linear SVM,0.845413,0.834066,0.406932,0.236667
Logistic Regression,0.841381,0.812088,0.462563,0.216667
Polynomial SVM,0.939613,0.759341,0.815806,0.213333
RBF SVM,0.879206,0.812088,0.527113,0.17
Decision Tree,0.834948,0.768681,0.389799,0.068571
Random Forest,0.815626,0.813187,0.036074,0.0


----------------------------------------------------------------------------------------------------
subtype:  LumA
予後年数：05年:
accuracyベースライン： ('0>1', 0.923)
使用特徴量： Index(['CPB1', 'SLC30A8', 'S100P'], dtype='object')
学習サンプルサイズ： (466, 3)


'ラベル比率：'

0    430
1     36
Name: OS_05years, dtype: int64

11it [12:31, 68.35s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.901044,0.89482,0.110757,0.098333
Nearest Neighbors,0.924652,0.924699,0.12802,0.05
AdaBoost,0.942537,0.907539,0.438028,0.025
Quadratic Discriminant Analysis,0.908199,0.903469,0.058005,0.025
Decision Tree,0.922745,0.922618,0.0,0.0
Linear SVM,0.922745,0.922618,0.0,0.0
Logistic Regression,0.922745,0.922618,0.0,0.0
Polynomial SVM,0.922745,0.922618,0.0,0.0
RBF SVM,0.922745,0.922618,0.0,0.0
Random Forest,0.922745,0.922618,0.0,0.0


----------------------------------------------------------------------------------------------------
subtype:  LumB
予後年数：05年:
accuracyベースライン： ('0>1', 0.788)
使用特徴量： Index(['BEX1', 'PDZK1', 'KCNK1', 'IGHG1', 'GRIA2', 'IGKC', 'DB005376', 'FBN2',
       'SCGB2A1', 'TFAP2B', 'BAMBI', 'GP2', 'TPSG1', 'AGTR1', 'SCGB1D2',
       'ANKRD30A', 'TMEM26', 'SCGB2A2', 'IGLL1_1'],
      dtype='object')
学習サンプルサイズ： (306, 19)


'ラベル比率：'

0    241
1     65
Name: OS_05years, dtype: int64

11it [00:29,  2.64s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.739277,0.717527,0.53394,0.498267
Polynomial SVM,0.943718,0.738387,0.86071,0.382452
AdaBoost,0.977484,0.754516,0.946663,0.353967
Logistic Regression,0.81154,0.770753,0.442618,0.306542
Quadratic Discriminant Analysis,0.883805,0.718172,0.764582,0.283286
Nearest Neighbors,0.811908,0.73828,0.480811,0.246396
Decision Tree,0.845679,0.708387,0.612867,0.236465
Linear SVM,0.816265,0.74172,0.43735,0.205143
RBF SVM,0.793382,0.787419,0.05345,0.0
Random Forest,0.797378,0.787419,0.095152,0.0


----------------------------------------------------------------------------------------------------
subtype:  Her2
予後年数：05年:
accuracyベースライン： ('0>1', 0.667)
使用特徴量： Index(['KRT81', 'GFRA1', 'U79293', 'SCUBE2', 'SUSD3', 'CLCA2', 'CALML5',
       'CLIC6', 'AGR3', 'ESR1', 'GRPR'],
      dtype='object')
学習サンプルサイズ： (153, 11)


'ラベル比率：'

0    102
1     51
Name: OS_05years, dtype: int64

11it [00:20,  1.85s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.639813,0.635,0.590706,0.578428
Quadratic Discriminant Analysis,0.718243,0.602917,0.650743,0.490699
Decision Tree,0.782186,0.700833,0.643864,0.480108
Nearest Neighbors,0.725484,0.640833,0.583056,0.400842
Logistic Regression,0.695753,0.6425,0.441498,0.373724
AdaBoost,0.996377,0.6275,0.994845,0.347006
Polynomial SVM,0.874368,0.542917,0.803389,0.277981
Random Forest,0.768338,0.654583,0.518847,0.264683
RBF SVM,0.727668,0.6675,0.409309,0.212381
Linear SVM,0.68849,0.623333,0.271327,0.148117


----------------------------------------------------------------------------------------------------
subtype:  Normal
予後年数：05年:
accuracyベースライン： ('0>1', 0.821)
使用特徴量： Index(['CPB1', 'CFB', 'CALML5', 'AGR3', 'ESR1', 'CST1', 'FABP7'], dtype='object')
学習サンプルサイズ： (95, 7)


'ラベル比率：'

0    78
1    17
Name: OS_05years, dtype: int64

11it [00:14,  1.32s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.875964,0.826667,0.662702,0.416667
AdaBoost,1.0,0.786667,1.0,0.308571
Linear SVM,0.863078,0.837778,0.486434,0.295238
Nearest Neighbors,0.851409,0.826667,0.40942,0.245238
Polynomial SVM,0.921614,0.787778,0.714561,0.24
Quadratic Discriminant Analysis,0.85959,0.797778,0.609841,0.233333
Logistic Regression,0.870137,0.825556,0.546332,0.216667
RBF SVM,0.865431,0.815556,0.4266,0.1
Decision Tree,0.822161,0.816667,0.059259,0.0
Random Forest,0.820999,0.816667,0.0,0.0


----------------------------------------------------------------------------------------------------
subtype:  Basal
予後年数：05年:
accuracyベースライン： ('0>1', 0.655)
使用特徴量： Index(['CXCL13', 'CSN3', 'IGKC', 'IGHG1', 'SCGB2A2'], dtype='object')
学習サンプルサイズ： (148, 5)


'ラベル比率：'

0    97
1    51
Name: OS_05years, dtype: int64

11it [00:15,  1.42s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.712451,0.693333,0.606697,0.568236
Quadratic Discriminant Analysis,0.714718,0.653333,0.616742,0.518362
Logistic Regression,0.72222,0.701905,0.554861,0.50308
Linear SVM,0.731259,0.675238,0.575733,0.480144
Polynomial SVM,0.743991,0.612381,0.612688,0.436768
Decision Tree,0.760498,0.619524,0.627137,0.382463
AdaBoost,0.94591,0.607619,0.918676,0.381962
Random Forest,0.753754,0.612381,0.576092,0.322229
Nearest Neighbors,0.765004,0.58619,0.619039,0.299846
RBF SVM,0.695915,0.58619,0.311053,0.128571


=====10=====10=====10=====10=====10=====10=====10=====10=====10=====10
----------------------------------------------------------------------------------------------------
subtype:  claudin-low
予後年数：10年:
accuracyベースライン： ('0>1', 0.692)
使用特徴量： Index(['ROPN1', 'KRT14', 'SNAR-A3', 'SFRP1', 'ELF5', 'SOX10', 'MYBPC1',
       'PROM1', 'C4orf7', 'GABRP', 'ROPN1B', 'S100A7', 'KRT15'],
      dtype='object')
学習サンプルサイズ： (107, 13)


'ラベル比率：'

0    74
1    33
Name: OS_10years, dtype: int64

11it [00:17,  1.57s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.661458,0.626364,0.583516,0.506486
Polynomial SVM,0.915904,0.726364,0.843508,0.432857
Logistic Regression,0.797476,0.719091,0.628586,0.424286
Nearest Neighbors,0.789218,0.700909,0.601403,0.408571
RBF SVM,0.812038,0.736364,0.591425,0.36381
Decision Tree,0.788187,0.665455,0.619779,0.355952
Quadratic Discriminant Analysis,0.858741,0.652727,0.78707,0.342222
Linear SVM,0.801665,0.717273,0.578724,0.33873
AdaBoost,1.0,0.635455,1.0,0.328333
Random Forest,0.754929,0.69,0.38412,0.125


----------------------------------------------------------------------------------------------------
subtype:  LumA
予後年数：10年:
accuracyベースライン： ('0>1', 0.783)
使用特徴量： Index(['SLC7A2', 'C8orf4', 'BEX1', 'MKX', 'CNKSR3', 'SUSD3', 'MYBPC1',
       'NFKBIZ', 'PVALB', 'ZIC2', 'GLA', 'VTCN1', 'KRT15', 'C6orf126',
       'SLC30A8', 'PTHLH', 'GRIA2', 'CPB1', 'CLEC3A', 'PROM1', 'IGJ', 'CLIC6',
       'S100P', 'PGR'],
      dtype='object')
学習サンプルサイズ： (359, 24)


'ラベル比率：'

0    281
1     78
Name: OS_10years, dtype: int64

11it [00:38,  3.52s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.743114,0.721508,0.555373,0.489598
Quadratic Discriminant Analysis,0.898174,0.746508,0.790011,0.369782
Polynomial SVM,0.990716,0.713095,0.978178,0.354479
Decision Tree,0.838135,0.771825,0.539427,0.353008
Linear SVM,0.817701,0.782857,0.444996,0.318252
Logistic Regression,0.80873,0.755079,0.458758,0.317346
Nearest Neighbors,0.850199,0.763492,0.579077,0.259038
AdaBoost,0.959766,0.735556,0.901827,0.248907
Random Forest,0.804088,0.785635,0.176317,0.037607
RBF SVM,0.815538,0.771746,0.263434,0.0


----------------------------------------------------------------------------------------------------
subtype:  LumB
予後年数：10年:
accuracyベースライン： ('0>1', 0.54)
使用特徴量： Index(['PI15', 'TFAP2B', 'SERPINA1', 'DIO1', 'IGKC', 'SHISA2', 'FAM198B',
       'CBLN2', 'TUBA3D', 'CLIC6'],
      dtype='object')
学習サンプルサイズ： (252, 10)


'ラベル比率：'

0    136
1    116
Name: OS_10years, dtype: int64

11it [00:31,  2.88s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.649914,0.615692,0.646911,0.609369
Logistic Regression,0.656534,0.639385,0.615417,0.588292
Linear SVM,0.665348,0.631231,0.635247,0.587555
Quadratic Discriminant Analysis,0.684295,0.608154,0.680018,0.587028
Random Forest,0.779104,0.623692,0.748426,0.572827
Decision Tree,0.763231,0.627231,0.738759,0.564341
Polynomial SVM,0.793647,0.583538,0.783803,0.557542
Nearest Neighbors,0.718699,0.564154,0.687676,0.543702
RBF SVM,0.718249,0.596,0.685456,0.530334
AdaBoost,0.895948,0.551385,0.886281,0.493195


----------------------------------------------------------------------------------------------------
subtype:  Her2
予後年数：10年:
accuracyベースライン： ('0>1', 0.558)
使用特徴量： Index(['LUM', 'ATHL1', 'CLCA2', 'ESR1', 'PGK1', 'SERPINA3', 'GFRA1', 'U79293',
       'SERHL2', 'HIST1H4H', 'SPINK8', 'RGS11', 'S100A9', 'CAPN8', 'SERPINA5',
       'CALML5', 'S100A8', 'SCUBE2', 'TSC22D1', 'C19orf33', 'CA12', 'CLDN8',
       'SPP1', 'FAR2P2'],
      dtype='object')
学習サンプルサイズ： (129, 24)


'ラベル比率：'

1    72
0    57
Name: OS_10years, dtype: int64

11it [00:21,  1.95s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Random Forest,0.755379,0.675,0.794535,0.729185
Naive Bayes,0.693376,0.675641,0.735917,0.718585
Sigmoid SVM,0.558134,0.557692,0.716317,0.708039
RBF SVM,0.770056,0.628846,0.811534,0.70183
Nearest Neighbors,0.706329,0.598077,0.747829,0.654218
Decision Tree,0.745034,0.644231,0.771752,0.653791
AdaBoost,1.0,0.55,1.0,0.603061
Quadratic Discriminant Analysis,0.965532,0.511538,0.969449,0.599365
Linear SVM,0.74849,0.535897,0.79063,0.593815
Logistic Regression,0.721773,0.54359,0.76483,0.586245


----------------------------------------------------------------------------------------------------
subtype:  Normal
予後年数：10年:
accuracyベースライン： ('0>1', 0.646)
使用特徴量： Index(['SUSD3', 'SNAR-A3', 'HMGCS2', 'UGT2B11', 'LOC389033', 'SCGB3A1', 'TCN1',
       'CLIC6', 'UGT2B7'],
      dtype='object')
学習サンプルサイズ： (79, 9)


'ラベル比率：'

0    51
1    28
Name: OS_10years, dtype: int64

11it [00:15,  1.38s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.800293,0.748214,0.726814,0.594524
RBF SVM,0.846694,0.773214,0.740412,0.55
Quadratic Discriminant Analysis,0.888908,0.696429,0.845791,0.535714
Nearest Neighbors,0.821362,0.758929,0.719163,0.509048
Logistic Regression,0.820012,0.685714,0.728831,0.495476
Polynomial SVM,0.964847,0.608929,0.948057,0.491111
AdaBoost,1.0,0.621429,1.0,0.49
Linear SVM,0.827034,0.673214,0.746736,0.465476
Decision Tree,0.786248,0.633929,0.697615,0.433333
Random Forest,0.789065,0.685714,0.60205,0.316667


----------------------------------------------------------------------------------------------------
subtype:  Basal
予後年数：10年:
accuracyベースライン： ('0>1', 0.557)
使用特徴量： Index(['TFF3', 'CXCL13', 'S100A9', 'CSN3', 'KRT14', 'UBD', 'IGKC', 'CA9',
       'C4orf7', 'GZMB', 'IGHG1', 'SCGB2A2', 'S100A8'],
      dtype='object')
学習サンプルサイズ： (122, 13)


'ラベル比率：'

0    68
1    54
Name: OS_10years, dtype: int64

11it [00:17,  1.55s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.74588,0.704487,0.717849,0.657248
RBF SVM,0.821526,0.697436,0.789965,0.642637
Linear SVM,0.788732,0.713462,0.75656,0.630708
Logistic Regression,0.776906,0.689103,0.74459,0.612454
Random Forest,0.755021,0.630128,0.707873,0.547714
Nearest Neighbors,0.805113,0.635256,0.770882,0.535938
Polynomial SVM,0.923495,0.624359,0.909298,0.527637
Decision Tree,0.752285,0.589103,0.704666,0.509307
Quadratic Discriminant Analysis,0.873428,0.582051,0.850914,0.483521
AdaBoost,1.0,0.523718,1.0,0.445274


=====15=====15=====15=====15=====15=====15=====15=====15=====15=====15
----------------------------------------------------------------------------------------------------
subtype:  claudin-low
予後年数：15年:
accuracyベースライン： ('0>1', 0.575)
使用特徴量： Index(['ROPN1', 'SERPINA3', 'KRT14', 'STC2', 'SFRP1', 'ELF5', 'SOX10', 'PROM1',
       'C4orf7', 'GABRP', 'ROPN1B', 'KRT15'],
      dtype='object')
学習サンプルサイズ： (80, 12)


'ラベル比率：'

0    46
1    34
Name: OS_15years, dtype: int64

11it [00:15,  1.43s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Quadratic Discriminant Analysis,0.890278,0.7625,0.870303,0.570823
Random Forest,0.736111,0.675,0.700558,0.551746
Naive Bayes,0.702778,0.675,0.677535,0.551486
RBF SVM,0.8125,0.75,0.759739,0.549444
Nearest Neighbors,0.790278,0.7,0.747327,0.532222
Polynomial SVM,0.929167,0.675,0.918953,0.527569
AdaBoost,1.0,0.6875,1.0,0.516032
Linear SVM,0.809722,0.725,0.753051,0.477937
Logistic Regression,0.780556,0.6875,0.71786,0.477222
Decision Tree,0.786111,0.675,0.711796,0.47


----------------------------------------------------------------------------------------------------
subtype:  LumA
予後年数：15年:
accuracyベースライン： ('0>1', 0.626)
使用特徴量： Index(['C8orf4', 'SLC30A8', 'BEX1', 'MKX', 'GRIA2', 'CLEC3A', 'RPS25',
       'MYBPC1', 'DB005376', 'ZIC2', 'S100P', 'CLIC6', 'CYP4F22', 'VTCN1'],
      dtype='object')
学習サンプルサイズ： (270, 14)


'ラベル比率：'

0    169
1    101
Name: OS_15years, dtype: int64

11it [00:29,  2.64s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.702058,0.685185,0.633388,0.606979
Logistic Regression,0.747737,0.714815,0.628525,0.578877
Linear SVM,0.742387,0.7,0.618987,0.567276
Quadratic Discriminant Analysis,0.791358,0.655556,0.728428,0.54116
Nearest Neighbors,0.756379,0.674074,0.654314,0.513381
RBF SVM,0.798354,0.674074,0.702899,0.494395
Random Forest,0.790947,0.685185,0.663181,0.482737
Decision Tree,0.785597,0.637037,0.689584,0.453129
AdaBoost,0.923457,0.618519,0.894836,0.447293
Polynomial SVM,0.905761,0.625926,0.867261,0.44442


----------------------------------------------------------------------------------------------------
subtype:  LumB
予後年数：15年:
accuracyベースライン： ('0>1', 0.675)
使用特徴量： Index(['PDZK1', 'CXCL9', 'DIO1', 'IGKC', 'CAPN8', 'FCRLB', 'S100A8', 'KISS1R',
       'SHISA2', 'CBLN2', 'ATP6V1B1', 'PI15', 'TAT', 'CELSR2'],
      dtype='object')
学習サンプルサイズ： (194, 14)


'ラベル比率：'

1    131
0     63
Name: OS_15years, dtype: int64

11it [00:25,  2.31s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Linear SVM,0.794397,0.752895,0.856886,0.821075
Random Forest,0.817291,0.721842,0.878499,0.818576
Logistic Regression,0.787524,0.737105,0.851887,0.809489
Naive Bayes,0.778933,0.747632,0.835369,0.808466
Sigmoid SVM,0.675264,0.675789,0.806105,0.802144
RBF SVM,0.815586,0.706053,0.875283,0.795592
Quadratic Discriminant Analysis,0.819603,0.691842,0.87029,0.774508
Nearest Neighbors,0.76178,0.660263,0.833248,0.762959
AdaBoost,0.983961,0.671053,0.988351,0.75527
Decision Tree,0.795534,0.660526,0.852459,0.749314


----------------------------------------------------------------------------------------------------
subtype:  Her2
予後年数：15年:
accuracyベースライン： ('0>1', 0.684)
使用特徴量： Index(['GLYATL2'], dtype='object')
学習サンプルサイズ： (114, 1)


'ラベル比率：'

1    78
0    36
Name: OS_15years, dtype: int64

11it [02:56, 16.02s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Linear SVM,0.68419,0.682576,0.812352,0.800045
Naive Bayes,0.68419,0.682576,0.812352,0.800045
Polynomial SVM,0.68419,0.682576,0.812352,0.800045
Quadratic Discriminant Analysis,0.68419,0.682576,0.812352,0.800045
RBF SVM,0.68419,0.682576,0.812352,0.800045
Random Forest,0.68419,0.682576,0.812352,0.800045
Sigmoid SVM,0.68419,0.682576,0.812352,0.800045
Logistic Regression,0.68517,0.657576,0.811267,0.784393
AdaBoost,0.875243,0.644697,0.914135,0.744959
Nearest Neighbors,0.766048,0.646212,0.83913,0.737926


----------------------------------------------------------------------------------------------------
subtype:  Normal
予後年数：15年:
accuracyベースライン： ('0>1', 0.571)
使用特徴量： Index(['SNAR-A3', 'CLEC3A', 'LOC389033', 'SCGB3A1', 'CALML5', 'CLIC6'], dtype='object')
学習サンプルサイズ： (56, 6)


'ラベル比率：'

1    32
0    24
Name: OS_15years, dtype: int64

11it [00:13,  1.20s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AdaBoost,1.0,0.826667,1.0,0.837778
Polynomial SVM,0.966235,0.8,0.970563,0.80627
Quadratic Discriminant Analysis,0.859176,0.823333,0.868622,0.795714
Naive Bayes,0.819451,0.806667,0.840498,0.776825
Logistic Regression,0.875059,0.776667,0.888838,0.76127
Linear SVM,0.892902,0.776667,0.904502,0.754127
RBF SVM,0.874824,0.77,0.878207,0.748651
Nearest Neighbors,0.847137,0.753333,0.857262,0.740476
Random Forest,0.741922,0.626667,0.805132,0.714603
Sigmoid SVM,0.571412,0.57,0.727052,0.70583


----------------------------------------------------------------------------------------------------
subtype:  Basal
予後年数：15年:
accuracyベースライン： ('0>1', 0.577)
使用特徴量： Index(['CXCL13', 'S100A9', 'IGKC', 'IGHG1', 'S100A8'], dtype='object')
学習サンプルサイズ： (97, 5)


'ラベル比率：'

1    56
0    41
Name: OS_15years, dtype: int64

11it [00:18,  1.71s/it]


Unnamed: 0_level_0,acc_train,acc_test,f1_train,f1_test
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Random Forest,0.764015,0.676667,0.805319,0.705797
Naive Bayes,0.71361,0.688889,0.752211,0.70117
Quadratic Discriminant Analysis,0.753696,0.678889,0.802238,0.699665
Sigmoid SVM,0.57726,0.572222,0.731687,0.696954
Linear SVM,0.70785,0.644444,0.767234,0.685424
Logistic Regression,0.720494,0.635556,0.773366,0.674402
Polynomial SVM,0.781204,0.627778,0.825534,0.662544
Decision Tree,0.725052,0.625556,0.76207,0.650443
Nearest Neighbors,0.752521,0.584444,0.803261,0.644205
RBF SVM,0.711338,0.552222,0.767649,0.612554


# 予測・最適化

分類器を学習させ、パラメータのチューニングを行い、高い予測精度を目指す。

## optuna

モデルのパラメータをベイズ最適化に基づいて最適化していくoptunaを使用する

### Random Forest


In [7]:
def objective(trial):
    # ランダムフォレストのパラメータチューニング
    n_estimators = trial.suggest_int("n_estimators", 10, 1000)
    criterion = trial.suggest_categorical("criterion", ["gini", "entropy", "log_loss"])
    max_depth = trial.suggest_int("max_depth", 2, 50, log=True)
    max_leaf_noddes = trial.suggest_int("max_leaf_nodes", 2, 100)
    max_features = trial.suggest_categorical("max_features", ["sqrt", "log2", None])

    clf = RandomForestClassifier(
        n_estimators=n_estimators,
        criterion=criterion,
        max_depth=max_depth,
        max_leaf_nodes=max_leaf_noddes,
        max_features=max_features,
        random_state=SEED,
    )
    # 10分割交差検証によるテストデータのaccuracyの出力
    score = cross_val_score(clf, X, y, n_jobs=-1, cv=10, scoring=make_scorer(f1_score))
    accuracy = score.mean()
    return accuracy

##　全サンプルでの予測(boruta)

In [8]:
year = 15
X_train_tmp = df_dict["chi2"]["boruta"]["train"]["X{0:0=2}".format(year)]
y_train_tmp = df_dict["chi2"]["boruta"]["train"]["y{0:0=2}".format(year)]
X_test_tmp = df_dict["chi2"]["boruta"]["test"]["X{0:0=2}".format(year)]
y_test_tmp = df_dict["chi2"]["boruta"]["test"]["y{0:0=2}".format(year)]

X, y = X_train_tmp.copy(), y_train_tmp.copy()
study = optuna.create_study(
    direction="maximize", sampler=optuna.samplers.RandomSampler(seed=SEED)
)
study.optimize(objective, n_trials=100)

[32m[I 2022-08-11 09:32:42,633][0m A new study created in memory with name: no-name-31359920-1ec0-42be-b0e7-645d3bc302b3[0m
[32m[I 2022-08-11 09:33:23,726][0m Trial 0 finished with value: 0.713597642177245 and parameters: {'n_estimators': 548, 'criterion': 'log_loss', 'max_depth': 2, 'max_leaf_nodes': 14, 'max_features': 'log2'}. Best is trial 0 with value: 0.713597642177245.[0m
[32m[I 2022-08-11 09:33:44,311][0m Trial 1 finished with value: 0.713557797004337 and parameters: {'n_estimators': 579, 'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 23, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.713597642177245.[0m
[32m[I 2022-08-11 09:35:07,019][0m Trial 2 finished with value: 0.7055980179639783 and parameters: {'n_estimators': 818, 'criterion': 'log_loss', 'max_depth': 27, 'max_leaf_nodes': 35, 'max_features': 'log2'}. Best is trial 0 with value: 0.713597642177245.[0m
[32m[I 2022-08-11 09:35:43,449][0m Trial 3 finished with value: 0.7011178046392403 and par

In [9]:
# 最も良いパラメータ
print(f"The best value is : \n {study.best_value}")
print(f"The best parameters are : \n {study.best_params}")

The best value is : 
 0.7177175017522909
The best parameters are : 
 {'n_estimators': 945, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 44, 'max_features': 'log2'}


In [10]:
print("tuning前")
rf = RandomForestClassifier(random_state=SEED)
rf.fit(X_train_tmp, y_train_tmp)
pred_tmp = rf.predict(X_test_tmp)
show_scores(y_test_tmp, pred_tmp)

print("tuning後")
rf = RandomForestClassifier(
    n_estimators=705,
    criterion="entropy",
    max_depth=2,
    max_leaf_nodes=13,
    max_features="log2",
    random_state=SEED,
)
rf.fit(X_train_tmp, y_train_tmp)
pred_tmp = rf.predict(X_test_tmp)
show_scores(y_test_tmp, pred_tmp)

tuning前
accuracy:  0.6201550387596899
precision:  0.625
recall:  0.7246376811594203
f1 score:  0.6711409395973154
tuning後
accuracy:  0.6782945736434108
precision:  0.655367231638418
recall:  0.8405797101449275
f1 score:  0.7365079365079366


In [16]:
# optunaの過程を可視化
optuna.visualization.plot_optimization_history(study).show()

ImportError: Tried to import 'plotly' but failed. Please make sure that the package is installed correctly to use this feature. Actual error: No module named 'plotly'.

## subtype毎の予測

In [17]:
for year in range(5, 16, 5):  # 予後年数毎のループ
    print("====={0:0=2}".format(year) * 10)

    for subtype in pd.read_pickle(
        config.INTERIM_PICKLE_PREPROCESSED_PROGNOSIS_CROSS_DIR + "/df_cross.pkl"
    )["CLAUDIN_SUBTYPE"].unique():
        print("-----" * 10)
        print(subtype)
        X_train_tmp = df_dict["chi2"]["claudin_subtype"]["train"][
            "X{0:0=2}_{1}".format(year, subtype)
        ]
        y_train_tmp = df_dict["chi2"]["claudin_subtype"]["train"][
            "y{0:0=2}_{1}".format(year, subtype)
        ]
        X_test_tmp = df_dict["chi2"]["claudin_subtype"]["test"][
            "X{0:0=2}_{1}".format(year, subtype)
        ]
        y_test_tmp = df_dict["chi2"]["claudin_subtype"]["test"][
            "y{0:0=2}_{1}".format(year, subtype)
        ]
        assert X_train_tmp.shape[0] == y_train_tmp.shape[0], "train size is incorrect"
        assert X_test_tmp.shape[0] == y_test_tmp.shape[0], "test size is incorrect"

        print("tuning前")
        rf = RandomForestClassifier(random_state=SEED)
        rf.fit(X_train_tmp, y_train_tmp)
        pred_tmp = rf.predict(X_test_tmp)
        show_scores(y_test_tmp, pred_tmp)

        print("tuning後")
        rf = RandomForestClassifier(
            n_estimators=705,
            criterion="entropy",
            max_depth=2,
            max_leaf_nodes=13,
            max_features="log2",
            random_state=SEED,
        )
        rf.fit(X_train_tmp, y_train_tmp)
        pred_tmp = rf.predict(X_test_tmp)
        show_scores(y_test_tmp, pred_tmp)

=====05=====05=====05=====05=====05=====05=====05=====05=====05=====05
--------------------------------------------------
claudin-low
tuning前
accuracy:  0.7954545454545454
precision:  0.0
recall:  0.0
f1 score:  0.0
tuning後
accuracy:  0.8181818181818182
precision:  0.0
recall:  0.0
f1 score:  0.0
--------------------------------------------------
LumA
tuning前


  _warn_prf(average, modifier, msg_start, len(result))


accuracy:  0.9261744966442953
precision:  0.3333333333333333
recall:  0.1
f1 score:  0.15384615384615383
tuning後
accuracy:  0.9328859060402684
precision:  0.0
recall:  0.0
f1 score:  0.0
--------------------------------------------------
LumB
tuning前


  _warn_prf(average, modifier, msg_start, len(result))


accuracy:  0.8260869565217391
precision:  0.3333333333333333
recall:  0.05263157894736842
f1 score:  0.09090909090909091
tuning後
accuracy:  0.8347826086956521
precision:  0.0
recall:  0.0
f1 score:  0.0
--------------------------------------------------
Her2
tuning前


  _warn_prf(average, modifier, msg_start, len(result))


accuracy:  0.7058823529411765
precision:  0.6666666666666666
recall:  0.3333333333333333
f1 score:  0.4444444444444444
tuning後
accuracy:  0.6470588235294118
precision:  0.5
recall:  0.16666666666666666
f1 score:  0.25
--------------------------------------------------
Normal
tuning前
accuracy:  0.7714285714285715
precision:  0.25
recall:  0.5
f1 score:  0.3333333333333333
tuning後
accuracy:  0.7428571428571429
precision:  0.14285714285714285
recall:  0.25
f1 score:  0.18181818181818182
--------------------------------------------------
Basal
tuning前
accuracy:  0.4523809523809524
precision:  0.3
recall:  0.15789473684210525
f1 score:  0.20689655172413793
tuning後
accuracy:  0.47619047619047616
precision:  0.2857142857142857
recall:  0.10526315789473684
f1 score:  0.15384615384615385
=====10=====10=====10=====10=====10=====10=====10=====10=====10=====10
--------------------------------------------------
claudin-low
tuning前
accuracy:  0.6285714285714286
precision:  0.42857142857142855
recall

  _warn_prf(average, modifier, msg_start, len(result))


accuracy:  0.5268817204301075
precision:  0.3142857142857143
recall:  0.3548387096774194
f1 score:  0.3333333333333333
tuning後
accuracy:  0.5913978494623656
precision:  0.36
recall:  0.2903225806451613
f1 score:  0.3214285714285714
--------------------------------------------------
Her2
tuning前
accuracy:  0.574468085106383
precision:  0.5517241379310345
recall:  0.6956521739130435
f1 score:  0.6153846153846154
tuning後
accuracy:  0.5319148936170213
precision:  0.5142857142857142
recall:  0.782608695652174
f1 score:  0.6206896551724138
--------------------------------------------------
Normal
tuning前
accuracy:  0.6666666666666666
precision:  0.45454545454545453
recall:  0.7142857142857143
f1 score:  0.5555555555555556
tuning後
accuracy:  0.7083333333333334
precision:  0.5
recall:  0.7142857142857143
f1 score:  0.588235294117647
--------------------------------------------------
Basal
tuning前
accuracy:  0.5263157894736842
precision:  0.6923076923076923
recall:  0.391304347826087
f1 score: 

### lightGBM

In [None]:
from lightgbm import LGBMClassifier


def objective(trial):
    params = {
        "objective": "binary",
        "metric": "binary_logloss",
        "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 10.0),
        "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 10.0),
        "max_bin": trial.suggest_int("max_bin", 100, 300),
        "num_leaves": trial.suggest_int("num_leaves", 20, 50),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-3, 1e-1),
        "n_estimators": trial.suggest_int("n_estimators", 10, 1000),
    }
    clf = LGBMClassifier(boosting_type="gbdt", **params, random_state=SEED)
    # 10分割交差検証によるテストデータのaccuracyの出力
    score = cross_val_score(clf, X, y, n_jobs=-1, cv=10)
    accuracy = score.mean()
    return accuracy


year = 15
X_train_tmp = df_dict["chi2"]["boruta"]["train"]["X{0:0=2}".format(year)]
y_train_tmp = df_dict["chi2"]["boruta"]["train"]["y{0:0=2}".format(year)]
X_test_tmp = df_dict["chi2"]["boruta"]["test"]["X{0:0=2}".format(year)]
y_test_tmp = df_dict["chi2"]["boruta"]["test"]["y{0:0=2}".format(year)]

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

In [None]:
# 最も良いパラメータ
print(f"The best value is : \n {study.best_value}")
print(f"The best parameters are : \n {study.best_params}")