<a href="https://colab.research.google.com/github/maskot1977/tmd2022/blob/DKDytdpdfBG1tpbY/tmd2022_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

「AI創薬・ケモインフォマティクス入門」講義資料　（講師：小寺正明）

2月15日(土)19:40～21:10 第4回「化学記述子」

# Optunaによる学習結果保存のための設定

In [None]:
# Adjust the settings below as needed.
dateflag = "0216a"  # tag recording the analysis date; used in saved study names
MODEL_PATH = "./drive/MyDrive/tmd2022-3/"  # directory where results are saved
learning_time_limit = 300  # maximum training time allowed per single model fit (seconds)
timeout_optuna = 600  # maximum total time allowed for one Optuna optimisation run (seconds)
n_trials_optuna = 10  # maximum number of Optuna trials (typically 100 or 1000 in real use)

# Google Colaboratory から Google Drive へのマウント

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive

from google.colab import drive

drive.mount("/content/drive")

In [None]:
# Create the output directory if it does not exist yet

import os

# exist_ok=True makes this one idempotent call and avoids the check-then-create race.
os.makedirs(MODEL_PATH, exist_ok=True)

# Optuna のインストール

In [None]:
# Install Optuna (unpinned: the latest release at run time is used).
!pip install optuna

# RDKit のインストール

In [None]:
# Download Miniconda, install it into /usr/local, then install RDKit from the rdkit conda channel
# with Python pinned to 3.7.
!wget -c https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
!chmod +x Miniconda3-latest-Linux-x86_64.sh
!bash ./Miniconda3-latest-Linux-x86_64.sh -b -f -p /usr/local
!conda install -q -y -c rdkit rdkit python=3.7
import sys

# Add the conda site-packages directory to the kernel's import path.
# NOTE(review): the path is hard-coded to python3.7 — presumably it must match the Python
# version of the running kernel; confirm on the current Colab image.
sys.path.append("/usr/local/lib/python3.7/site-packages/")

# 化合物データ取得

In [None]:
import pandas as pd

# Load the compound table from CSV
url = "https://raw.githubusercontent.com/maskot1977/toydata/main/data/data_18.csv"
df_reg = pd.read_csv(url)
df_reg

# 目的変数

In [None]:
# Regression target: the "Melting point" column; show its distribution in 20 bins.
Y = df_reg["Melting point"]
Y.hist(bins=20)

## 回帰用データを分類用データに変換（練習のため）

In [None]:
import numpy as np

# Binarise every column: 1 where the value exceeds the per-column threshold, 0 otherwise.
# The threshold is 1.8 x the median of the rows of df_reg.describe() (i.e. the median of the
# summary statistics count/mean/std/min/quartiles/max), not the median of the raw data.
df_cla = pd.DataFrame(
    np.where(df_reg > df_reg.describe().median() * 1.8, 1, 0), columns=df_reg.columns
)
df_cla

In [None]:
# Classification target: the binarised "Melting point" column; show the 0/1 balance.
Y2 = df_cla["Melting point"]
Y2.hist(bins=20)

# RDKit supporter

In [None]:
# Install rdkit_supporter straight from GitHub (tracks the default branch, no version pin).
!pip install git+https://github.com/maskot1977/rdkit_supporter.git

## RDKit 記述子

In [None]:
%%time
from rdkit_supporter.descriptors import calc_descriptors

# Run calc_descriptors on the SMILES column and display the result
# (presumably one row of RDKit descriptors per molecule — confirm against rdkit_supporter).
rdkit_df = calc_descriptors(df_reg["Open Babel SMILES"])
display(rdkit_df)

## フィンガープリント


In [None]:
# List the fingerprint types that rdkit_supporter can handle
from rdkit_supporter.fingerprints import Fingerprinter

fingerprinter = Fingerprinter()
fingerprinter.names

In [None]:
# Build the fingerprint table: one DataFrame row per vector yielded by the fingerprinter.
fp_type = "ECFP2"
fp_df = pd.DataFrame(
    list(fingerprinter.transform(df_reg["Open Babel SMILES"], fp_type=fp_type))
)
fp_df

# 説明変数

In [None]:
# Explanatory variables: the fingerprint table (same object as fp_df, not a copy).
X = fp_df

# 欠損値の補間

In [None]:
from sklearn.svm import SVR

# Fit an SVR on the features as they are, before any imputation step.
model = SVR().fit(X, Y)

In [None]:
from sklearn.impute import KNNImputer

# Fill missing feature values with the KNN imputer and rebuild X with the original column labels.
imputer = KNNImputer()
imputed_values = imputer.fit_transform(X)
X = pd.DataFrame(imputed_values, columns=X.columns)

In [None]:
from sklearn.svm import SVR

# Fit the same SVR again, now on the imputed feature table.
model = SVR().fit(X, Y)

# データ分割

In [None]:
from sklearn.model_selection import train_test_split

# First split: hold out 33% of the samples as the test set.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.33, random_state=53
)
# Second split: halve the remaining training data into fitting ("tra") and validation ("val") parts.
X_tra, X_val, Y_tra, Y_val = train_test_split(
    X_train, Y_train, test_size=0.5, random_state=53
)

In [None]:
# Sanity check: shapes of the fitting / validation / test partitions.
X_tra.shape, X_val.shape, X_test.shape, Y_tra.shape, Y_val.shape, Y_test.shape

In [None]:
from sklearn.model_selection import train_test_split

# Repeat the split with the binary labels Y2 included, so X, Y and Y2 are partitioned together.
# This rebinds X_train/X_test/Y_train/Y_test (same test_size and random_state as the split above).
X_train, X_test, Y_train, Y_test, Y2_train, Y2_test = train_test_split(
    X, Y, Y2, test_size=0.33, random_state=53
)
# Halve the training part into fitting ("tra") and validation ("val") subsets.
X_tra, X_val, Y_tra, Y_val, Y2_tra, Y2_val = train_test_split(
    X_train, Y_train, Y2_train, test_size=0.5, random_state=53
)

In [None]:
# Sanity check: shapes of the class-label partitions.
Y2_tra.shape, Y2_val.shape, Y2_test.shape

# 基本

## 回帰問題

In [None]:
# Regression results, keyed by a short model label; filled in by the cells below.
performance_record = {}

# LinearRegression

In [None]:
from rdkit_supporter import depict
from sklearn.linear_model import LinearRegression

# In-sample reference: fit on every sample and score on the same samples.
model = LinearRegression().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = LinearRegression().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["LR"] = [r1o, r1b, r2o, r2b]

# Ridge

In [None]:
from rdkit_supporter import depict
from sklearn.linear_model import Ridge

# In-sample reference: fit on every sample and score on the same samples.
model = Ridge().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = Ridge().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["R"] = [r1o, r1b, r2o, r2b]

# Lasso

In [None]:
from rdkit_supporter import depict
from sklearn.linear_model import Lasso

# In-sample reference: fit on every sample and score on the same samples.
model = Lasso().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = Lasso().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["L"] = [r1o, r1b, r2o, r2b]

# ElasticNet

In [None]:
from rdkit_supporter import depict
from sklearn.linear_model import ElasticNet

# In-sample reference: fit on every sample and score on the same samples.
model = ElasticNet().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = ElasticNet().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["EN"] = [r1o, r1b, r2o, r2b]

# BayesianRidge

In [None]:
from rdkit_supporter import depict
from sklearn.linear_model import BayesianRidge

# In-sample reference: fit on every sample and score on the same samples.
model = BayesianRidge().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = BayesianRidge().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["BR"] = [r1o, r1b, r2o, r2b]

# SVR

In [None]:
from rdkit_supporter import depict
from sklearn.svm import SVR

# In-sample reference: fit on every sample and score on the same samples.
model = SVR().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = SVR().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["SVR"] = [r1o, r1b, r2o, r2b]

# KNeighborsRegressor

In [None]:
from rdkit_supporter import depict
from sklearn.neighbors import KNeighborsRegressor

# In-sample reference: fit on every sample and score on the same samples.
model = KNeighborsRegressor().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = KNeighborsRegressor().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["KN"] = [r1o, r1b, r2o, r2b]

# GaussianProcessRegressor

In [None]:
from rdkit_supporter import depict
from sklearn.gaussian_process import GaussianProcessRegressor

# In-sample reference: fit on every sample and score on the same samples.
model = GaussianProcessRegressor().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = GaussianProcessRegressor().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["GP"] = [r1o, r1b, r2o, r2b]

# PLSRegression

In [None]:
from rdkit_supporter import depict
from sklearn.cross_decomposition import PLSRegression

# In-sample reference: fit on every sample and score on the same samples.
model = PLSRegression().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = PLSRegression().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["PLS"] = [r1o, r1b, r2o, r2b]

# DecisionTreeRegressor

In [None]:
from rdkit_supporter import depict
from sklearn.tree import DecisionTreeRegressor

# In-sample reference: fit on every sample and score on the same samples.
model = DecisionTreeRegressor().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = DecisionTreeRegressor().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["DT"] = [r1o, r1b, r2o, r2b]

# RandomForestRegressor

In [None]:
from rdkit_supporter import depict
from sklearn.ensemble import RandomForestRegressor

# In-sample reference: fit on every sample and score on the same samples.
model = RandomForestRegressor().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = RandomForestRegressor().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["RF"] = [r1o, r1b, r2o, r2b]

# AdaBoostRegressor

In [None]:
from rdkit_supporter import depict
from sklearn.ensemble import AdaBoostRegressor

# In-sample reference: fit on every sample and score on the same samples.
model = AdaBoostRegressor().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = AdaBoostRegressor().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["AB"] = [r1o, r1b, r2o, r2b]

# ExtraTreesRegressor

In [None]:
from rdkit_supporter import depict
from sklearn.ensemble import ExtraTreesRegressor

# In-sample reference: fit on every sample and score on the same samples.
model = ExtraTreesRegressor().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = ExtraTreesRegressor().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["ET"] = [r1o, r1b, r2o, r2b]

# HistGradientBoostingRegressor

In [None]:
from rdkit_supporter import depict
from sklearn.ensemble import HistGradientBoostingRegressor

# In-sample reference: fit on every sample and score on the same samples.
model = HistGradientBoostingRegressor().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = HistGradientBoostingRegressor().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["GB"] = [r1o, r1b, r2o, r2b]

# MLPRegressor

In [None]:
from rdkit_supporter import depict
from sklearn.neural_network import MLPRegressor

# In-sample reference: fit on every sample and score on the same samples.
model = MLPRegressor().fit(X, Y)
r1o, r2o = depict.regression_metrics(model, X, Y)

# Hold-out estimate: fit on the training split, score on the unseen test split.
model = MLPRegressor().fit(X_train, Y_train)
r1b, r2b = depict.regression_metrics(model, X_test, Y_test)

In [None]:
# Value order: [R all-data, R held-out, R2 all-data, R2 held-out], as read by the comparison plot.
performance_record["MLP"] = [r1o, r1b, r2o, r2b]

# 回帰手法間の比較

In [None]:
import matplotlib.pyplot as plt

# Three panels comparing the regressors:
#   left   - R on all data vs R on the held-out split
#   middle - R vs R2 (two points per model: all data, then held-out)
#   right  - R2 on all data vs R2 on the held-out split
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(14, 8))
for name, v in performance_record.items():
    r_all, r_split, r2_all, r2_split = v
    # (panel, x, y) in drawing order; the middle panel receives both points.
    placements = (
        (axes[0], r_all, r_split),
        (axes[1], r_all, r2_all),
        (axes[1], r_split, r2_split),
        (axes[2], r2_all, r2_split),
    )
    for panel, x_val, y_val in placements:
        panel.scatter(x_val, y_val, label=name)
        panel.text(x_val, y_val, name, alpha=0.6)

panel_labels = (
    ("R (all data)", "R (split data)"),
    ("R", "R2"),
    ("R2 (all data)", "R2 (split data)"),
)
for panel, (x_label, y_label) in zip(axes, panel_labels):
    panel.set_xlabel(x_label)
    panel.set_ylabel(y_label)
    panel.grid()

# 分類問題

In [None]:
# Start a fresh record for the classifiers (the regression results are discarded here).
performance_record = {}

## LogisticRegression

In [None]:
from rdkit_supporter import depict
from sklearn.linear_model import LogisticRegression

# Resubstitution baseline: train and evaluate on the full data set.
model = LogisticRegression().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = LogisticRegression().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["LR"] = [po, pb, ro, rb]

## PassiveAggressiveClassifier

In [None]:
from rdkit_supporter import depict
from sklearn.linear_model import PassiveAggressiveClassifier

# Resubstitution baseline: train and evaluate on the full data set.
model = PassiveAggressiveClassifier().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = PassiveAggressiveClassifier().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["PA"] = [po, pb, ro, rb]

## Perceptron

In [None]:
from rdkit_supporter import depict
from sklearn.linear_model import Perceptron

# Resubstitution baseline: train and evaluate on the full data set.
model = Perceptron().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = Perceptron().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["P"] = [po, pb, ro, rb]

## SGDClassifier

In [None]:
from rdkit_supporter import depict
from sklearn.linear_model import SGDClassifier

# Resubstitution baseline: train and evaluate on the full data set.
model = SGDClassifier().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = SGDClassifier().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["SGD"] = [po, pb, ro, rb]

## RidgeClassifier

In [None]:
from rdkit_supporter import depict
from sklearn.linear_model import RidgeClassifier

# Resubstitution baseline: train and evaluate on the full data set.
model = RidgeClassifier().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = RidgeClassifier().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["R"] = [po, pb, ro, rb]

## LinearDiscriminantAnalysis

In [None]:
from rdkit_supporter import depict
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Resubstitution baseline: train and evaluate on the full data set.
model = LinearDiscriminantAnalysis().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = LinearDiscriminantAnalysis().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["LDA"] = [po, pb, ro, rb]

## QuadraticDiscriminantAnalysis

In [None]:
from rdkit_supporter import depict
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Resubstitution baseline: train and evaluate on the full data set.
model = QuadraticDiscriminantAnalysis().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = QuadraticDiscriminantAnalysis().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["QDA"] = [po, pb, ro, rb]

## SVC

In [None]:
from rdkit_supporter import depict
from sklearn.svm import SVC

# Resubstitution baseline: train and evaluate on the full data set.
model = SVC().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = SVC().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["SVC"] = [po, pb, ro, rb]

## KNeighborsClassifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Resubstitution baseline: train and evaluate on the full data set.
model = KNeighborsClassifier().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = KNeighborsClassifier().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["KN"] = [po, pb, ro, rb]

## GaussianProcessClassifier

In [None]:
from sklearn.gaussian_process import GaussianProcessClassifier

# Resubstitution baseline: train and evaluate on the full data set.
model = GaussianProcessClassifier().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = GaussianProcessClassifier().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["GP"] = [po, pb, ro, rb]

## GaussianNB

In [None]:
from sklearn.naive_bayes import GaussianNB

# Resubstitution baseline: train and evaluate on the full data set.
model = GaussianNB().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = GaussianNB().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["GNB"] = [po, pb, ro, rb]

## BernoulliNB

In [None]:
from sklearn.naive_bayes import BernoulliNB

# Resubstitution baseline: train and evaluate on the full data set.
model = BernoulliNB().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = BernoulliNB().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["BNB"] = [po, pb, ro, rb]

## DecisionTreeClassifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Resubstitution baseline: train and evaluate on the full data set.
model = DecisionTreeClassifier().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = DecisionTreeClassifier().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["DT"] = [po, pb, ro, rb]

## RandomForestClassifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Resubstitution baseline: train and evaluate on the full data set.
model = RandomForestClassifier().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = RandomForestClassifier().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["RF"] = [po, pb, ro, rb]

## AdaBoostClassifier

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# Resubstitution baseline: train and evaluate on the full data set.
model = AdaBoostClassifier().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = AdaBoostClassifier().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["AB"] = [po, pb, ro, rb]

## ExtraTreesClassifier

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# Resubstitution baseline: train and evaluate on the full data set.
model = ExtraTreesClassifier().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = ExtraTreesClassifier().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["ET"] = [po, pb, ro, rb]

## HistGradientBoostingClassifier

In [None]:
from sklearn.ensemble import HistGradientBoostingClassifier

# Resubstitution baseline: train and evaluate on the full data set.
model = HistGradientBoostingClassifier().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = HistGradientBoostingClassifier().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["GB"] = [po, pb, ro, rb]

## MLPClassifier

In [None]:
from sklearn.neural_network import MLPClassifier

# Resubstitution baseline: train and evaluate on the full data set.
model = MLPClassifier().fit(X, Y2)
po, ro = depict.classification_metrics(model, X, Y2)

# Generalisation check: train on the training split, evaluate on the test split.
model = MLPClassifier().fit(X_train, Y2_train)
pb, rb = depict.classification_metrics(model, X_test, Y2_test)

In [None]:
# Value order: [precision all-data, precision held-out, recall all-data, recall held-out].
performance_record["MLP"] = [po, pb, ro, rb]

## 分類手法間の比較

In [None]:
import matplotlib.pyplot as plt

# Three square panels comparing the classifiers:
#   left   - precision on all data vs precision on the held-out split
#   middle - recall vs precision; "(a)" marks the all-data point, "(s)" the split-data point
#   right  - recall on all data vs recall on the held-out split
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(14, 8))
for name, v in performance_record.items():
    prec_all, prec_split, rec_all, rec_split = v
    # (panel, x, y, annotation) in drawing order.
    placements = (
        (axes[0], prec_all, prec_split, name),
        (axes[1], rec_all, prec_all, name + "(a)"),
        (axes[1], rec_split, prec_split, name + "(s)"),
        (axes[2], rec_all, rec_split, name),
    )
    for panel, x_val, y_val, caption in placements:
        panel.scatter(x_val, y_val, label=name)
        panel.text(x_val, y_val, caption, alpha=0.6)

panel_labels = (
    ("Precision (all data)", "Precision (split data)"),
    ("Recall", "Precision"),
    ("Recall (all data)", "Recall (split data)"),
)
for panel, (x_label, y_label) in zip(axes, panel_labels):
    panel.set_xlabel(x_label)
    panel.set_ylabel(y_label)
    panel.grid()
for panel in axes:
    panel.set_aspect(1.0)

# Model Selection

In [None]:
from rdkit_supporter import depict
from sklearn.svm import SVR

# Default-parameter SVR as the baseline for the searches below: first scored in-sample ...
model = SVR().fit(X, Y)
depict.regression_metrics(model, X, Y)

# ... then trained on the training split and scored on the test split (shown as the cell output).
model = SVR().fit(X_train, Y_train)
depict.regression_metrics(model, X_test, Y_test)

## GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive search over every combination of the listed SVR settings, using 2-fold CV.
parameters = {
    "C": [1, 10],
    "kernel": ["poly", "rbf"],
    "gamma": ["auto", "scale"],
    "degree": [1, 3],
    "max_iter": [530000],
}
model = GridSearchCV(SVR(), parameters, cv=2, n_jobs=-1, verbose=3).fit(
    X_train, Y_train
)
depict.regression_metrics(model, X_test, Y_test)
model.best_estimator_

## HalvingGridSearchCV

In [None]:
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV

# Same candidate grid as the exhaustive search, evaluated with successive halving and 2-fold CV.
parameters = {
    "C": [1, 10],
    "kernel": ["poly", "rbf"],
    "gamma": ["auto", "scale"],
    "degree": [1, 3],
    "max_iter": [530000],
}
model = HalvingGridSearchCV(SVR(), parameters, cv=2, n_jobs=-1, verbose=3).fit(
    X_train, Y_train
)
depict.regression_metrics(model, X_test, Y_test)
model.best_estimator_

## RandomizedSearchCV

In [None]:
from scipy.stats import randint, uniform
from sklearn.model_selection import RandomizedSearchCV

# Random search: C and degree are drawn from distributions instead of fixed lists.
parameters = dict(
    {
        # scipy's uniform(loc, scale) samples from [loc, loc + scale].
        "C": uniform(1e-4, 1e4),
        "kernel": ["poly", "rbf"],
        "gamma": ["auto", "scale"],
        # randint's upper bound is exclusive: randint(1, 4) draws 1, 2 or 3, so the
        # degree-3 polynomial used by the grid searches can actually be sampled.
        "degree": randint(1, 4),
        "max_iter": [530000],
    }
)
model = RandomizedSearchCV(SVR(), parameters, cv=2, n_jobs=-1, verbose=3)
model.fit(X_train, Y_train)
depict.regression_metrics(model, X_test, Y_test)
model.best_estimator_

## HalvingRandomSearchCV

In [None]:
from scipy.stats import randint, uniform
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingRandomSearchCV

# Random candidates evaluated with successive halving and 2-fold CV.
parameters = dict(
    {
        # scipy's uniform(loc, scale) samples from [loc, loc + scale].
        "C": uniform(1e-4, 1e4),
        "kernel": ["poly", "rbf"],
        "gamma": ["auto", "scale"],
        # randint's upper bound is exclusive: randint(1, 4) draws 1, 2 or 3, so the
        # degree-3 polynomial used by the grid searches can actually be sampled.
        "degree": randint(1, 4),
        "max_iter": [530000],
    }
)
model = HalvingRandomSearchCV(SVR(), parameters, cv=2, n_jobs=-1, verbose=3)
model.fit(X_train, Y_train)
depict.regression_metrics(model, X_test, Y_test)
model.best_estimator_

# Optunaによる多目的最適化

In [None]:
from functools import wraps


# Time-limit helper: notifies a handler when training takes too long
def on_timeout(limit, handler, hint=None):
    """Build a decorator that arms a SIGALRM timer around each call of the wrapped function.

    Args:
        limit: Time limit in whole seconds, passed to ``signal.alarm``.
        handler: Callable that receives one message string when the alarm fires.
        hint: Label inserted into that message to identify the wrapped function.

    Returns:
        A decorator. The decorated function returns whatever the original returns.

    Notes:
        The wrapped call is interrupted only if ``handler`` raises; a handler that
        merely prints lets the call continue after the notification.
        ``signal.signal`` only works in the main thread of the main interpreter.
    """

    def notify_handler(signum, frame):
        handler(
            "'%s' terminated since it did not finish in %d seconds." % (hint, limit)
        )

    def __decorator(function):
        def __wrapper(*args, **kwargs):
            import signal

            previous_handler = signal.signal(signal.SIGALRM, notify_handler)
            signal.alarm(limit)
            try:
                return function(*args, **kwargs)
            finally:
                # Disarm the timer even when the wrapped call raises; otherwise a stale
                # alarm would fire later, in unrelated code.
                signal.alarm(0)
                # Restore the previous handler (None means it was not installed from
                # Python and cannot be re-installed through signal.signal).
                if previous_handler is not None:
                    signal.signal(signal.SIGALRM, previous_handler)

        return wraps(function)(__wrapper)

    return __decorator


def handler_func(msg):
    """Timeout handler used with ``on_timeout``: prints the message and returns."""
    print(msg)

In [None]:
import copy
import time

from sklearn.metrics import matthews_corrcoef, r2_score


# Base class for tuning a model with Optuna
class BestTune:
    """Optuna objective that trains one model per trial and remembers the best one.

    Subclasses provide ``get_base_model`` (the estimator class) and ``get_params``
    (hyper-parameters drawn from the trial). Calling an instance with a trial returns
    ``(score, elapsed_seconds)``, i.e. two objectives.
    """

    def __init__(self, x_train, x_valid, t_train, t_valid, task="regressor"):
        """Store the data and choose the metric for the task.

        Args:
            x_train: Features used to fit each trial's model.
            x_valid: Features used to score each trial's model.
            t_train: Targets matching ``x_train``.
            t_valid: Targets matching ``x_valid``.
            task: A string starting with "r" or "R" selects regression (``r2_score``);
                anything else selects classification (``matthews_corrcoef``).
        """
        # Training data
        self.x_train = x_train
        self.t_train = t_train

        # Validation data
        self.x_valid = x_valid
        self.t_valid = t_valid

        # Regressor or classifier
        self.task = task
        if self.task[0] == "r" or self.task[0] == "R":
            self.measure = r2_score
        else:
            self.measure = matthews_corrcoef

        # Best model found so far and its score
        self.best_score = None
        self.best_estimator_ = None

    def get_params(self, trial):
        """Return the keyword arguments for the estimator, drawn from ``trial``."""
        raise NotImplementedError()

    def get_base_model(self):
        """Return the estimator class to instantiate."""
        raise NotImplementedError()

    @on_timeout(limit=learning_time_limit, handler=handler_func, hint=u"BestTune")
    def fit(self, trial):
        """Instantiate the estimator with the trial's parameters and fit it on the training data."""
        model = self.get_base_model()(**self.get_params(trial))
        model.fit(self.x_train, self.t_train)
        return model

    def __call__(self, trial):
        """Run one trial and return ``(score, elapsed_seconds)``.

        The score is clamped from below at -1, and the elapsed time covers both
        training and validation scoring.
        """
        start_time = time.perf_counter()
        # Train on the training data
        model = self.fit(trial)

        # Score on the validation data. scikit-learn metrics take (y_true, y_pred);
        # r2_score is not symmetric, so the order matters.
        score = self.measure(self.t_valid, model.predict(self.x_valid))
        end_time = time.perf_counter()

        # Keep a copy of the model whenever it beats the best score so far
        if self.best_estimator_ is None or self.best_score < score:
            self.best_score = score
            self.best_estimator_ = copy.deepcopy(model)

        # Two objectives: validation score and wall-clock time
        return max(-1, score), end_time - start_time

In [None]:
# Support Vector Machine
from sklearn.svm import SVC, SVR


class tune_SVM(BestTune):
    """Optuna objective for support vector machines (SVR for regression, SVC otherwise)."""

    def get_base_model(self):
        """Return SVR when the task starts with "r"/"R", else SVC."""
        if self.task[0] == "r" or self.task[0] == "R":
            return SVR
        else:
            return SVC

    def default_params(self):
        """Return a starting point for the search, keyed by the trial parameter names."""
        params = {
            "C": 1.0,
            # 1 / n_features. shape[1] is already an int, so len() must not be applied to it.
            "gamma": 1 / self.x_train.shape[1],
        }
        if self.task[0] == "r" or self.task[0] == "R":
            params["epsilon"] = 0.1
        else:
            params["class_weight"] = None
        return params

    def get_params(self, trial):
        """Draw SVM hyper-parameters from the trial."""
        # Search ranges for the tuned parameters
        params = {}
        params["C"] = trial.suggest_float("C", 1e-10, 1e10, log=True)
        params["gamma"] = trial.suggest_float("gamma", 1e-10, 1e10, log=True)
        params["max_iter"] = 530000  # fixed, not tuned

        if self.task[0] == "r" or self.task[0] == "R":
            params["epsilon"] = trial.suggest_float("epsilon", 1e-10, 1e10, log=True)
        else:
            params["class_weight"] = trial.suggest_categorical(
                "class_weight", ["balanced", None]
            )
        return params

In [None]:
# K-Neighbors
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor


class tune_KN(BestTune):
    """Optuna objective for k-nearest-neighbour models (regressor or classifier)."""

    def get_base_model(self):
        """Return KNeighborsRegressor when the task starts with "r"/"R", else KNeighborsClassifier."""
        is_regression = self.task[0] in ("r", "R")
        return KNeighborsRegressor if is_regression else KNeighborsClassifier

    def default_params(self):
        """Return the starting point for the search; identical for both task types."""
        return {"algorithm": "brute", "n_neighbors": 5, "weights": "uniform"}

    def get_params(self, trial):
        """Draw the neighbour-search algorithm, k (1-10) and the weighting scheme from the trial."""
        return {
            "algorithm": trial.suggest_categorical(
                "algorithm", ["ball_tree", "kd_tree", "brute"]
            ),
            "n_neighbors": trial.suggest_int("n_neighbors", 1, 10),
            "weights": trial.suggest_categorical("weights", ["uniform", "distance"]),
        }

In [None]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor


class tune_DT(BestTune):
    """Optuna objective for decision trees (regressor or classifier)."""

    def get_base_model(self):
        """Return DecisionTreeRegressor when the task starts with "r"/"R", else DecisionTreeClassifier."""
        if self.task[0] == "r" or self.task[0] == "R":
            return DecisionTreeRegressor
        else:
            return DecisionTreeClassifier

    def default_params(self):
        """Return a starting point for the search, keyed by the trial parameter names."""
        params = {
            "max_depth": 100,
            "min_samples_leaf": 2,
        }
        if self.task[0] == "r" or self.task[0] == "R":
            params["criterion"] = "squared_error"
        else:
            # The key must be "criterion" with a classifier criterion as its value,
            # matching the choices offered in get_params.
            params["criterion"] = "gini"
        return params

    def get_params(self, trial):
        """Draw tree depth, leaf size and split criterion from the trial."""
        params = {}
        params["max_depth"] = trial.suggest_int("max_depth", 1, 100)
        params["min_samples_leaf"] = trial.suggest_int("min_samples_leaf", 1, 10)
        if self.task[0] == "r" or self.task[0] == "R":
            params["criterion"] = trial.suggest_categorical(
                "criterion", ["squared_error", "friedman_mse", "absolute_error"]
            )
        else:
            params["criterion"] = trial.suggest_categorical(
                "criterion", ["gini", "entropy"]  # , "log_loss"]
            )
        return params

In [None]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor


class tune_RF(BestTune):
    """Optuna objective for random forests (regressor or classifier)."""

    def get_base_model(self):
        """Return RandomForestRegressor when the task starts with "r"/"R", else RandomForestClassifier."""
        is_regression = self.task[0] in ("r", "R")
        return RandomForestRegressor if is_regression else RandomForestClassifier

    def default_params(self):
        """Return the starting point for the search, keyed by the trial parameter names."""
        is_regression = self.task[0] in ("r", "R")
        return {
            "max_depth": 100,
            "min_samples_leaf": 1,
            "criterion": "squared_error" if is_regression else "gini",
        }

    def get_params(self, trial):
        """Draw tree depth, leaf size and split criterion from the trial."""
        if self.task[0] in ("r", "R"):
            criteria = ["squared_error", "friedman_mse", "absolute_error"]
        else:
            criteria = ["gini", "entropy"]
        return {
            "max_depth": trial.suggest_int("max_depth", 1, 100),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "criterion": trial.suggest_categorical("criterion", criteria),
        }

In [None]:
# ExtraTrees
from sklearn.ensemble import ExtraTreesClassifier, ExtraTreesRegressor


class tune_ET(BestTune):
    """Optuna objective for extremely randomised trees (regressor or classifier)."""

    def get_base_model(self):
        """Return ExtraTreesRegressor when the task starts with "r"/"R", else ExtraTreesClassifier."""
        is_regression = self.task[0] in ("r", "R")
        return ExtraTreesRegressor if is_regression else ExtraTreesClassifier

    def default_params(self):
        """Return the starting point for the search, keyed by the trial parameter names."""
        is_regression = self.task[0] in ("r", "R")
        return {
            "max_depth": 100,
            "min_samples_leaf": 1,
            "criterion": "squared_error" if is_regression else "gini",
        }

    def get_params(self, trial):
        """Draw tree depth, leaf size and split criterion from the trial."""
        if self.task[0] in ("r", "R"):
            criteria = ["squared_error", "friedman_mse", "absolute_error"]
        else:
            criteria = ["gini", "entropy"]
        return {
            "max_depth": trial.suggest_int("max_depth", 1, 100),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "criterion": trial.suggest_categorical("criterion", criteria),
        }

In [None]:
# GradientBoosting

from sklearn.ensemble import (
    HistGradientBoostingClassifier,
    HistGradientBoostingRegressor,
)


class tune_GB(BestTune):
    """Optuna objective for histogram-based gradient boosting (regressor or classifier).

    NOTE(review): the classifier loss names ("auto", "binary_crossentropy", "log_loss")
    are presumably not all accepted by every scikit-learn release — confirm against the
    installed version.
    """

    def get_base_model(self):
        """Return the HistGradientBoosting regressor for "r"/"R" tasks, else the classifier."""
        if self.task[0] in ("r", "R"):
            return HistGradientBoostingRegressor
        return HistGradientBoostingClassifier

    def default_params(self):
        """Return the starting point for the search, keyed by the trial parameter names."""
        is_regression = self.task[0] in ("r", "R")
        return {
            "max_depth": 100,
            "min_samples_leaf": 10,
            "loss": "squared_error" if is_regression else "log_loss",
        }

    def get_params(self, trial):
        """Draw tree depth, leaf size and loss function from the trial."""
        if self.task[0] in ("r", "R"):
            losses = ["squared_error", "absolute_error"]
        else:
            losses = ["auto", "binary_crossentropy", "log_loss"]
        return {
            "max_depth": trial.suggest_int("max_depth", 1, 100),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "loss": trial.suggest_categorical("loss", losses),
        }

In [None]:
# Multi-Layer Perceptron
from sklearn.neural_network import MLPClassifier, MLPRegressor


class tune_MLP(BestTune):
    """Optuna objective for multi-layer perceptrons (regressor or classifier)."""

    def get_base_model(self):
        """Return MLPRegressor when the task starts with "r"/"R", else MLPClassifier."""
        return MLPRegressor if self.task[0] in ("r", "R") else MLPClassifier

    def default_params(self):
        """Return the starting point for the search; identical for both task types.

        Keys are the trial parameter names suggested in ``get_params`` (the layer
        sizes are given as n_layer / in_neuron / mid_neuron / out_neuron).
        """
        n_samples = self.x_train.shape[0]
        return {
            "n_layer": 1,
            "in_neuron": 100,
            "mid_neuron": 100,
            "out_neuron": 100,
            "activation": "relu",
            "learning_rate": "constant",
            "batch_size": min(200, n_samples),
            "early_stopping": False,
            "alpha": 0.0001,
            "learning_rate_init": 0.001,
            "max_iter": 530000,
        }

    def get_params(self, trial):
        """Draw the network architecture and optimiser settings from the trial."""
        n_samples = self.x_train.shape[0]

        # Architecture: one input-side layer, `depth` identical middle layers, one output-side layer.
        depth = trial.suggest_int("n_layer", 1, 10)
        first_width = trial.suggest_int("in_neuron", 1, 200)
        middle_width = trial.suggest_int("mid_neuron", 1, 200)
        last_width = trial.suggest_int("out_neuron", 1, 200)

        # Dict values are evaluated top to bottom, so parameters are suggested in this order.
        # The solver is not tuned.
        return {
            "hidden_layer_sizes": [first_width, *([middle_width] * depth), last_width],
            "activation": trial.suggest_categorical(
                "activation", ["logistic", "tanh", "relu"]
            ),
            "learning_rate": trial.suggest_categorical(
                "learning_rate", ["constant", "invscaling", "adaptive"]
            ),
            "batch_size": trial.suggest_int(
                "batch_size",
                min(100, int(n_samples / 2)),
                max(200, int(n_samples)),
            ),
            "early_stopping": trial.suggest_categorical(
                "early_stopping", [True, False]
            ),
            "alpha": trial.suggest_float("alpha", 1e-6, 1e-2, log=True),
            "learning_rate_init": trial.suggest_float(
                "learning_rate_init", 1e-3, 1e-2, log=True
            ),
            "max_iter": trial.suggest_int("max_iter", 200, 530000),
        }

In [None]:
import optuna


# Run an Optuna study and persist its trial history
def train(
    study_name,
    tune_model,
    timeout=timeout_optuna,
    n_trials=n_trials_optuna,
    show_progress_bar=True,
):
    """Optimise ``tune_model`` with a two-objective Optuna study stored in SQLite.

    Args:
        study_name: Study name; also used as the SQLite file stem (``<study_name>.sql``).
        tune_model: Callable objective returning ``(score, elapsed_seconds)``.
        timeout: Time budget for the optimisation, in seconds.
        n_trials: Maximum number of trials to run.
        show_progress_bar: Forwarded to ``study.optimize``.

    Returns:
        The ``(tune_model, study)`` pair.

    Notes:
        An existing study of the same name is resumed. The first trial is warm-started
        from the highest-scoring Pareto-front trial of that study when there is one,
        otherwise from ``tune_model.default_params()`` when that is available.
    """
    import warnings

    warnings.simplefilter("ignore")
    optuna.logging.set_verbosity(optuna.logging.WARN)

    # Create or resume the study: maximise the score, minimise the time
    study = optuna.create_study(
        study_name=study_name,
        storage="sqlite:///" + study_name + ".sql",
        load_if_exists=True,
        directions=["maximize", "minimize"],
        sampler=optuna.samplers.NSGAIISampler(seed=2),
    )

    # A multi-objective study has no single best_trial; use the Pareto front instead.
    pareto_trials = study.best_trials
    if pareto_trials:
        best_by_score = max(pareto_trials, key=lambda t: t.values[0])
        study.enqueue_trial(best_by_score.params)
    else:
        try:
            study.enqueue_trial(tune_model.default_params())
        except Exception as exc:
            # The warm start is best-effort, but report why it was skipped rather than
            # hiding a bug in default_params().
            print("Skipping default-parameter warm start: {!r}".format(exc))

    # Optimise
    study.optimize(
        tune_model,
        timeout=timeout,
        n_trials=n_trials,
        show_progress_bar=show_progress_bar,
    )
    return tune_model, study

# SVM (regressor)

In [None]:
# SVR: "overfit" passes X_train/Y_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_SVM).
strage_name = "SVR_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_SVM(X_train, X_train, Y_train, Y_train, task="regressor"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_SVM(X_tra, X_val, Y_tra, Y_val, task="regressor"),
)

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the overfit study.
study = overstudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the bestfit study.
study = beststudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
from rdkit_supporter import depict

# Show each tuned estimator and its regression metrics on X_test / Y_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.regression_metrics(overfit_model, X_test, Y_test)
bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.regression_metrics(bestfit_model, X_test, Y_test)

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

# SVM (classifier)

In [None]:
# SVC: "overfit" passes X_train/Y2_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_SVM).
strage_name = "SVC_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_SVM(X_train, X_train, Y2_train, Y2_train, task="classifier"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_SVM(X_tra, X_val, Y2_tra, Y2_val, task="classifier"),
)

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the overfit study.
study = overstudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the bestfit study.
study = beststudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
# Show each tuned estimator and its classification metrics on X_test / Y2_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.classification_metrics(overfit_model, X_test, Y2_test)
bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.classification_metrics(bestfit_model, X_test, Y2_test)

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

# K-Neighbors (regressor)

In [None]:
# KNR: "overfit" passes X_train/Y_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_KN).
strage_name = "KNR_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_KN(X_train, X_train, Y_train, Y_train, task="regressor"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_KN(X_tra, X_val, Y_tra, Y_val, task="regressor"),
)

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the overfit study.
study = overstudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the bestfit study.
study = beststudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
# Show each tuned estimator and its regression metrics on X_test / Y_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.regression_metrics(overfit_model, X_test, Y_test)
bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.regression_metrics(bestfit_model, X_test, Y_test)

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

# K-Neighbors (classifier)

In [None]:
# KNC: "overfit" passes X_train/Y2_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_KN).
strage_name = "KNC_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_KN(X_train, X_train, Y2_train, Y2_train, task="classifier"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_KN(X_tra, X_val, Y2_tra, Y2_val, task="classifier"),
)

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the overfit study.
study = overstudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the bestfit study.
study = beststudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
# Show each tuned estimator and its classification metrics on X_test / Y2_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.classification_metrics(overfit_model, X_test, Y2_test)
bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.classification_metrics(bestfit_model, X_test, Y2_test)

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

# Decision Tree (regressor)

In [None]:
# DTR: "overfit" passes X_train/Y_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_DT).
strage_name = "DTR_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_DT(X_train, X_train, Y_train, Y_train, task="regressor"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_DT(X_tra, X_val, Y_tra, Y_val, task="regressor"),
)

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the overfit study.
study = overstudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the bestfit study.
study = beststudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
# Show each tuned estimator and its regression metrics on X_test / Y_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.regression_metrics(overfit_model, X_test, Y_test)
bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.regression_metrics(bestfit_model, X_test, Y_test)

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

# Decision Tree (classifier)

In [None]:
# DTC: "overfit" passes X_train/Y2_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_DT).
strage_name = "DTC_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_DT(X_train, X_train, Y2_train, Y2_train, task="classifier"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_DT(X_tra, X_val, Y2_tra, Y2_val, task="classifier"),
)

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the overfit study.
study = overstudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the bestfit study.
study = beststudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
# Show each tuned estimator and its classification metrics on X_test / Y2_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.classification_metrics(overfit_model, X_test, Y2_test)
bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.classification_metrics(bestfit_model, X_test, Y2_test)

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

# Random Forest (regressor)

In [None]:
# RFR: "overfit" passes X_train/Y_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_RF).
strage_name = "RFR_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_RF(X_train, X_train, Y_train, Y_train, task="regressor"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_RF(X_tra, X_val, Y_tra, Y_val, task="regressor"),
)

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the overfit study.
study = overstudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the bestfit study.
study = beststudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
# Show each tuned estimator and its regression metrics on X_test / Y_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.regression_metrics(overfit_model, X_test, Y_test)
bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.regression_metrics(bestfit_model, X_test, Y_test)

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

# Random Forest (classifier)

In [None]:
# RFC: "overfit" passes X_train/Y2_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_RF).
strage_name = "RFC_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_RF(X_train, X_train, Y2_train, Y2_train, task="classifier"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_RF(X_tra, X_val, Y2_tra, Y2_val, task="classifier"),
)

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the overfit study.
study = overstudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the bestfit study.
study = beststudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
# Show each tuned estimator and its classification metrics on X_test / Y2_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.classification_metrics(overfit_model, X_test, Y2_test)
bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.classification_metrics(bestfit_model, X_test, Y2_test)

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

# Extra Trees (regressor)

In [None]:
# ETR: "overfit" passes X_train/Y_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_ET).
strage_name = "ETR_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_ET(X_train, X_train, Y_train, Y_train, task="regressor"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_ET(X_tra, X_val, Y_tra, Y_val, task="regressor"),
)

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the overfit study.
study = overstudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the bestfit study.
study = beststudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
# Show each tuned estimator and its regression metrics on X_test / Y_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.regression_metrics(overfit_model, X_test, Y_test)
bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.regression_metrics(bestfit_model, X_test, Y_test)

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

# Extra Trees (classifier)

In [None]:
# ETC: "overfit" passes X_train/Y2_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_ET).
strage_name = "ETC_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_ET(X_train, X_train, Y2_train, Y2_train, task="classifier"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_ET(X_tra, X_val, Y2_tra, Y2_val, task="classifier"),
)

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the overfit study.
study = overstudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the bestfit study.
study = beststudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
# Show each tuned estimator and its classification metrics on X_test / Y2_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.classification_metrics(overfit_model, X_test, Y2_test)
bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.classification_metrics(bestfit_model, X_test, Y2_test)

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

# Gradient Boosting (regressor)

In [None]:
# GBR: "overfit" passes X_train/Y_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_GB).
strage_name = "GBR_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_GB(X_train, X_train, Y_train, Y_train, task="regressor"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_GB(X_tra, X_val, Y_tra, Y_val, task="regressor"),
)

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the overfit study.
study = overstudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the bestfit study.
study = beststudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
from rdkit_supporter import depict

# Show each tuned estimator and its regression metrics on X_test / Y_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.regression_metrics(overfit_model, X_test, Y_test)
bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.regression_metrics(bestfit_model, X_test, Y_test)

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

# Gradient Boosting (classifier)

In [None]:
# GBC: "overfit" passes X_train/Y2_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_GB).
strage_name = "GBC_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_GB(X_train, X_train, Y2_train, Y2_train, task="classifier"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_GB(X_tra, X_val, Y2_tra, Y2_val, task="classifier"),
)

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the overfit study.
study = overstudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-trial score (top panel) and time (bottom panel) for the bestfit study.
study = beststudy
# Read the trial list once; every `study.trials` access builds a fresh copy.
scored_trials = [trial for trial in study.trials if trial.values is not None]
trial_numbers = [trial.number for trial in scored_trials]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for objective, (ax, ylabel) in enumerate(zip(axes, ("score", "time"))):
    ax.scatter(
        trial_numbers,
        [trial.values[objective] for trial in scored_trials],
        alpha=0.4,
    )
    ax.grid()
    ax.set_ylabel(ylabel)
axes[1].set_xlabel("trial")
plt.show()

In [None]:
# Show each tuned estimator and its classification metrics on X_test / Y2_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.classification_metrics(overfit_model, X_test, Y2_test)
bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.classification_metrics(bestfit_model, X_test, Y2_test)

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, overfit study.
study = overstudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

In [None]:
# Slice plot of the score objective (values[0]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[0], target_name="score"
)
fig.show()

In [None]:
# Slice plot of the time objective (values[1]) per hyperparameter, bestfit study.
study = beststudy
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study, params=param_names, target=lambda t: t.values[1], target_name="time"
)
fig.show()

# Multi-Layer Perceptron (regressor)

In [None]:
# MLPR: "overfit" passes X_train/Y_train in both data slots; "bestfit" passes the
# X_tra/X_val split (slot roles presumably fit/evaluate — confirm in tune_MLP).
strage_name = "MLPR_{}".format(dateflag)
study_prefix = "{}{}".format(MODEL_PATH, strage_name)
overfit, overstudy = train(
    study_prefix + "_overfit",
    tune_MLP(X_train, X_train, Y_train, Y_train, task="regressor"),
)
bestfit, beststudy = train(
    study_prefix + "_bestfit",
    tune_MLP(X_tra, X_val, Y_tra, Y_val, task="regressor"),
)

In [None]:
import matplotlib.pyplot as plt

# Trial-by-trial history of `overstudy`: the first objective value goes on the
# top panel (labelled "score"), the second on the bottom panel (labelled "time").
study = overstudy

# Trials whose `values` is None have nothing to plot, so they are skipped.
finished_trials = [t for t in study.trials if t.values is not None]
trial_numbers = [t.number for t in finished_trials]

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for panel_idx, panel_label in enumerate(("score", "time")):
    panel = axes[panel_idx]
    panel.scatter(
        trial_numbers,
        [t.values[panel_idx] for t in finished_trials],
        alpha=0.4,
    )
    panel.grid()
    panel.set_ylabel(panel_label)
# Both panels are indexed by trial number; label the x axis on the bottom one only.
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Trial-by-trial history of `beststudy`: the first objective value goes on the
# top panel (labelled "score"), the second on the bottom panel (labelled "time").
study = beststudy

# Trials whose `values` is None have nothing to plot, so they are skipped.
finished_trials = [t for t in study.trials if t.values is not None]
trial_numbers = [t.number for t in finished_trials]

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for panel_idx, panel_label in enumerate(("score", "time")):
    panel = axes[panel_idx]
    panel.scatter(
        trial_numbers,
        [t.values[panel_idx] for t in finished_trials],
        alpha=0.4,
    )
    panel.grid()
    panel.set_ylabel(panel_label)
# Both panels are indexed by trial number; label the x axis on the bottom one only.
axes[1].set_xlabel("trial")
plt.show()

In [None]:
# Show each tuned estimator, then its regression metrics on X_test / Y_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.regression_metrics(overfit_model, X_test, Y_test)

bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.regression_metrics(bestfit_model, X_test, Y_test)

In [None]:
# Slice plot for `overstudy`: first objective value ("score") against each
# hyperparameter.
study = overstudy
# Hyperparameter names are taken from the first trial's `params`.
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study,
    params=param_names,
    target=lambda tr: tr.values[0],
    target_name="score",
)
fig.show()

In [None]:
# Slice plot for `overstudy`: second objective value ("time") against each
# hyperparameter.
study = overstudy
# Hyperparameter names are taken from the first trial's `params`.
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study,
    params=param_names,
    target=lambda tr: tr.values[1],
    target_name="time",
)
fig.show()

In [None]:
# Slice plot for `beststudy`: first objective value ("score") against each
# hyperparameter.
study = beststudy
# Hyperparameter names are taken from the first trial's `params`.
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study,
    params=param_names,
    target=lambda tr: tr.values[0],
    target_name="score",
)
fig.show()

In [None]:
# Slice plot for `beststudy`: second objective value ("time") against each
# hyperparameter.
study = beststudy
# Hyperparameter names are taken from the first trial's `params`.
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study,
    params=param_names,
    target=lambda tr: tr.values[1],
    target_name="time",
)
fig.show()

# Multi-Layer Perceptron (classifier)

In [None]:
# Name prefix for this section's two runs: model tag plus the run's date flag.
strage_name = f"MLPC_{dateflag}"

# "overfit" run: the training split is passed in both data positions.
# NOTE(review): presumably tune_MLP takes (fit X, eval X, fit y, eval y) - confirm
# against its definition.
overfit_tuner = tune_MLP(X_train, X_train, Y2_train, Y2_train, task="classifier")
overfit, overstudy = train(f"{MODEL_PATH}{strage_name}_overfit", overfit_tuner)

# "bestfit" run: the X_tra / Y2_tra and X_val / Y2_val splits are passed instead.
bestfit_tuner = tune_MLP(X_tra, X_val, Y2_tra, Y2_val, task="classifier")
bestfit, beststudy = train(f"{MODEL_PATH}{strage_name}_bestfit", bestfit_tuner)

In [None]:
import matplotlib.pyplot as plt

# Trial-by-trial history of `overstudy`: the first objective value goes on the
# top panel (labelled "score"), the second on the bottom panel (labelled "time").
study = overstudy

# Trials whose `values` is None have nothing to plot, so they are skipped.
finished_trials = [t for t in study.trials if t.values is not None]
trial_numbers = [t.number for t in finished_trials]

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for panel_idx, panel_label in enumerate(("score", "time")):
    panel = axes[panel_idx]
    panel.scatter(
        trial_numbers,
        [t.values[panel_idx] for t in finished_trials],
        alpha=0.4,
    )
    panel.grid()
    panel.set_ylabel(panel_label)
# Both panels are indexed by trial number; label the x axis on the bottom one only.
axes[1].set_xlabel("trial")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Trial-by-trial history of `beststudy`: the first objective value goes on the
# top panel (labelled "score"), the second on the bottom panel (labelled "time").
study = beststudy

# Trials whose `values` is None have nothing to plot, so they are skipped.
finished_trials = [t for t in study.trials if t.values is not None]
trial_numbers = [t.number for t in finished_trials]

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
for panel_idx, panel_label in enumerate(("score", "time")):
    panel = axes[panel_idx]
    panel.scatter(
        trial_numbers,
        [t.values[panel_idx] for t in finished_trials],
        alpha=0.4,
    )
    panel.grid()
    panel.set_ylabel(panel_label)
# Both panels are indexed by trial number; label the x axis on the bottom one only.
axes[1].set_xlabel("trial")
plt.show()

In [None]:
from rdkit_supporter import depict

# Show each tuned estimator, then its classification metrics on X_test / Y2_test.
overfit_model = overfit.best_estimator_
print(overfit_model)
depict.classification_metrics(overfit_model, X_test, Y2_test)

bestfit_model = bestfit.best_estimator_
print(bestfit_model)
depict.classification_metrics(bestfit_model, X_test, Y2_test)

In [None]:
# Slice plot for `overstudy`: first objective value ("score") against each
# hyperparameter.
study = overstudy
# Hyperparameter names are taken from the first trial's `params`.
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study,
    params=param_names,
    target=lambda tr: tr.values[0],
    target_name="score",
)
fig.show()

In [None]:
# Slice plot for `overstudy`: second objective value ("time") against each
# hyperparameter.
study = overstudy
# Hyperparameter names are taken from the first trial's `params`.
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study,
    params=param_names,
    target=lambda tr: tr.values[1],
    target_name="time",
)
fig.show()

In [None]:
# Slice plot for `beststudy`: first objective value ("score") against each
# hyperparameter.
study = beststudy
# Hyperparameter names are taken from the first trial's `params`.
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study,
    params=param_names,
    target=lambda tr: tr.values[0],
    target_name="score",
)
fig.show()

In [None]:
# Slice plot for `beststudy`: second objective value ("time") against each
# hyperparameter.
study = beststudy
# Hyperparameter names are taken from the first trial's `params`.
param_names = list(study.trials[0].params)
fig = optuna.visualization.plot_slice(
    study,
    params=param_names,
    target=lambda tr: tr.values[1],
    target_name="time",
)
fig.show()