In [1]:
import optuna

In [2]:
optuna.__version__

'1.5.0'

In [3]:
from sklearn.model_selection import cross_val_score

In [4]:
from sklearn.tree import DecisionTreeClassifier

In [5]:
def objective(trial, x, t, cv):
    max_depth = trial.suggest_int('max_depth', 2, 100)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 100)

    estimator = DecisionTreeClassifier(
        max_depth=max_depth,
        min_samples_split=min_samples_split,
    )

    print('Current_params:', trial.params)
    accuracy = cross_val_score(estimator, x, t, cv=cv).mean()

    return 1 - accuracy

In [6]:
cv = 5

In [7]:
study = optuna.create_study(sampler=optuna.samplers.RandomSampler(0))

In [8]:
from sklearn.datasets import load_breast_cancer
dataset = load_breast_cancer()

t = dataset.target
x = dataset.data

from sklearn.model_selection import train_test_split
x_train_val, x_test, t_train_val, t_test = train_test_split(x, t, test_size=0.2, random_state=1)
x_train, x_val, t_train, t_val = train_test_split(x_train_val, t_train_val, test_size=0.3, random_state=1)

In [9]:
study.optimize(lambda trial: objective(trial, x_train_val, t_train_val, cv), n_trials=10)

Current_params: {'max_depth': 46, 'min_samples_split': 49}


[I 2020-06-19 16:43:11,625] Finished trial#0 with value: 0.07912087912087906 with parameters: {'max_depth': 46, 'min_samples_split': 49}. Best is trial#0 with value: 0.07912087912087906.


Current_params: {'max_depth': 66, 'min_samples_split': 69}


[I 2020-06-19 16:43:11,802] Finished trial#1 with value: 0.07912087912087906 with parameters: {'max_depth': 66, 'min_samples_split': 69}. Best is trial#0 with value: 0.07912087912087906.


Current_params: {'max_depth': 69, 'min_samples_split': 11}


[I 2020-06-19 16:43:11,989] Finished trial#2 with value: 0.06373626373626373 with parameters: {'max_depth': 69, 'min_samples_split': 11}. Best is trial#2 with value: 0.06373626373626373.


Current_params: {'max_depth': 85, 'min_samples_split': 23}


[I 2020-06-19 16:43:12,180] Finished trial#3 with value: 0.07472527472527468 with parameters: {'max_depth': 85, 'min_samples_split': 23}. Best is trial#2 with value: 0.06373626373626373.


Current_params: {'max_depth': 38, 'min_samples_split': 89}


[I 2020-06-19 16:43:12,369] Finished trial#4 with value: 0.08131868131868125 with parameters: {'max_depth': 38, 'min_samples_split': 89}. Best is trial#2 with value: 0.06373626373626373.


Current_params: {'max_depth': 72, 'min_samples_split': 90}


[I 2020-06-19 16:43:12,554] Finished trial#5 with value: 0.07912087912087906 with parameters: {'max_depth': 72, 'min_samples_split': 90}. Best is trial#2 with value: 0.06373626373626373.


Current_params: {'max_depth': 90, 'min_samples_split': 14}


[I 2020-06-19 16:43:12,733] Finished trial#6 with value: 0.07252747252747249 with parameters: {'max_depth': 90, 'min_samples_split': 14}. Best is trial#2 with value: 0.06373626373626373.


Current_params: {'max_depth': 60, 'min_samples_split': 67}


[I 2020-06-19 16:43:12,908] Finished trial#7 with value: 0.08131868131868125 with parameters: {'max_depth': 60, 'min_samples_split': 67}. Best is trial#2 with value: 0.06373626373626373.


Current_params: {'max_depth': 41, 'min_samples_split': 89}


[I 2020-06-19 16:43:13,089] Finished trial#8 with value: 0.07912087912087906 with parameters: {'max_depth': 41, 'min_samples_split': 89}. Best is trial#2 with value: 0.06373626373626373.


Current_params: {'max_depth': 48, 'min_samples_split': 90}


[I 2020-06-19 16:43:13,287] Finished trial#9 with value: 0.07912087912087906 with parameters: {'max_depth': 48, 'min_samples_split': 90}. Best is trial#2 with value: 0.06373626373626373.


In [10]:
print(study.best_trial)

FrozenTrial(number=2, value=0.06373626373626373, datetime_start=datetime.datetime(2020, 6, 19, 16, 43, 11, 804903), datetime_complete=datetime.datetime(2020, 6, 19, 16, 43, 11, 989481), params={'max_depth': 69, 'min_samples_split': 11}, distributions={'max_depth': IntUniformDistribution(high=100, low=2, step=1), 'min_samples_split': IntUniformDistribution(high=100, low=2, step=1)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=2, state=TrialState.COMPLETE)


In [11]:
print(study.best_params)

{'max_depth': 69, 'min_samples_split': 11}


In [12]:
model = DecisionTreeClassifier(**study.best_params)
model.fit(x_train_val, t_train_val)

DecisionTreeClassifier(max_depth=69, min_samples_split=11)

In [13]:
print(model.score(x_train_val, t_train_val))
print(model.score(x_test, t_test))

0.9934065934065934
0.9473684210526315
