In [1]:
import optuna
from optuna_sklearn import EvaluateFunc, Objective

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split

# test regressor

In [3]:
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [4]:
# boston_dataset
from sklearn.datasets import load_boston
boston = load_boston()
X, y = boston.data, boston.target
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

# 標準化
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

In [7]:
# setting for optuna
trial_models = {
    'Extra Trees': ExtraTreesRegressor,
    'svr': SVR,
    
}
trial_condition = {
    'Extra Trees': {
        "n_estimators": ('int', 15, 35),
        "max_depth": ('dis', 15, 35, 5),
        "random_state": 128
    },
    'svr': {
        'C': ('log', 1e0, 1e2),
        'epsilon': ('uni', 1e-1, 1e1)
    }
}
score_metric = mean_squared_error

In [8]:
evaluate = EvaluateFunc(X_train, X_val, y_train, y_val, score_metric)
objective = Objective(evaluate, trial_models, trial_condition)
study = optuna.create_study()  # Create a new study.
study.optimize(objective, n_trials=300)  # Invoke optimization of the objective function.

[I 2019-08-26 23:33:17,800] Finished trial#0 resulted in value: 20.522556696693776. Current best value is 20.522556696693776 with parameters: {'classifier': 'Extra Trees', 'Extra Trees_n_estimators': 18, 'Extra Trees_max_depth': 20.0}.
[I 2019-08-26 23:33:17,916] Finished trial#1 resulted in value: 19.96776233481966. Current best value is 19.96776233481966 with parameters: {'classifier': 'svr', 'svr_C': 99.69072668884247, 'svr_epsilon': 4.550317310284096}.
[I 2019-08-26 23:33:18,125] Finished trial#2 resulted in value: 21.179288526434195. Current best value is 19.96776233481966 with parameters: {'classifier': 'svr', 'svr_C': 99.69072668884247, 'svr_epsilon': 4.550317310284096}.
[I 2019-08-26 23:33:18,256] Finished trial#3 resulted in value: 19.2542051383202. Current best value is 19.2542051383202 with parameters: {'classifier': 'Extra Trees', 'Extra Trees_n_estimators': 31, 'Extra Trees_max_depth': 15.0}.
[I 2019-08-26 23:33:18,374] Finished trial#4 resulted in value: 20.32620441606510

[I 2019-08-26 23:33:21,365] Finished trial#35 resulted in value: 17.958447163618665. Current best value is 17.617393102204527 with parameters: {'classifier': 'svr', 'svr_C': 68.68224168081413, 'svr_epsilon': 0.28633192969956045}.
[I 2019-08-26 23:33:21,477] Finished trial#36 resulted in value: 17.73882370899899. Current best value is 17.617393102204527 with parameters: {'classifier': 'svr', 'svr_C': 68.68224168081413, 'svr_epsilon': 0.28633192969956045}.
[I 2019-08-26 23:33:21,574] Finished trial#37 resulted in value: 18.41235428133013. Current best value is 17.617393102204527 with parameters: {'classifier': 'svr', 'svr_C': 68.68224168081413, 'svr_epsilon': 0.28633192969956045}.
[I 2019-08-26 23:33:21,678] Finished trial#38 resulted in value: 20.6139505666333. Current best value is 17.617393102204527 with parameters: {'classifier': 'svr', 'svr_C': 68.68224168081413, 'svr_epsilon': 0.28633192969956045}.
[I 2019-08-26 23:33:21,782] Finished trial#39 resulted in value: 21.658267756756583.

[I 2019-08-26 23:33:25,701] Finished trial#70 resulted in value: 18.114075027890514. Current best value is 17.539114023678234 with parameters: {'classifier': 'svr', 'svr_C': 86.84855807411373, 'svr_epsilon': 0.1161728919865904}.
[I 2019-08-26 23:33:25,843] Finished trial#71 resulted in value: 18.409395698680342. Current best value is 17.539114023678234 with parameters: {'classifier': 'svr', 'svr_C': 86.84855807411373, 'svr_epsilon': 0.1161728919865904}.
[I 2019-08-26 23:33:25,964] Finished trial#72 resulted in value: 38.30531616994293. Current best value is 17.539114023678234 with parameters: {'classifier': 'svr', 'svr_C': 86.84855807411373, 'svr_epsilon': 0.1161728919865904}.
[I 2019-08-26 23:33:26,074] Finished trial#73 resulted in value: 17.553601401999444. Current best value is 17.539114023678234 with parameters: {'classifier': 'svr', 'svr_C': 86.84855807411373, 'svr_epsilon': 0.1161728919865904}.
[I 2019-08-26 23:33:26,193] Finished trial#74 resulted in value: 17.612480799294207. 

[I 2019-08-26 23:33:30,549] Finished trial#105 resulted in value: 17.535730622862253. Current best value is 17.535730622862253 with parameters: {'classifier': 'svr', 'svr_C': 80.04663980607438, 'svr_epsilon': 0.10057809117289092}.
[I 2019-08-26 23:33:30,681] Finished trial#106 resulted in value: 17.917561583804112. Current best value is 17.535730622862253 with parameters: {'classifier': 'svr', 'svr_C': 80.04663980607438, 'svr_epsilon': 0.10057809117289092}.
[I 2019-08-26 23:33:30,804] Finished trial#107 resulted in value: 17.810656091214483. Current best value is 17.535730622862253 with parameters: {'classifier': 'svr', 'svr_C': 80.04663980607438, 'svr_epsilon': 0.10057809117289092}.
[I 2019-08-26 23:33:30,991] Finished trial#108 resulted in value: 20.87819674274114. Current best value is 17.535730622862253 with parameters: {'classifier': 'svr', 'svr_C': 80.04663980607438, 'svr_epsilon': 0.10057809117289092}.
[I 2019-08-26 23:33:31,151] Finished trial#109 resulted in value: 17.83311445

[I 2019-08-26 23:33:35,980] Finished trial#140 resulted in value: 17.73454274030872. Current best value is 17.511698736954017 with parameters: {'classifier': 'svr', 'svr_C': 93.3444561295953, 'svr_epsilon': 0.10730738517410918}.
[I 2019-08-26 23:33:36,129] Finished trial#141 resulted in value: 30.433886798437836. Current best value is 17.511698736954017 with parameters: {'classifier': 'svr', 'svr_C': 93.3444561295953, 'svr_epsilon': 0.10730738517410918}.
[I 2019-08-26 23:33:36,267] Finished trial#142 resulted in value: 17.966594654020174. Current best value is 17.511698736954017 with parameters: {'classifier': 'svr', 'svr_C': 93.3444561295953, 'svr_epsilon': 0.10730738517410918}.
[I 2019-08-26 23:33:36,427] Finished trial#143 resulted in value: 17.51963205713085. Current best value is 17.511698736954017 with parameters: {'classifier': 'svr', 'svr_C': 93.3444561295953, 'svr_epsilon': 0.10730738517410918}.
[I 2019-08-26 23:33:36,548] Finished trial#144 resulted in value: 19.5922887160399

[I 2019-08-26 23:33:40,928] Finished trial#175 resulted in value: 17.516141310817662. Current best value is 17.511698736954017 with parameters: {'classifier': 'svr', 'svr_C': 93.3444561295953, 'svr_epsilon': 0.10730738517410918}.
[I 2019-08-26 23:33:41,111] Finished trial#176 resulted in value: 17.63464185700771. Current best value is 17.511698736954017 with parameters: {'classifier': 'svr', 'svr_C': 93.3444561295953, 'svr_epsilon': 0.10730738517410918}.
[I 2019-08-26 23:33:41,272] Finished trial#177 resulted in value: 17.542789176744787. Current best value is 17.511698736954017 with parameters: {'classifier': 'svr', 'svr_C': 93.3444561295953, 'svr_epsilon': 0.10730738517410918}.
[I 2019-08-26 23:33:41,436] Finished trial#178 resulted in value: 17.79875899038649. Current best value is 17.511698736954017 with parameters: {'classifier': 'svr', 'svr_C': 93.3444561295953, 'svr_epsilon': 0.10730738517410918}.
[I 2019-08-26 23:33:41,590] Finished trial#179 resulted in value: 17.5421314410077

[I 2019-08-26 23:33:46,602] Finished trial#210 resulted in value: 17.62906156906325. Current best value is 17.507523374182888 with parameters: {'classifier': 'svr', 'svr_C': 92.50522322696828, 'svr_epsilon': 0.10310462591727326}.
[I 2019-08-26 23:33:46,765] Finished trial#211 resulted in value: 17.714007135613276. Current best value is 17.507523374182888 with parameters: {'classifier': 'svr', 'svr_C': 92.50522322696828, 'svr_epsilon': 0.10310462591727326}.
[I 2019-08-26 23:33:46,930] Finished trial#212 resulted in value: 17.518511117835104. Current best value is 17.507523374182888 with parameters: {'classifier': 'svr', 'svr_C': 92.50522322696828, 'svr_epsilon': 0.10310462591727326}.
[I 2019-08-26 23:33:47,087] Finished trial#213 resulted in value: 17.58584513077566. Current best value is 17.507523374182888 with parameters: {'classifier': 'svr', 'svr_C': 92.50522322696828, 'svr_epsilon': 0.10310462591727326}.
[I 2019-08-26 23:33:47,250] Finished trial#214 resulted in value: 17.863445551

[I 2019-08-26 23:33:52,546] Finished trial#245 resulted in value: 17.71474756819802. Current best value is 17.507523374182888 with parameters: {'classifier': 'svr', 'svr_C': 92.50522322696828, 'svr_epsilon': 0.10310462591727326}.
[I 2019-08-26 23:33:52,719] Finished trial#246 resulted in value: 17.54258319663044. Current best value is 17.507523374182888 with parameters: {'classifier': 'svr', 'svr_C': 92.50522322696828, 'svr_epsilon': 0.10310462591727326}.
[I 2019-08-26 23:33:52,897] Finished trial#247 resulted in value: 17.630233013841625. Current best value is 17.507523374182888 with parameters: {'classifier': 'svr', 'svr_C': 92.50522322696828, 'svr_epsilon': 0.10310462591727326}.
[I 2019-08-26 23:33:53,091] Finished trial#248 resulted in value: 17.506375484325517. Current best value is 17.506375484325517 with parameters: {'classifier': 'svr', 'svr_C': 93.18515137756697, 'svr_epsilon': 0.10300565353506758}.
[I 2019-08-26 23:33:53,281] Finished trial#249 resulted in value: 17.642379560

[I 2019-08-26 23:34:00,397] Finished trial#280 resulted in value: 17.521386831546103. Current best value is 17.50483555770608 with parameters: {'classifier': 'svr', 'svr_C': 93.54400944617113, 'svr_epsilon': 0.10222919958427226}.
[I 2019-08-26 23:34:00,653] Finished trial#281 resulted in value: 17.66512223430093. Current best value is 17.50483555770608 with parameters: {'classifier': 'svr', 'svr_C': 93.54400944617113, 'svr_epsilon': 0.10222919958427226}.
[I 2019-08-26 23:34:00,912] Finished trial#282 resulted in value: 17.54194270138773. Current best value is 17.50483555770608 with parameters: {'classifier': 'svr', 'svr_C': 93.54400944617113, 'svr_epsilon': 0.10222919958427226}.
[I 2019-08-26 23:34:01,166] Finished trial#283 resulted in value: 17.51288794929135. Current best value is 17.50483555770608 with parameters: {'classifier': 'svr', 'svr_C': 93.54400944617113, 'svr_epsilon': 0.10222919958427226}.
[I 2019-08-26 23:34:01,446] Finished trial#284 resulted in value: 17.72208456752244

# test classifier

In [9]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [10]:
from sklearn.datasets import load_iris
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

In [11]:
trial_models = {
    'Extra Trees': ExtraTreesClassifier,
    'Ridge': RidgeClassifier,
    'kneighbor': KNeighborsClassifier,
    
}
trial_condition = {
    'Extra Trees': {
        'n_estimators': ('int', 1, 100),
        'max_depth': ('dis', 1, 100, 5),
        'random_state': 128
    },
    'Ridge': {
        'alpha': ('log', 1e-2, 1e2)
    },
    'kneighbor': {
        'n_neighbors': ('int', 1, 30),
        'algorithm': ('cat', ('ball_tree', 'kd_tree')),
    }
}
score_metric = accuracy_score

In [12]:
evaluate = EvaluateFunc(X_train, X_val, y_train, y_val, score_metric)
objective = Objective(evaluate, trial_models, trial_condition)
study = optuna.create_study(direction='maximize')  # Create a new study.
study.optimize(objective, n_trials=100)  # Invoke optimization of the objective function.

[I 2019-08-26 23:34:05,105] Finished trial#0 resulted in value: 0.9736842105263158. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}.
[I 2019-08-26 23:34:05,212] Finished trial#1 resulted in value: 0.9473684210526315. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}.
[W 2019-08-26 23:34:05,229] The range of parameter `Extra Trees_max_depth` is not divisible by `q`, and is replaced by [1, 96].
[I 2019-08-26 23:34:05,389] Finished trial#2 resulted in value: 0.9736842105263158. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}.
[I 2019-08-26 23:34:05,471] Finished trial#3 resulted in value: 0.9736842105263158. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kn

[I 2019-08-26 23:34:08,481] Finished trial#30 resulted in value: 0.7894736842105263. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}.
[I 2019-08-26 23:34:08,576] Finished trial#31 resulted in value: 0.8947368421052632. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}.
[W 2019-08-26 23:34:08,598] The range of parameter `Extra Trees_max_depth` is not divisible by `q`, and is replaced by [1, 96].
[I 2019-08-26 23:34:08,700] Finished trial#32 resulted in value: 0.9736842105263158. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}.
[I 2019-08-26 23:34:08,783] Finished trial#33 resulted in value: 0.9736842105263158. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor',

[I 2019-08-26 23:34:12,070] Finished trial#59 resulted in value: 0.9736842105263158. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}.
[W 2019-08-26 23:34:12,176] The range of parameter `Extra Trees_max_depth` is not divisible by `q`, and is replaced by [1, 96].
[I 2019-08-26 23:34:12,315] Finished trial#60 resulted in value: 0.9736842105263158. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}.
[W 2019-08-26 23:34:12,343] The range of parameter `Extra Trees_max_depth` is not divisible by `q`, and is replaced by [1, 96].
[I 2019-08-26 23:34:12,486] Finished trial#61 resulted in value: 0.9736842105263158. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}.
[W 2019-08-26 23:34:12,509] The range of p

[I 2019-08-26 23:34:16,019] Finished trial#86 resulted in value: 0.9736842105263158. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}.
[W 2019-08-26 23:34:16,052] The range of parameter `Extra Trees_max_depth` is not divisible by `q`, and is replaced by [1, 96].
[I 2019-08-26 23:34:16,149] Finished trial#87 resulted in value: 0.8947368421052632. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}.
[W 2019-08-26 23:34:16,186] The range of parameter `Extra Trees_max_depth` is not divisible by `q`, and is replaced by [1, 96].
[I 2019-08-26 23:34:16,389] Finished trial#88 resulted in value: 0.9736842105263158. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}.
[W 2019-08-26 23:34:16,423] The range of p

In [13]:
# 最適解
print(study.best_params)
print(study.best_value)
print(study.best_trial)

{'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}
0.9736842105263158
FrozenTrial(number=0, state=<TrialState.COMPLETE: 1>, value=0.9736842105263158, datetime_start=datetime.datetime(2019, 8, 26, 23, 34, 5, 34241), datetime_complete=datetime.datetime(2019, 8, 26, 23, 34, 5, 104697), params={'classifier': 'kneighbor', 'kneighbor_n_neighbors': 1, 'kneighbor_algorithm': 'kd_tree'}, distributions={'classifier': CategoricalDistribution(choices=('Extra Trees', 'Ridge', 'kneighbor')), 'kneighbor_n_neighbors': IntUniformDistribution(low=1, high=30), 'kneighbor_algorithm': CategoricalDistribution(choices=('ball_tree', 'kd_tree'))}, user_attrs={}, system_attrs={'_number': 0}, intermediate_values={}, params_in_internal_repr={'classifier': 2, 'kneighbor_n_neighbors': 1.0, 'kneighbor_algorithm': 1}, trial_id=0)


In [18]:
# ベタ書きで再現
clf = KNeighborsClassifier(n_neighbors=1, algorithm='kd_tree')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_val)
error = accuracy_score(y_val, y_pred)
print(error)

0.9736842105263158


In [19]:
# history
hist_df = study.trials_dataframe()
hist_df.to_csv("boston_hist.csv")