In [1]:
import optuna
from optuna_sklearn import EvaluateFunc, Objective

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split

# test regressor

In [3]:
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [4]:
# Boston housing data: load it and hold out a validation split (fixed seed).
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2;
# running this cell requires an older scikit-learn or a replacement dataset.
from sklearn.datasets import load_boston
boston = load_boston()
X = boston.data
y = boston.target
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

# Standardize: learn the scaling statistics from the training split only,
# then apply that same scaling to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

In [5]:
# Optuna search setup for the regression test.

# Candidate estimator classes, keyed by the label that trial_condition uses.
trial_models = {'Extra Trees': ExtraTreesRegressor, 'svr': SVR}

# Per-model search space, keyed by the same labels as trial_models.
# Tuple values are specs interpreted by optuna_sklearn.Objective — presumably
# (kind, low, high[, step]); confirm against that package. A plain scalar such
# as random_state looks like a fixed (non-searched) argument — TODO confirm.
trial_condition = {
    'Extra Trees': dict(
        n_estimators=('int', 15, 35),
        max_depth=('dis', 15, 35, 5),
        random_state=128,
    ),
    'svr': dict(
        C=('log', 1e0, 1e2),
        epsilon=('uni', 1e-1, 1e1),
    ),
}

# Validation metric and the direction in which Optuna should optimize it.
score_metric, direction = mean_squared_error, 'minimize'

In [6]:
# Run the hyper-parameter search over the regression models configured above.
# EvaluateFunc and Objective come from optuna_sklearn; presumably EvaluateFunc
# fits a candidate on the training split and scores it on the validation split
# with score_metric — confirm against that package.
# NOTE(review): no seeded sampler is passed to create_study, so the sequence of
# trials (and therefore the best result) is not reproducible across runs.
evaluate = EvaluateFunc(X_train, X_val, y_train, y_val, score_metric)
objective = Objective(evaluate, trial_models, trial_condition)
study = optuna.create_study(direction=direction)  # New study; optimizes in `direction`.
study.optimize(objective, n_trials=50)  # Evaluate the objective for 50 trials.

[I 2019-08-27 00:06:35,057] Finished trial#0 resulted in value: 28.2990598528681. Current best value is 28.2990598528681 with parameters: {'classifier': 'svr', 'svr_C': 32.02797050834527, 'svr_epsilon': 6.66430185950666}.
[I 2019-08-27 00:06:35,157] Finished trial#1 resulted in value: 20.573943001934435. Current best value is 20.573943001934435 with parameters: {'classifier': 'Extra Trees', 'Extra Trees_n_estimators': 17, 'Extra Trees_max_depth': 30.0}.
[I 2019-08-27 00:06:35,243] Finished trial#2 resulted in value: 39.976508228215785. Current best value is 20.573943001934435 with parameters: {'classifier': 'Extra Trees', 'Extra Trees_n_estimators': 17, 'Extra Trees_max_depth': 30.0}.
[I 2019-08-27 00:06:35,395] Finished trial#3 resulted in value: 21.172910037479877. Current best value is 20.573943001934435 with parameters: {'classifier': 'Extra Trees', 'Extra Trees_n_estimators': 17, 'Extra Trees_max_depth': 30.0}.
[I 2019-08-27 00:06:35,533] Finished trial#4 resulted in value: 19.121

# test classifier

In [7]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [8]:
# Iris data for the classification test, with a fixed-seed validation split.
# Note: this rebinds X, y and the train/validation names used by earlier cells.
from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

In [9]:
# Optuna search setup for the classification test.

# Candidate estimator classes, keyed by the label that trial_condition uses.
trial_models = {
    'Extra Trees': ExtraTreesClassifier,
    'Ridge': RidgeClassifier,
    'kneighbor': KNeighborsClassifier,
}

# Per-model search space, keyed by the same labels as trial_models.
# Tuple values are specs interpreted by optuna_sklearn.Objective — presumably
# (kind, low, high[, step]) or ('cat', choices); confirm against that package.
# A plain scalar such as random_state looks like a fixed argument — TODO confirm.
trial_condition = {
    'Extra Trees': {
        'n_estimators': ('int', 1, 100),
        # Upper bound is 96, not 100: with step 5 starting at 1 the range must be
        # a multiple of the step. With 100 Optuna warned on each Extra Trees trial
        # and silently replaced the range by [1, 96], so the sampled grid
        # (1, 6, ..., 96) is unchanged.
        # NOTE(review): 'dis' appears to yield floats (e.g. 30.0); verify that the
        # installed scikit-learn accepts a float max_depth.
        'max_depth': ('dis', 1, 96, 5),
        'random_state': 128
    },
    'Ridge': {
        'alpha': ('log', 1e-2, 1e2)
    },
    'kneighbor': {
        'n_neighbors': ('int', 1, 30),
        'algorithm': ('cat', ('ball_tree', 'kd_tree')),
    }
}

# Validation metric and the direction in which Optuna should optimize it.
score_metric = accuracy_score
direction = 'maximize'

In [10]:
# Run the hyper-parameter search over the classification models configured above.
# EvaluateFunc and Objective come from optuna_sklearn; presumably EvaluateFunc
# fits a candidate on the training split and scores it on the validation split
# with score_metric — confirm against that package.
# This rebinds `study`, so the earlier regression study is no longer reachable.
# NOTE(review): no seeded sampler is passed to create_study, so the sequence of
# trials (and therefore the best result) is not reproducible across runs.
evaluate = EvaluateFunc(X_train, X_val, y_train, y_val, score_metric)
objective = Objective(evaluate, trial_models, trial_condition)
study = optuna.create_study(direction=direction)  # New study; optimizes in `direction`.
study.optimize(objective, n_trials=50)  # Evaluate the objective for 50 trials.

[I 2019-08-27 00:06:43,196] Finished trial#0 resulted in value: 0.9736842105263158. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 19, 'kneighbor_algorithm': 'ball_tree'}.
[W 2019-08-27 00:06:43,200] The range of parameter `Extra Trees_max_depth` is not divisible by `q`, and is replaced by [1, 96].
[I 2019-08-27 00:06:43,317] Finished trial#1 resulted in value: 0.9736842105263158. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 19, 'kneighbor_algorithm': 'ball_tree'}.
[I 2019-08-27 00:06:43,410] Finished trial#2 resulted in value: 0.7894736842105263. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneighbor', 'kneighbor_n_neighbors': 19, 'kneighbor_algorithm': 'ball_tree'}.
[I 2019-08-27 00:06:43,497] Finished trial#3 resulted in value: 0.6842105263157895. Current best value is 0.9736842105263158 with parameters: {'classifier': 'kneigh

In [11]:
# Best result of the study: winning parameters, their score, and the full trial record.
for best in (study.best_params, study.best_value, study.best_trial):
    print(best)

{'classifier': 'kneighbor', 'kneighbor_n_neighbors': 19, 'kneighbor_algorithm': 'ball_tree'}
0.9736842105263158
FrozenTrial(number=0, state=<TrialState.COMPLETE: 1>, value=0.9736842105263158, datetime_start=datetime.datetime(2019, 8, 27, 0, 6, 43, 125692), datetime_complete=datetime.datetime(2019, 8, 27, 0, 6, 43, 195718), params={'classifier': 'kneighbor', 'kneighbor_n_neighbors': 19, 'kneighbor_algorithm': 'ball_tree'}, distributions={'classifier': CategoricalDistribution(choices=('Extra Trees', 'Ridge', 'kneighbor')), 'kneighbor_n_neighbors': IntUniformDistribution(low=1, high=30), 'kneighbor_algorithm': CategoricalDistribution(choices=('ball_tree', 'kd_tree'))}, user_attrs={}, system_attrs={'_number': 0}, intermediate_values={}, params_in_internal_repr={'classifier': 2, 'kneighbor_n_neighbors': 19.0, 'kneighbor_algorithm': 0}, trial_id=0)


In [14]:
# Reproduce the best trial by hard-coding its parameters into a plain estimator.
clf = KNeighborsClassifier(algorithm='ball_tree', n_neighbors=19)
y_pred = clf.fit(X_train, y_train).predict(X_val)
# NOTE(review): despite its name, `error` holds the accuracy (higher is better).
error = accuracy_score(y_true=y_val, y_pred=y_pred)
print(error)

0.9736842105263158


In [15]:
# Export the per-trial history of the current study as a CSV file.
# NOTE(review): judging by the visible cells, `study` at this point is the iris
# classifier study, yet the file is named "boston_hist.csv" — confirm the intended
# study/file name (cells 12-13 are not present in this notebook).
hist_df = study.trials_dataframe()
hist_df.to_csv("boston_hist.csv")