In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import skopt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import lightgbm
import numpy as np
from lightgbm import Dataset
from lightgbm import LGBMRegressor

In [3]:
# Synthetic regression problem: 10 of the 100 features carry signal.
features_all, target_all = make_regression(
    n_samples=10000,
    n_features=100,
    n_informative=10,
    random_state=19,
)

# Keep 15% of the rows aside as a final test set; the remaining rows
# (X, y) are what the cross-validation below operates on.
X, X_test, y, y_test = train_test_split(
    features_all,
    target_all,
    test_size=0.15,
    random_state=19,
)

# Five-fold splitter shared by train_evaluate() and predict().
kf = KFold(n_splits=5)

In [4]:
def train_evaluate(params):
    """Return the out-of-fold RMSE of an ``LGBMRegressor`` built from ``params``.

    For every fold produced by the module-level ``kf`` splitter, a fresh model
    is fitted on the training part of ``X``/``y`` and used to predict the
    held-out part. The held-out predictions are collected into one array that
    is then scored against ``y``.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded to ``LGBMRegressor``.

    Returns
    -------
    float
        Root mean squared error between ``y`` and the out-of-fold predictions.
    """
    oof = np.zeros(y.shape)
    for train_index, val_index in kf.split(X):
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]
        # A new estimator per fold so no state leaks between folds.
        model = LGBMRegressor(**params)
        model.fit(X_train, y_train)
        oof[val_index] = model.predict(X_val)
    # Take the square root explicitly instead of passing ``squared=False``:
    # that keyword was deprecated and later removed from scikit-learn.
    return np.sqrt(mean_squared_error(y, oof))

In [5]:
def predict(params):
    """Return the test-set RMSE of a fold-averaged ``LGBMRegressor`` ensemble.

    One model is fitted on the training part of each fold produced by the
    module-level ``kf`` splitter. Every model predicts ``X_test`` and the
    predictions are averaged before being scored against ``y_test``.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded to ``LGBMRegressor``.

    Returns
    -------
    float
        Root mean squared error of the averaged predictions on the test set.
    """
    y_pred = np.zeros(y_test.shape)
    for train_index, _ in kf.split(X):
        X_train = X[train_index]
        y_train = y[train_index]
        model = LGBMRegressor(**params)
        model.fit(X_train, y_train)
        y_pred += model.predict(X_test)
    # Average over the actual number of folds rather than a hard-coded 5, so
    # the result stays correct if the splitter's n_splits is changed.
    y_pred /= kf.get_n_splits()
    # Take the square root explicitly instead of passing ``squared=False``:
    # that keyword was deprecated and later removed from scikit-learn.
    return np.sqrt(mean_squared_error(y_test, y_pred))

In [6]:
# Baseline: an empty dict means LGBMRegressor is built with its defaults.
params = dict()
baseline_rmse = predict(params)
print(baseline_rmse)

19.00695101175541


In [7]:
# Search space for the optimiser. Each dimension's ``name`` is the keyword
# under which the sampled value is eventually passed to LGBMRegressor.
# NOTE(review): LightGBM documents that row subsampling only takes effect when
# the bagging frequency (subsample_freq) is non-zero, which is never set in
# this notebook — confirm whether tuning 'subsample' has any influence.
SPACE = [
    skopt.space.Real(low=0.01, high=0.5, prior='log-uniform', name='learning_rate'),
    skopt.space.Integer(low=1, high=30, name='max_depth'),
    skopt.space.Integer(low=2, high=100, name='num_leaves'),
    skopt.space.Real(low=0.1, high=1.0, prior='uniform', name='feature_fraction'),
    skopt.space.Real(low=0.1, high=1.0, prior='uniform', name='subsample'),
]

@skopt.utils.use_named_args(SPACE)
def objective(**params):
    """Score one sampled point of SPACE; lower is better.

    The decorator supplies the sampled values as keyword arguments named after
    the dimensions in SPACE; they are handed unchanged to ``train_evaluate``.
    """
    cv_rmse = train_evaluate(params)
    return cv_rmse

# Gaussian-process based minimisation of the cross-validated RMSE.
results = skopt.gp_minimize(
    func=objective,
    dimensions=SPACE,
    n_calls=30,
    random_state=19,
)
# Best point found, as a list of values ordered like SPACE.
best_params = results.x

In [8]:
# Take the parameter names straight from SPACE so they cannot drift out of
# sync with the order of the values returned by the optimiser.
names = [dimension.name for dimension in SPACE]

In [9]:
# Pair each tuned value with its parameter name and score the tuned ensemble
# on the held-out test set.
params = {name: value for name, value in zip(names, best_params)}
tuned_rmse = predict(params)
print(tuned_rmse)

13.217658774572124
