In [1]:
import numpy as np
import optuna
import pandas as pd
from scipy.stats import expon
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import GridSearchCV, KFold, RandomizedSearchCV, train_test_split
from sklearn.svm import LinearSVR, SVR
import xgboost as xgb
from yellowbrick.datasets import load_bikeshare


In [31]:
def objective(trial):
    X, y = load_bikeshare()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=5
    )
    
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)
    
    param = {
        "verbosity": 0,
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]),
        "lambda": trial.suggest_loguniform("lambda", 1e-8, 1.0),
        "alpha": trial.suggest_loguniform("alpha", 1e-8, 1.0),
    }
    
    if param["booster"] == "gbtree" or param["booster"] == "dart":
        param['max_depth'] = trial.suggest_int('max_depth', 1, 9)
        param['eta'] = trial.suggest_loguniform('eta', 1e-8, 1.0)
        param['gamma'] = trial.suggest_loguniform('gamma', 1e-8, 1.0)
        param['grow_policy'] = trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide'])
        
    if param['booster'] == 'dart':
        param['sample_type'] = trial.suggest_categorical('sample_type', ['uniform', 'weighted'])
        param['normalize_type'] = trial.suggest_categorical('normalize_type', ['tree', 'forest'])
        param['rate_drop'] = trial.suggest_loguniform('rate_drop', 1e-8, 1.0)
        param['skip_drop'] = trial.suggest_loguniform('skip_drop', 1e-8, 1.0)
        
    bst = xgb.cv(param, dtrain, nfolds=5)
    preds = bst.predict(dtest)
    r2 = r2_score(y_test, preds, multioutput="variance_weighted")
    mse = mean_squared_error(y_test, preds)
    return mse
    

In [32]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100)
print(study.best_trial)

  if getattr(data, 'base', None) is not None and \
[W 2019-11-22 17:39:46,626] Setting status of trial#0 as TrialState.FAIL because of the following error: TypeError("cv() got an unexpected keyword argument 'nfolds'")
Traceback (most recent call last):
  File "/home/minghaol/anaconda3/envs/dl_env/lib/python3.7/site-packages/optuna/study.py", line 539, in _run_trial
    result = func(trial)
  File "<ipython-input-31-9fc79559f0d0>", line 29, in objective
    bst = xgb.cv(param, dtrain, nfolds=5)
TypeError: cv() got an unexpected keyword argument 'nfolds'


TypeError: cv() got an unexpected keyword argument 'nfolds'

In [23]:
# seed = int(time.time())


In [2]:
seed = 5

X, y = load_bikeshare()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=seed
)

kfold_cv = KFold(n_splits=5, shuffle=True, random_state=seed)
alphas = np.logspace(-10, 0.5, 400)


In [109]:
ridge = RidgeCV(cv=kfold_cv, alphas=alphas)
ridge.fit(X_train, y_train)
ridge.score(X_test, y_test)

0.38826231708792236

In [110]:
lasso = LassoCV(cv=kfold_cv, alphas=alphas)
lasso.fit(X_train, y_train)
lasso.score(X_test, y_test)

0.3882835568334244

In [122]:
elastic = ElasticNetCV(cv=kfold_cv, alphas=alphas)
elastic.fit(X_train, y_train)
print("{}: {}".format(elastic.__class__.__name__, elastic.score(X_test, y_test)))

ElasticNetCV: 0.3882448389663511


In [124]:
linearsvr = RandomizedSearchCV(estimator=LinearSVR(), param_distributions={"C": expon(scale=100)}, cv=kfold_cv)
linearsvr.fit(X_train, y_train)
linearsvr.score(X_test, y_test)





0.3417274520977841

In [3]:
# svr = RandomizedSearchCV(estimator=SVR(kernel="rbf"), param_distributions={"C": expon(scale=100), "gamma": expon(scale=0.1)}, cv=kfold_cv)
svr = SVR(kernel='rbf')
svr.fit(X_train, y_train)
svr.score(X_test, y_test)



0.5791651665015393

In [22]:
rfr = RandomForestRegressor(n_estimators=150, random_state=5)
rfr.fit(X_train, y_train)
rfr.score(X_test, y_test)

NameError: name 'X_train' is not defined

In [134]:
xgbr = XGBRegressor(n_estimators=500, random_state=seed, cv=kfold_cv)
xgbr.fit(X_train, y_train)
xgbr.score(X_test, y_test)

  if getattr(data, 'base', None) is not None and \


0.9188110754043546