In [69]:
from datetime import datetime
from numpy import mean
import numpy as np
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from hpsklearn import HyperoptEstimator, pca, normalizer, min_max_scaler, standard_scaler, xgboost_regression
from hpsklearn import random_forest_regression, ada_boost_regression, gradient_boosting_regression, extra_trees_regression, sgd_regression
from hpsklearn import svr, svr_linear, svr_rbf, svr_poly, svr_sigmoid
from hpsklearn import any_regressor
from hpsklearn import any_preprocessing
from hyperopt import tpe, hp
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from xgboost import XGBRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.pipeline import Pipeline

In [36]:
# Load the housing dataset straight from its hosted CSV.
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.csv'
# header=None: the first row is read as data and columns get integer labels.
dataframe = read_csv(
    url,
    header=None,
)

In [37]:
# Separate the inputs from the output: every column except the last is a
# feature, the last column is the regression target.
data = dataframe.values.astype('float32')
X = data[:, :-1]
y = data[:, -1]
print(X.shape, y.shape)

(506, 13) (506,)


In [38]:
# Hold out 33% of the rows for testing; the fixed random_state keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=1,
)

In [59]:
# Candidate preprocessing chains for the search. Each entry is a list of steps
# applied in order; the empty list means "no preprocessing". Labels are
# prefixed with 'myprepros_name' so every hyperparameter name is distinct.
_preproc_choices = [
    [min_max_scaler('myprepros_name.only_norm')],
    [standard_scaler('myprepros_name.only_std_scaler')],
    [pca('myprepros_name.only_pca')],
    [
        pca('myprepros_name.pca'),
        min_max_scaler('myprepros_name.norm'),
    ],
    [
        min_max_scaler('myprepros_name.first_norm'),
        standard_scaler('myprepros_name.second_std_scaler'),
    ],
    [],
]
preproc = hp.choice('myprepros_name', _preproc_choices)

In [57]:
# Candidate regressors for the search. Labels are prefixed with 'model_name'
# so every hyperparameter name is distinct across the alternatives.
_regressor_choices = [
    random_forest_regression('model_name.random_forest_regression'),
    sgd_regression('model_name.sgd_regression'),
    ada_boost_regression('model_name.ada_boost_regression'),
    gradient_boosting_regression('model_name.gradient_boosting_regression'),
    xgboost_regression('model_name.xgboost_regression'),
    extra_trees_regression('model_name.extra_trees_regression'),
    svr_linear('model_name.svr_linear'),
    svr_rbf('model_name.svr_rbf'),
    svr('model_name.svr'),
]
reg = hp.choice('model_name', _regressor_choices)

In [73]:
#preprocessing=[min_max_scaler('norm_scaler'), pca('my_pca') ]
#regressor=xgboost_regression('xgboost')
#preprocessing=any_preprocessing('pre')
#preprocessing = preproc
#regressor=any_regressor('reg')
#regressor=reg

# Experiment log -- configurations tried so far
# (the search cell below is currently set to configuration 04):
#
# 01. 100 times with regressor=any_regressor('reg') & preprocessing=any_preprocessing('pre')
# 02. 100 times with regressor=reg & preprocessing=preproc
# 03. 100 times with regressor=xgboost_regression(name='xgboost', objective='reg:squarederror') & preprocessing=preproc
# 04. 100 times with regressor=xgboost_regression(name='xgboost', objective='reg:squarederror') & preprocessing=any_preprocessing('pre')

In [72]:
# define search
# Run 10 independent hyperopt searches and keep the estimator that scores
# highest on the held-out split.
#
# NOTE: HyperoptEstimator.score() delegates to the fitted learner's score(),
# which for a regressor is R^2 (higher is better) -- it is NOT the MAE used as
# loss_fn during the search. The names `mae` / `mae_error` are kept only
# because later cells read them.
# NOTE(review): the winner is chosen using X_test, so the reported test score
# is optimistic; consider selecting on a separate validation split.
init_time = datetime.now()
# Start at -inf so the first completed run is always accepted, even when its
# R^2 is zero or negative (starting at 0 could leave best_model as None).
mae_error = float('-inf')
best_model = None
for i in range(10):
    print(i)
    model = HyperoptEstimator(
        regressor=xgboost_regression(name='xgboost', objective='reg:squarederror'),
        preprocessing=any_preprocessing('pre'),
        loss_fn=mean_absolute_error,
        algo=tpe.suggest,
        max_evals=100,
        trial_timeout=120,
    )
    # perform the search
    model.fit(X_train, y_train)
    # R^2 of this run's best pipeline on the held-out data.
    mae = model.score(X_test, y_test)

    # Keep the run with the highest score seen so far.
    if mae > mae_error:
        mae_error = mae
        best_model = model
        print(mae_error)

fin_time = datetime.now()
print("Execution time : ", (fin_time-init_time))

]
100%|██████████| 69/69 [00:06<00:00, 10.17trial/s, best loss: 1.743239080204683]
100%|██████████| 70/70 [00:06<00:00, 10.63trial/s, best loss: 1.743239080204683]
100%|██████████| 71/71 [00:06<00:00, 10.49trial/s, best loss: 1.743239080204683]
100%|██████████| 72/72 [00:06<00:00, 10.63trial/s, best loss: 1.714458893327152]
100%|██████████| 73/73 [00:07<00:00, 10.39trial/s, best loss: 1.714458893327152]
100%|██████████| 74/74 [00:05<00:00, 13.35trial/s, best loss: 1.714458893327152]
100%|██████████| 75/75 [00:06<00:00, 11.16trial/s, best loss: 1.714458893327152]
100%|██████████| 76/76 [00:06<00:00, 12.48trial/s, best loss: 1.714458893327152]
100%|██████████| 77/77 [00:06<00:00, 11.20trial/s, best loss: 1.714458893327152]
100%|██████████| 78/78 [00:06<00:00, 11.47trial/s, best loss: 1.714458893327152]
100%|██████████| 79/79 [00:05<00:00, 13.81trial/s, best loss: 1.714458893327152]
100%|██████████| 80/80 [00:05<00:00, 13.94trial/s, best loss: 1.714458893327152]
100%|██████████| 81/81 [00

In [74]:
# summarize the best
# `mae_error` holds the highest HyperoptEstimator.score() value recorded by the
# search loop (R^2 for a regressor) and `best_model` is the estimator that
# achieved it. `mae` is only the score of the *last* run, so it must not be
# reported as the best result, and the value is not an MSE.
if best_model is None:
    # The search loop never accepted a run, so there is nothing to describe.
    print("No search run was selected as best; nothing to summarize.")
else:
    print("Best R^2: %.3f" % mae_error)
    print(best_model.best_model())

MSE: 0.892
{'learner': XGBRegressor(base_score=0.5, booster='gbtree',
             colsample_bylevel=0.8297027242206534, colsample_bynode=1,
             colsample_bytree=0.9513954029807743, gamma=0.02022174653713924,
             gpu_id=-1, importance_type='gain', interaction_constraints='',
             learning_rate=0.09281958438582665, max_delta_step=0, max_depth=3,
             min_child_weight=2, missing=nan, monotone_constraints='()',
             n_estimators=3200, n_jobs=0, num_parallel_tree=1,
             objective='reg:linear', random_state=0,
             reg_alpha=0.00881786542441721, reg_lambda=3.2649130010873266,
             scale_pos_weight=1, seed=0, subsample=0.5058451512836544,
             tree_method='exact', validate_parameters=1, verbosity=None), 'preprocs': (StandardScaler(with_mean=False),), 'ex_preprocs': ()}


# R^2 (value returned by `model.score` on the test split, not an MSE): 0.892
{'learner': XGBRegressor(base_score=0.5, booster='gbtree',
             colsample_bylevel=0.8297027242206534, colsample_bynode=1,
             colsample_bytree=0.9513954029807743, gamma=0.02022174653713924,
             gpu_id=-1, importance_type='gain', interaction_constraints='',
             learning_rate=0.09281958438582665, max_delta_step=0, max_depth=3,
             min_child_weight=2, missing=nan, monotone_constraints='()',
             n_estimators=3200, n_jobs=0, num_parallel_tree=1,
             objective='reg:linear', random_state=0,
             reg_alpha=0.00881786542441721, reg_lambda=3.2649130010873266,
             scale_pos_weight=1, seed=0, subsample=0.5058451512836544,
             tree_method='exact', validate_parameters=1, verbosity=None), 'preprocs': (StandardScaler(with_mean=False),), 'ex_preprocs': ()}

In [75]:
# Refit the configuration reported by the search as a plain scikit-learn
# pipeline and measure its error on the held-out split.
init_time = datetime.now()
# Hyperparameters copied from the best_model() printout above. The objective is
# spelled 'reg:squarederror' here (the printout shows 'reg:linear').
# missing=np.nan matches the `missing=nan` shown in that printout.
xgb_model = XGBRegressor(base_score=0.5, booster='gbtree',
             colsample_bylevel=0.8297027242206534, colsample_bynode=1,
             colsample_bytree=0.9513954029807743, gamma=0.02022174653713924,
             gpu_id=-1, importance_type='gain', interaction_constraints='',
             learning_rate=0.09281958438582665, max_delta_step=0, max_depth=3,
             min_child_weight=2, missing=np.nan, monotone_constraints='()',
             n_estimators=3200, n_jobs=0, num_parallel_tree=1,
             objective='reg:squarederror', random_state=0,
             reg_alpha=0.00881786542441721, reg_lambda=3.2649130010873266,
             scale_pos_weight=1, seed=0, subsample=0.5058451512836544,
             tree_method='exact', validate_parameters=1, verbosity=None)

# Same preprocessing the search selected: StandardScaler without centering.
pipeline = Pipeline(steps=[('Stnd', StandardScaler(with_mean=False)), ('m', xgb_model)])
pipeline.fit(X_train, y_train)

xgb_predictions = pipeline.predict(X_test)

# scikit-learn metrics take (y_true, y_pred). The results are stored under
# their own names so the `mae` variable of the search cell is not overwritten.
mse = mean_squared_error(y_test, xgb_predictions)
rmse = np.sqrt(mse)
# The printed value is the square root of the MSE, i.e. the RMSE.
print("Root Mean Squared Error:", rmse)
fin_time = datetime.now()
print("Execution time : ", (fin_time-init_time))

Mean Squard Error: 2.9499547
Execution time :  0:00:01.061171


# Root Mean Squared Error: 2.9499547