In [1]:
# load data
import pandas as pd
import numpy as np
from pandas import DataFrame
base_path='D:/kaggle/regression/'
all_data=DataFrame.from_csv(base_path+'cleaned_train.csv',index_col='Id')
all_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1460 entries, 1 to 1460
Columns: 263 entries, LotFrontage to SaleCondition__Partial
dtypes: float64(231), int64(32)
memory usage: 2.9 MB


In [2]:
all_id=all_data.index
all_y=all_data['SalePrice']
all_x=all_data.drop(['SalePrice'],axis=1)
all_x.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1460 entries, 1 to 1460
Columns: 262 entries, LotFrontage to SaleCondition__Partial
dtypes: float64(231), int64(31)
memory usage: 2.9 MB


In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test=train_test_split(all_x, all_y, test_size=0.2, random_state=42)

In [4]:
def reportParams(best_parameters, score):
    print('score:', score)
    for param_name in sorted(best_parameters.keys()):
        print("%s: %r" % (param_name, best_parameters[param_name]))

In [5]:
from sklearn.metrics import mean_squared_error

def rmse_log_error(predict,reality):
    predict_log=np.log(predict)
    reality_log=np.log(reality)
    mse=mean_squared_error(reality_log, predict_log)
    rmse=np.sqrt(mse)
    return rmse

In [6]:
def find_value_in_array(arr,value):
    for i in range(len(arr)):
        v=arr[i]
        if abs(v - value) <= 1e-5:
            return i
    return -1

In [7]:
arr=[0.001,0.5,1.0,8]
find_value_in_array(arr,8)

3

In [7]:
def remove_dup(arr):
    return np.unique(arr)

In [15]:
remove_dup([1,1,2,3,4,4,4,5,6,8,6,])

array([1, 2, 3, 4, 5, 6, 8])

In [25]:
def exp_space(start,end,base):
    exp_arr=np.linspace(start,end,6)
    return [base**x for x in exp_arr]

In [26]:
exp_space(-4,4,2)

[0.0625,
 0.18946457081379978,
 0.57434917749851755,
 1.7411011265922491,
 5.2780316430915786,
 16.0]

In [35]:
from sklearn.grid_search import GridSearchCV
import math

def auto_param_search(regressor,search_params,iterations, train_X, train_y, test_X, test_y):
    print iterations,' iterations of search for regressor '
    print type(regressor)
    print 'Starting param range is ',params
    
    # Define some parameters
    split=6 # must > 2
    cv_num=5
    
    for i in range(iterations):
        print 'This iteration is for params ',search_params
        
        # Finer grind the range
        params_to_try={}
        for k in search_params.keys():
            print 'Grinding param ',k
            start=search_params[k]['start']
            end=search_params[k]['end']
            # Check if start is just end
            if start==end:
                params_to_try[k]=[start]
            else:
                # linear space if not specified
                if 'dist' not in search_params[k].keys():
                    search_params[k]['dist']='linear'
                # Check the wanted distribution of param
                dist=search_params[k]['dist']
                if dist == 'linear':
                    params_to_try[k]=np.linspace(start,end,split)
                elif dist == 'exp':
                    params_to_try[k] = np.logspace(start, end, num=split, base=2.0) # Use 2 as base
                else:
                    print "Error: Expected type for param ',k,' is not recognized. Cannot proceed!"
                    print "Options: 'linear' or 'exp'"
                    return
            print 
        print 'Grinded params to ',params_to_try
        
        # Find int parameters and round them
        for k in search_params.keys():
            if search_params[k]['type']=='int':
                print 'Rounding param ',k, 'with list ',params_to_try[k]
                params_to_try[k]=[int(round(x)) for x in params_to_try[k]]
                
                # Remove dup
                params_to_try[k]=remove_dup(params_to_try[k])
                
                print 'Now rounded to ',params_to_try[k]
        print 'Grinded params are rounded to ',params_to_try
                
        # Exhaustive search
        gs = GridSearchCV(regressor, params_to_try,scoring='neg_mean_squared_error',cv=cv_num,verbose=2)
        gs.fit(train_X, train_y)
        print 'Report scores'
        print gs.grid_scores_
        print("Report best params")
        best_parameters, score, _ = max(gs.grid_scores_, key=lambda x: x[1])
        reportParams(best_parameters, score)

        # Get the best regressor
        best=gs.best_estimator_
        print 'Params for best estimator are ',best.get_params()
        best.fit(X_train,y_train)
        print 'Now predict'
        best_predict=best.predict(X_test)
        #print 'Predicted values are ',best_predict
        print('RMSE {score}'.format(score=rmse_log_error(y_test, best_predict)))

        # Calculate stats
        predict_series=pd.Series(best_predict)
        print "Std: ",np.std(predict_series)," Mean: ",np.mean(predict_series)
        print 'Min: ',min(predict_series),' Max: ',max(predict_series)
        
        # Update the new range
        for k in best_parameters.keys():
            print 'Update param ',k
            
            # If the start and end meet, no need to update really
            if len(params_to_try[k])==1:
                print 'The start and end meet for param ',k,'. No need to update.'
                continue
            
            # Check the distribution of the param
            param_type=search_params[k]['dist']
            if param_type=='exp':
                start=search_params[k]['start']
                end=search_params[k]['end']
                exp_arr=np.linspace(start,end,split)
                step = exp_arr[1]-exp_arr[0]
                print 'Param ',k,' has exponential distribution. One step is ',step
            else: # linear distribution
                step=params_to_try[k][1]-params_to_try[k][0]
                print 'Param ',k,' has linear distribution. One step is ',step
            
            # Find the best param
            v=best_parameters[k]
            print 'Finding best param for value ',v
            if param_type == 'exp':
                on_exp=math.log(v,2)
                print 'Convert exp back to power ',v,'->',on_exp
                best_index=find_value_in_array(params_to_try[k],on_exp)
            else:
                best_index=find_value_in_array(params_to_try[k],v)
            print 'Best index is ',best_index
            
            # The best param happens to be on the edge
            if best_index == 0:
                print 'Arrived at start of search range ',v
                search_params[k]['start']=v
                target_idx=max(len(params_to_try[k])-1,best_index+2)
                search_params[k]['end']=params_to_try[k][target_idx]
            elif best_index == split-1:
                print 'Arrived at end of search range ',v
                step=(params[k]['end']-params[k]['start'])/split
                #print 'Go one step forward for ',step
                target_idx=min(0,best_index-2)
                search_params[k]['start']=params_to_try[k][target_idx]
                search_params[k]['end']=v
            
            # The param is in the middle of range
            else:
                print 'The param falls in the middle of range'
                if param_type == 'exp':
                    on_exp=math.log(v,2)
                    search_params[k]['start']=on_exp-step
                    search_params[k]['end']=on_exp+step
                else:
                    search_params[k]['start']=v-step
                    search_params[k]['end']=v+step                
            
        print 'Params updated: ',search_params
        print 'Go to next turn'
        

In [None]:
from sklearn.ensemble import RandomForestRegressor

print "Random forest with mse"
params={
    'n_estimators':{
        'start':1,
        'end':7,
        'type':'int',
        'dist':'exp'
    },
    'max_features':{
        'start':1.0,
        'end':1.0,
        'type':'float',
        'dist':'linear'
    },
    'max_depth':{
        'start':10,
        'end':10,
        'type':'int',
        'dist':'linear'
    },
    
}
rfc = RandomForestRegressor(criterion='mse', n_jobs=-1,random_state=42)

auto_param_search(rfc,params,3, X_train, y_train, X_test, y_test)

Random forest with mse
3  iterations of search for regressor 
<class 'sklearn.ensemble.forest.RandomForestRegressor'>
Starting param range is  {'n_estimators': {'start': 1, 'end': 7, 'dist': 'exp', 'type': 'int'}, 'max_features': {'start': 1.0, 'end': 1.0, 'dist': 'linear', 'type': 'float'}, 'max_depth': {'start': 10, 'end': 10, 'dist': 'linear', 'type': 'int'}}
This iteration is for params  {'n_estimators': {'start': 1, 'end': 7, 'dist': 'exp', 'type': 'int'}, 'max_features': {'start': 1.0, 'end': 1.0, 'dist': 'linear', 'type': 'float'}, 'max_depth': {'start': 10, 'end': 10, 'dist': 'linear', 'type': 'int'}}
Grinding param  n_estimators

Grinding param  max_features

Grinding param  max_depth

Grinded params to  {'n_estimators': array([   2.        ,    4.59479342,   10.55606329,   24.25146506,
         55.71523605,  128.        ]), 'max_features': [1.0], 'max_depth': [10]}
Rounding param  n_estimators with list  [   2.            4.59479342   10.55606329   24.25146506   55.71523605
 

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s


[CV] ......... max_features=1.0, n_estimators=2, max_depth=10 -   0.2s
[CV] max_features=1.0, n_estimators=2, max_depth=10 ..................
[CV] ......... max_features=1.0, n_estimators=2, max_depth=10 -   0.2s
[CV] max_features=1.0, n_estimators=2, max_depth=10 ..................
[CV] ......... max_features=1.0, n_estimators=2, max_depth=10 -   0.2s
[CV] max_features=1.0, n_estimators=2, max_depth=10 ..................
[CV] ......... max_features=1.0, n_estimators=2, max_depth=10 -   0.2s
[CV] max_features=1.0, n_estimators=5, max_depth=10 ..................
[CV] ......... max_features=1.0, n_estimators=5, max_depth=10 -   0.2s
[CV] max_features=1.0, n_estimators=5, max_depth=10 ..................
[CV] ......... max_features=1.0, n_estimators=5, max_depth=10 -   0.2s
[CV] max_features=1.0, n_estimators=5, max_depth=10 ..................
[CV] ......... max_features=1.0, n_estimators=5, max_depth=10 -   0.2s
[CV] max_features=1.0, n_estimators=5, max_depth=10 ..................
[CV] .

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:   13.2s finished


Report scores
[mean: -1615862157.86463, std: 659670094.30769, params: {'max_features': 1.0, 'n_estimators': 2, 'max_depth': 10}, mean: -1217927870.98788, std: 374520596.19878, params: {'max_features': 1.0, 'n_estimators': 5, 'max_depth': 10}, mean: -1075624722.51414, std: 370614251.96174, params: {'max_features': 1.0, 'n_estimators': 11, 'max_depth': 10}, mean: -1042479549.88535, std: 327183912.94519, params: {'max_features': 1.0, 'n_estimators': 24, 'max_depth': 10}, mean: -1004085882.22755, std: 319802167.79886, params: {'max_features': 1.0, 'n_estimators': 56, 'max_depth': 10}, mean: -958145258.25019, std: 309216166.47610, params: {'max_features': 1.0, 'n_estimators': 128, 'max_depth': 10}]
Report best params
('score:', -958145258.250185)
max_depth: 10
max_features: 1.0
n_estimators: 128
Params for best estimator are  {'warm_start': False, 'oob_score': False, 'n_jobs': -1, 'verbose': 0, 'max_leaf_nodes': None, 'bootstrap': True, 'min_samples_leaf': 1, 'n_estimators': 128, 'min_sampl

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s


[CV] ........ max_features=1.0, n_estimators=56, max_depth=10 -   0.4s
[CV] max_features=1.0, n_estimators=56, max_depth=10 .................
[CV] ........ max_features=1.0, n_estimators=56, max_depth=10 -   0.4s
[CV] max_features=1.0, n_estimators=56, max_depth=10 .................
[CV] ........ max_features=1.0, n_estimators=56, max_depth=10 -   0.4s
[CV] max_features=1.0, n_estimators=56, max_depth=10 .................
[CV] ........ max_features=1.0, n_estimators=56, max_depth=10 -   0.4s
[CV] max_features=1.0, n_estimators=78, max_depth=10 .................
[CV] ........ max_features=1.0, n_estimators=78, max_depth=10 -   0.5s
[CV] max_features=1.0, n_estimators=78, max_depth=10 .................
[CV] ........ max_features=1.0, n_estimators=78, max_depth=10 -   0.6s
[CV] max_features=1.0, n_estimators=78, max_depth=10 .................
[CV] ........ max_features=1.0, n_estimators=78, max_depth=10 -   0.7s
[CV] max_features=1.0, n_estimators=78, max_depth=10 .................
[CV] .

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:   34.5s finished


Report scores
[mean: -1004085882.22755, std: 319802167.79886, params: {'max_features': 1.0, 'n_estimators': 56, 'max_depth': 10}, mean: -974264005.04165, std: 304086412.70710, params: {'max_features': 1.0, 'n_estimators': 78, 'max_depth': 10}, mean: -961599846.68943, std: 300574143.33689, params: {'max_features': 1.0, 'n_estimators': 108, 'max_depth': 10}, mean: -963060864.99602, std: 304900450.24861, params: {'max_features': 1.0, 'n_estimators': 151, 'max_depth': 10}, mean: -965203893.58234, std: 300386362.48463, params: {'max_features': 1.0, 'n_estimators': 211, 'max_depth': 10}, mean: -964532993.96238, std: 299668891.48960, params: {'max_features': 1.0, 'n_estimators': 294, 'max_depth': 10}]
Report best params
('score:', -961599846.6894273)
max_depth: 10
max_features: 1.0
n_estimators: 108
Params for best estimator are  {'warm_start': False, 'oob_score': False, 'n_jobs': -1, 'verbose': 0, 'max_leaf_nodes': None, 'bootstrap': True, 'min_samples_leaf': 1, 'n_estimators': 108, 'min_sam

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.6s remaining:    0.0s


[CV] ........ max_features=1.0, n_estimators=77, max_depth=10 -   0.7s
[CV] max_features=1.0, n_estimators=77, max_depth=10 .................
[CV] ........ max_features=1.0, n_estimators=77, max_depth=10 -   0.6s
[CV] max_features=1.0, n_estimators=77, max_depth=10 .................
[CV] ........ max_features=1.0, n_estimators=77, max_depth=10 -   0.5s
[CV] max_features=1.0, n_estimators=77, max_depth=10 .................
[CV] ........ max_features=1.0, n_estimators=77, max_depth=10 -   0.5s
[CV] max_features=1.0, n_estimators=88, max_depth=10 .................
[CV] ........ max_features=1.0, n_estimators=88, max_depth=10 -   0.6s
[CV] max_features=1.0, n_estimators=88, max_depth=10 .................
[CV] ........ max_features=1.0, n_estimators=88, max_depth=10 -   0.6s
[CV] max_features=1.0, n_estimators=88, max_depth=10 .................
[CV] ........ max_features=1.0, n_estimators=88, max_depth=10 -   0.6s
[CV] max_features=1.0, n_estimators=88, max_depth=10 .................
[CV] .

In [None]:
from sklearn.ensemble import ExtraTreesRegressor

print "ExtraTreesRegressor"
params={
    'n_estimators':{
        'start':1,
        'end':800,
        'type':'int'
    },
    'max_features':{
        'start':0.0001,
        'end':1.0,
        'type':'float'
    },
    'max_depth':{
        'start':1,
        'end':40,
        'type':'int'
    }
        }
etr = ExtraTreesRegressor(criterion='mse', n_jobs=-1,random_state=42)
auto_param_search(etr,params,5, X_train, y_train, X_test, y_test)

In [20]:
from sklearn.ensemble import AdaBoostRegressor

print "Adaboost"
params={
    'n_estimators':{
        'start':200,
        'end':250,
        'type':'int'
    },
    'learning_rate':{
        'start':0.18,
        'end':0.22,
        'type':'float'
    }
}
abr = AdaBoostRegressor(loss='linear',random_state=42)
auto_param_search(abr,params,2, X_train, y_train, X_test, y_test)

Adaboost
2  iterations of search for regressor 
<class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>
Starting param range is  {'n_estimators': {'start': 200, 'end': 250, 'type': 'int'}, 'learning_rate': {'start': 0.18, 'end': 0.22, 'type': 'float'}}
This iteration is for params  {'n_estimators': {'start': 200, 'end': 250, 'type': 'int'}, 'learning_rate': {'start': 0.18, 'end': 0.22, 'type': 'float'}}
Grinding param  n_estimators
Grinding param  learning_rate
Grinded params to  {'n_estimators': array([ 200.,  210.,  220.,  230.,  240.,  250.]), 'learning_rate': array([ 0.18 ,  0.188,  0.196,  0.204,  0.212,  0.22 ])}
Rounding param  n_estimators with list  [ 200.  210.  220.  230.  240.  250.]
Now rounded to  [200 210 220 230 240 250]
Grinded params are rounded to  {'n_estimators': array([200, 210, 220, 230, 240, 250]), 'learning_rate': array([ 0.18 ,  0.188,  0.196,  0.204,  0.212,  0.22 ])}
Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] n_estimators=200, le

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.5s remaining:    0.0s


[CV] ................... n_estimators=200, learning_rate=0.18 -   1.5s
[CV] n_estimators=200, learning_rate=0.18 ............................
[CV] ................... n_estimators=200, learning_rate=0.18 -   1.6s
[CV] n_estimators=200, learning_rate=0.18 ............................
[CV] ................... n_estimators=200, learning_rate=0.18 -   1.4s
[CV] n_estimators=200, learning_rate=0.18 ............................
[CV] ................... n_estimators=200, learning_rate=0.18 -   1.4s
[CV] n_estimators=210, learning_rate=0.18 ............................
[CV] ................... n_estimators=210, learning_rate=0.18 -   1.6s
[CV] n_estimators=210, learning_rate=0.18 ............................
[CV] ................... n_estimators=210, learning_rate=0.18 -   1.6s
[CV] n_estimators=210, learning_rate=0.18 ............................
[CV] ................... n_estimators=210, learning_rate=0.18 -   1.4s
[CV] n_estimators=210, learning_rate=0.18 ............................
[CV] .

[Parallel(n_jobs=1)]: Done 180 out of 180 | elapsed:  6.1min finished


Report scores
[mean: -1313630812.86175, std: 326255620.87724, params: {'n_estimators': 200, 'learning_rate': 0.17999999999999999}, mean: -1310472144.35669, std: 326724679.90184, params: {'n_estimators': 210, 'learning_rate': 0.17999999999999999}, mean: -1306759381.06412, std: 331280166.10577, params: {'n_estimators': 220, 'learning_rate': 0.17999999999999999}, mean: -1315969695.20024, std: 323537890.41756, params: {'n_estimators': 230, 'learning_rate': 0.17999999999999999}, mean: -1315623855.40299, std: 313887664.94822, params: {'n_estimators': 240, 'learning_rate': 0.17999999999999999}, mean: -1306272987.81688, std: 314968429.49005, params: {'n_estimators': 250, 'learning_rate': 0.17999999999999999}, mean: -1291850532.66120, std: 328368856.69797, params: {'n_estimators': 200, 'learning_rate': 0.188}, mean: -1301686200.54824, std: 316769140.12851, params: {'n_estimators': 210, 'learning_rate': 0.188}, mean: -1292382237.78233, std: 323230065.58792, params: {'n_estimators': 220, 'learnin

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.8s remaining:    0.0s


[CV] .................. n_estimators=200, learning_rate=0.204 -   1.9s
[CV] n_estimators=200, learning_rate=0.204 ...........................
[CV] .................. n_estimators=200, learning_rate=0.204 -   1.8s
[CV] n_estimators=200, learning_rate=0.204 ...........................
[CV] .................. n_estimators=200, learning_rate=0.204 -   1.8s
[CV] n_estimators=200, learning_rate=0.204 ...........................
[CV] .................. n_estimators=200, learning_rate=0.204 -   1.8s
[CV] n_estimators=210, learning_rate=0.204 ...........................
[CV] .................. n_estimators=210, learning_rate=0.204 -   2.0s
[CV] n_estimators=210, learning_rate=0.204 ...........................
[CV] .................. n_estimators=210, learning_rate=0.204 -   2.0s
[CV] n_estimators=210, learning_rate=0.204 ...........................
[CV] .................. n_estimators=210, learning_rate=0.204 -   1.9s
[CV] n_estimators=210, learning_rate=0.204 ...........................
[CV] .

[Parallel(n_jobs=1)]: Done 180 out of 180 | elapsed:  6.5min finished


Report scores
[mean: -1324576124.85420, std: 270616986.19665, params: {'n_estimators': 200, 'learning_rate': 0.20400000000000001}, mean: -1321845011.04003, std: 263738094.30852, params: {'n_estimators': 210, 'learning_rate': 0.20400000000000001}, mean: -1324017979.55644, std: 263974321.27910, params: {'n_estimators': 220, 'learning_rate': 0.20400000000000001}, mean: -1325586567.35068, std: 265063242.83792, params: {'n_estimators': 230, 'learning_rate': 0.20400000000000001}, mean: -1329293922.74406, std: 264792371.01303, params: {'n_estimators': 240, 'learning_rate': 0.20400000000000001}, mean: -1326796309.04431, std: 263237842.58669, params: {'n_estimators': 250, 'learning_rate': 0.20400000000000001}, mean: -1299502334.43069, std: 309494347.56756, params: {'n_estimators': 200, 'learning_rate': 0.20720000000000002}, mean: -1307827214.91324, std: 305192863.41611, params: {'n_estimators': 210, 'learning_rate': 0.20720000000000002}, mean: -1304893128.24052, std: 306311354.17954, params: {'

In [23]:
from sklearn.ensemble import AdaBoostRegressor

print "Adaboost"
params={
    'n_estimators':{
        'start':410,
        'end':430,
        'type':'int'
    },
    'learning_rate':{
        'start':0.51,
        'end':0.55,
        'type':'float'
    }
}
abr = AdaBoostRegressor(loss='square',random_state=42)
auto_param_search(abr,params,2, X_train, y_train, X_test, y_test)

Adaboost
2  iterations of search for regressor 
<class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>
Starting param range is  {'n_estimators': {'start': 410, 'end': 430, 'type': 'int'}, 'learning_rate': {'start': 0.51, 'end': 0.55, 'type': 'float'}}
This iteration is for params  {'n_estimators': {'start': 410, 'end': 430, 'type': 'int'}, 'learning_rate': {'start': 0.51, 'end': 0.55, 'type': 'float'}}
Grinding param  n_estimators
Grinding param  learning_rate
Grinded params to  {'n_estimators': array([ 410.,  414.,  418.,  422.,  426.,  430.]), 'learning_rate': array([ 0.51 ,  0.518,  0.526,  0.534,  0.542,  0.55 ])}
Rounding param  n_estimators with list  [ 410.  414.  418.  422.  426.  430.]
Now rounded to  [410 414 418 422 426 430]
Grinded params are rounded to  {'n_estimators': array([410, 414, 418, 422, 426, 430]), 'learning_rate': array([ 0.51 ,  0.518,  0.526,  0.534,  0.542,  0.55 ])}
Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] n_estimators=410, le

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.6s remaining:    0.0s


[CV] ................... n_estimators=410, learning_rate=0.51 -   4.2s
[CV] n_estimators=410, learning_rate=0.51 ............................
[CV] ................... n_estimators=410, learning_rate=0.51 -   3.0s
[CV] n_estimators=410, learning_rate=0.51 ............................
[CV] ................... n_estimators=410, learning_rate=0.51 -   2.5s
[CV] n_estimators=410, learning_rate=0.51 ............................
[CV] ................... n_estimators=410, learning_rate=0.51 -   2.8s
[CV] n_estimators=414, learning_rate=0.51 ............................
[CV] ................... n_estimators=414, learning_rate=0.51 -   3.4s
[CV] n_estimators=414, learning_rate=0.51 ............................
[CV] ................... n_estimators=414, learning_rate=0.51 -   2.6s
[CV] n_estimators=414, learning_rate=0.51 ............................
[CV] ................... n_estimators=414, learning_rate=0.51 -   3.1s
[CV] n_estimators=414, learning_rate=0.51 ............................
[CV] .

[Parallel(n_jobs=1)]: Done 180 out of 180 | elapsed:  9.8min finished


Report scores
[mean: -1469428458.95993, std: 338933196.42646, params: {'n_estimators': 410, 'learning_rate': 0.51000000000000001}, mean: -1468826906.67707, std: 340382659.86337, params: {'n_estimators': 414, 'learning_rate': 0.51000000000000001}, mean: -1471734183.23518, std: 338507600.59344, params: {'n_estimators': 418, 'learning_rate': 0.51000000000000001}, mean: -1469584535.75009, std: 339696800.96096, params: {'n_estimators': 422, 'learning_rate': 0.51000000000000001}, mean: -1468426056.96934, std: 340230386.25714, params: {'n_estimators': 426, 'learning_rate': 0.51000000000000001}, mean: -1471657801.40158, std: 341917796.63949, params: {'n_estimators': 430, 'learning_rate': 0.51000000000000001}, mean: -1468280786.78368, std: 302335559.33452, params: {'n_estimators': 410, 'learning_rate': 0.51800000000000002}, mean: -1469022698.44653, std: 305380893.52294, params: {'n_estimators': 414, 'learning_rate': 0.51800000000000002}, mean: -1468580760.25116, std: 304301507.37885, params: {'

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.8s remaining:    0.0s


[CV] .................. n_estimators=410, learning_rate=0.526 -   3.0s
[CV] n_estimators=410, learning_rate=0.526 ...........................
[CV] .................. n_estimators=410, learning_rate=0.526 -   2.8s
[CV] n_estimators=410, learning_rate=0.526 ...........................
[CV] .................. n_estimators=410, learning_rate=0.526 -   3.1s
[CV] n_estimators=410, learning_rate=0.526 ...........................
[CV] .................. n_estimators=410, learning_rate=0.526 -   3.4s
[CV] n_estimators=412, learning_rate=0.526 ...........................
[CV] .................. n_estimators=412, learning_rate=0.526 -   3.6s
[CV] n_estimators=412, learning_rate=0.526 ...........................
[CV] .................. n_estimators=412, learning_rate=0.526 -   4.9s
[CV] n_estimators=412, learning_rate=0.526 ...........................
[CV] .................. n_estimators=412, learning_rate=0.526 -   4.5s
[CV] n_estimators=412, learning_rate=0.526 ...........................
[CV] .

[Parallel(n_jobs=1)]: Done 180 out of 180 | elapsed:  9.0min finished


Report scores
[mean: -1460170138.69875, std: 313553942.16541, params: {'n_estimators': 410, 'learning_rate': 0.52600000000000002}, mean: -1460507439.86060, std: 310825775.66846, params: {'n_estimators': 412, 'learning_rate': 0.52600000000000002}, mean: -1461360745.79443, std: 310218272.25638, params: {'n_estimators': 413, 'learning_rate': 0.52600000000000002}, mean: -1462670497.94405, std: 309392837.49819, params: {'n_estimators': 415, 'learning_rate': 0.52600000000000002}, mean: -1460336028.57954, std: 309391869.97607, params: {'n_estimators': 416, 'learning_rate': 0.52600000000000002}, mean: -1464716300.25976, std: 310391345.34493, params: {'n_estimators': 418, 'learning_rate': 0.52600000000000002}, mean: -1490663463.30268, std: 352262664.75402, params: {'n_estimators': 410, 'learning_rate': 0.5292}, mean: -1490409301.94912, std: 353544135.96490, params: {'n_estimators': 412, 'learning_rate': 0.5292}, mean: -1491254514.43356, std: 353966689.74344, params: {'n_estimators': 413, 'learn

In [24]:
from sklearn.ensemble import AdaBoostRegressor

print "Adaboost"
params={
    'n_estimators':{
        'start':158,
        'end':174,
        'type':'int'
    },
    'learning_rate':{
        'start':0.33,
        'end':0.35,
        'type':'float'
    }
}
abr2 = AdaBoostRegressor(loss='exponential',random_state=42)
auto_param_search(abr2,params,1, X_train, y_train, X_test, y_test)

Adaboost
1  iterations of search for regressor 
<class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>
Starting param range is  {'n_estimators': {'start': 158, 'end': 174, 'type': 'int'}, 'learning_rate': {'start': 0.33, 'end': 0.35, 'type': 'float'}}
This iteration is for params  {'n_estimators': {'start': 158, 'end': 174, 'type': 'int'}, 'learning_rate': {'start': 0.33, 'end': 0.35, 'type': 'float'}}
Grinding param  n_estimators
Grinding param  learning_rate
Grinded params to  {'n_estimators': array([ 158. ,  161.2,  164.4,  167.6,  170.8,  174. ]), 'learning_rate': array([ 0.33 ,  0.334,  0.338,  0.342,  0.346,  0.35 ])}
Rounding param  n_estimators with list  [ 158.   161.2  164.4  167.6  170.8  174. ]
Now rounded to  [158 161 164 168 171 174]
Grinded params are rounded to  {'n_estimators': array([158, 161, 164, 168, 171, 174]), 'learning_rate': array([ 0.33 ,  0.334,  0.338,  0.342,  0.346,  0.35 ])}
Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] n_estima

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.3s remaining:    0.0s


[CV] ................... n_estimators=158, learning_rate=0.33 -   1.3s
[CV] n_estimators=158, learning_rate=0.33 ............................
[CV] ................... n_estimators=158, learning_rate=0.33 -   1.1s
[CV] n_estimators=158, learning_rate=0.33 ............................
[CV] ................... n_estimators=158, learning_rate=0.33 -   1.7s
[CV] n_estimators=158, learning_rate=0.33 ............................
[CV] ................... n_estimators=158, learning_rate=0.33 -   1.1s
[CV] n_estimators=161, learning_rate=0.33 ............................
[CV] ................... n_estimators=161, learning_rate=0.33 -   1.7s
[CV] n_estimators=161, learning_rate=0.33 ............................
[CV] ................... n_estimators=161, learning_rate=0.33 -   1.0s
[CV] n_estimators=161, learning_rate=0.33 ............................
[CV] ................... n_estimators=161, learning_rate=0.33 -   1.3s
[CV] n_estimators=161, learning_rate=0.33 ............................
[CV] .

[Parallel(n_jobs=1)]: Done 180 out of 180 | elapsed:  4.2min finished


Report scores
[mean: -1293233850.65259, std: 283061611.76795, params: {'n_estimators': 158, 'learning_rate': 0.33000000000000002}, mean: -1291529914.84791, std: 275190368.86414, params: {'n_estimators': 161, 'learning_rate': 0.33000000000000002}, mean: -1291621902.72431, std: 276432661.91969, params: {'n_estimators': 164, 'learning_rate': 0.33000000000000002}, mean: -1291869667.37033, std: 278434993.08017, params: {'n_estimators': 168, 'learning_rate': 0.33000000000000002}, mean: -1289890211.16179, std: 280525261.02336, params: {'n_estimators': 171, 'learning_rate': 0.33000000000000002}, mean: -1289197069.07641, std: 280708323.30905, params: {'n_estimators': 174, 'learning_rate': 0.33000000000000002}, mean: -1300151742.41748, std: 305339974.34615, params: {'n_estimators': 158, 'learning_rate': 0.33400000000000002}, mean: -1306129576.37077, std: 305633241.29177, params: {'n_estimators': 161, 'learning_rate': 0.33400000000000002}, mean: -1309813492.27055, std: 306077641.87939, params: {'