In [36]:
# Let us make the imports for the entire code

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV
import time
# Enable to start counting processing time
# start = time.time()

In [37]:

# Here we read and organize CSV data

t2 = pd.read_csv('t2_OkumuraHata_Modificado', delimiter='\t')
t3 = pd.read_csv('t3_OkumuraHata_Modificado', delimiter='\t')
min_download = pd.read_csv('file1.csv', delimiter=',')

# Guarantee that we utilize only seeds present in both datasets
t2 = t2[t2.nRun.isin(t3.nRun)]
t3 = t3[t3.nRun.isin(t2.nRun)]
t2 = t2.reset_index(drop=True)
t3 = t3.reset_index(drop=True)




# Combining datasets
data = t2
data = data.drop(['targetCellId', 'downloadTime', 'rxBytes'], axis=1)
data['downloadTimeT2'] = t2.downloadTime
data['downloadTimeT3'] = t3.downloadTime
data['downloadTime'] = min_download.downloadTimeT2
data['rxBytesT2'] = t2.rxBytes
data['rxBytesT3'] = t3.rxBytes

In [38]:
# Data Pre-processing


# Sets data as inputs and labels
x_train = data[['rsrp1','rsrq1','rsrp2','rsrq2','rsrp3','rsrq3','previousrsrp1','previousrsrq1','previousrsrp2','previousrsrq2','previousrsrp3','previousrsrq3']]
x_train = x_train.values
y_train = data[['downloadTime']] 
y_train = y_train.values

# Scaling data
scaler_x = MinMaxScaler(feature_range=(0, 1))
previsores = scaler_x.fit_transform(x_train)
scaler_y = MinMaxScaler(feature_range=(0, 1))
label = scaler_y.fit_transform(y_train)

In [39]:
# Defining the method and the random search

# MLP
from sklearn.neural_network import MLPRegressor

clf = MLPRegressor()



# Grid containing the possible parameters to be combined
param_grid = {

                'learning_rate_init': list(np.logspace(np.log10(0.005), np.log10(0.5), base = 10, num = 500)),
                'hidden_layer_sizes': list(range(1,23,1)),
                'max_iter': list(range(1000,4500,100)),
                'learning_rate' : ['invscaling'],
                'activation' : ['tanh', 'logistic'],
                #'solver' : ['lbfgs'],
                'alpha': [0.001],
             }



# Defining random search
rs_clf = RandomizedSearchCV(clf, param_grid, n_iter=50,
                        n_jobs=None, verbose=2, cv=2,
                        scoring='neg_mean_absolute_error', refit=False, random_state=42)
print("Pesquisa Aleatória...")
inicio = time.time()


# Fitting 
rs_clf.fit(x_train, y_train.ravel())
print("Tempo:", time.time() - inicio)
   
    
# Printing of the best parameters
best_score = rs_clf.best_score_
best_params = rs_clf.best_params_
print("Melhor Pontuação: {}".format(best_score))
print("Melhores parâmetros: ")
for param_name in sorted(best_params.keys()):
    print('%s = %r,' % (param_name, best_params[param_name]))

Pesquisa Aleatória...
Fitting 2 folds for each of 50 candidates, totalling 100 fits
[CV] max_iter=2800, learning_rate_init=0.4353612424619958, learning_rate=invscaling, hidden_layer_sizes=7, alpha=0.001, activation=tanh 
[CV]  max_iter=2800, learning_rate_init=0.4353612424619958, learning_rate=invscaling, hidden_layer_sizes=7, alpha=0.001, activation=tanh, total=   0.1s
[CV] max_iter=2800, learning_rate_init=0.4353612424619958, learning_rate=invscaling, hidden_layer_sizes=7, alpha=0.001, activation=tanh 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV]  max_iter=2800, learning_rate_init=0.4353612424619958, learning_rate=invscaling, hidden_layer_sizes=7, alpha=0.001, activation=tanh, total=   0.1s
[CV] max_iter=4000, learning_rate_init=0.025140436274712397, learning_rate=invscaling, hidden_layer_sizes=17, alpha=0.001, activation=logistic 
[CV]  max_iter=4000, learning_rate_init=0.025140436274712397, learning_rate=invscaling, hidden_layer_sizes=17, alpha=0.001, activation=logistic, total=   0.2s
[CV] max_iter=4000, learning_rate_init=0.025140436274712397, learning_rate=invscaling, hidden_layer_sizes=17, alpha=0.001, activation=logistic 
[CV]  max_iter=4000, learning_rate_init=0.025140436274712397, learning_rate=invscaling, hidden_layer_sizes=17, alpha=0.001, activation=logistic, total=   0.2s
[CV] max_iter=2700, learning_rate_init=0.05985856717094833, learning_rate=invscaling, hidden_layer_sizes=8, alpha=0.001, activation=tanh 
[CV]  max_iter=2700, learning_rate_init=0.05985856717094833, learning_rate=invscaling, hidden_layer_size

[CV]  max_iter=1000, learning_rate_init=0.024909487596025796, learning_rate=invscaling, hidden_layer_sizes=3, alpha=0.001, activation=tanh, total=   0.5s
[CV] max_iter=1000, learning_rate_init=0.024909487596025796, learning_rate=invscaling, hidden_layer_sizes=3, alpha=0.001, activation=tanh 
[CV]  max_iter=1000, learning_rate_init=0.024909487596025796, learning_rate=invscaling, hidden_layer_sizes=3, alpha=0.001, activation=tanh, total=   0.5s
[CV] max_iter=2500, learning_rate_init=0.21991587333251117, learning_rate=invscaling, hidden_layer_sizes=19, alpha=0.001, activation=tanh 
[CV]  max_iter=2500, learning_rate_init=0.21991587333251117, learning_rate=invscaling, hidden_layer_sizes=19, alpha=0.001, activation=tanh, total=   0.1s
[CV] max_iter=2500, learning_rate_init=0.21991587333251117, learning_rate=invscaling, hidden_layer_sizes=19, alpha=0.001, activation=tanh 
[CV]  max_iter=2500, learning_rate_init=0.21991587333251117, learning_rate=invscaling, hidden_layer_sizes=19, alpha=0.001

[CV]  max_iter=1100, learning_rate_init=0.07333354331719838, learning_rate=invscaling, hidden_layer_sizes=7, alpha=0.001, activation=logistic, total=   0.1s
[CV] max_iter=1100, learning_rate_init=0.07333354331719838, learning_rate=invscaling, hidden_layer_sizes=7, alpha=0.001, activation=logistic 
[CV]  max_iter=1100, learning_rate_init=0.07333354331719838, learning_rate=invscaling, hidden_layer_sizes=7, alpha=0.001, activation=logistic, total=   0.1s
[CV] max_iter=1000, learning_rate_init=0.41190936656998023, learning_rate=invscaling, hidden_layer_sizes=9, alpha=0.001, activation=tanh 
[CV]  max_iter=1000, learning_rate_init=0.41190936656998023, learning_rate=invscaling, hidden_layer_sizes=9, alpha=0.001, activation=tanh, total=   0.0s
[CV] max_iter=1000, learning_rate_init=0.41190936656998023, learning_rate=invscaling, hidden_layer_sizes=9, alpha=0.001, activation=tanh 
[CV]  max_iter=1000, learning_rate_init=0.41190936656998023, learning_rate=invscaling, hidden_layer_sizes=9, alpha=

[CV] max_iter=2300, learning_rate_init=0.0915155837810306, learning_rate=invscaling, hidden_layer_sizes=16, alpha=0.001, activation=tanh 
[CV]  max_iter=2300, learning_rate_init=0.0915155837810306, learning_rate=invscaling, hidden_layer_sizes=16, alpha=0.001, activation=tanh, total=   0.0s
[CV] max_iter=2300, learning_rate_init=0.0915155837810306, learning_rate=invscaling, hidden_layer_sizes=16, alpha=0.001, activation=tanh 
[CV]  max_iter=2300, learning_rate_init=0.0915155837810306, learning_rate=invscaling, hidden_layer_sizes=16, alpha=0.001, activation=tanh, total=   0.0s
[CV] max_iter=4100, learning_rate_init=0.35865874722328056, learning_rate=invscaling, hidden_layer_sizes=19, alpha=0.001, activation=tanh 
[CV]  max_iter=4100, learning_rate_init=0.35865874722328056, learning_rate=invscaling, hidden_layer_sizes=19, alpha=0.001, activation=tanh, total=   0.1s
[CV] max_iter=4100, learning_rate_init=0.35865874722328056, learning_rate=invscaling, hidden_layer_sizes=19, alpha=0.001, act

[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   22.0s finished


In [40]:
# Defining the method and the random search

# Random Forest 
from sklearn.ensemble import RandomForestRegressor

clf = RandomForestRegressor()



# Grid containing the possible parameters to be combined
param_grid = {
                'max_depth': [4, 5, 6, 7, 8],
                'max_samples': [0.5, 0.6, 0.7, 0.8, 0.85],
                'max_features': [0.3, 0.4, 0.5, 0.6, 0.7],
                'criterion' : ['mse'],
                'n_estimators': list(range(10,210,5)),
             }




# Defining random search
rs_clf = RandomizedSearchCV(clf, param_grid, n_iter=50,
                        n_jobs=1, verbose=2, cv=2,
                        scoring='neg_mean_absolute_error', refit=False, random_state=42)
print("Pesquisa Aleatória...")
inicio = time.time()



# Fitting 
rs_clf.fit(x_train, y_train.ravel())
print("Tempo:", time.time() - inicio)
  
    
    
# Printing of the best parameters
best_score = rs_clf.best_score_
best_params = rs_clf.best_params_
print("Melhor Pontuação: {}".format(best_score))
print("Melhores parâmetros: ")
for param_name in sorted(best_params.keys()):
    print('%s = %r,' % (param_name, best_params[param_name]))


Pesquisa Aleatória...
Fitting 2 folds for each of 50 candidates, totalling 100 fits
[CV] n_estimators=110, max_samples=0.6, max_features=0.7, max_depth=4, criterion=mse 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s


[CV]  n_estimators=110, max_samples=0.6, max_features=0.7, max_depth=4, criterion=mse, total=   0.2s
[CV] n_estimators=110, max_samples=0.6, max_features=0.7, max_depth=4, criterion=mse 
[CV]  n_estimators=110, max_samples=0.6, max_features=0.7, max_depth=4, criterion=mse, total=   0.2s
[CV] n_estimators=70, max_samples=0.85, max_features=0.6, max_depth=7, criterion=mse 
[CV]  n_estimators=70, max_samples=0.85, max_features=0.6, max_depth=7, criterion=mse, total=   0.1s
[CV] n_estimators=70, max_samples=0.85, max_features=0.6, max_depth=7, criterion=mse 
[CV]  n_estimators=70, max_samples=0.85, max_features=0.6, max_depth=7, criterion=mse, total=   0.1s
[CV] n_estimators=70, max_samples=0.7, max_features=0.3, max_depth=7, criterion=mse 
[CV]  n_estimators=70, max_samples=0.7, max_features=0.3, max_depth=7, criterion=mse, total=   0.1s
[CV] n_estimators=70, max_samples=0.7, max_features=0.3, max_depth=7, criterion=mse 
[CV]  n_estimators=70, max_samples=0.7, max_features=0.3, max_depth=

[CV]  n_estimators=145, max_samples=0.8, max_features=0.6, max_depth=6, criterion=mse, total=   0.2s
[CV] n_estimators=145, max_samples=0.8, max_features=0.6, max_depth=6, criterion=mse 
[CV]  n_estimators=145, max_samples=0.8, max_features=0.6, max_depth=6, criterion=mse, total=   0.2s
[CV] n_estimators=85, max_samples=0.85, max_features=0.7, max_depth=4, criterion=mse 
[CV]  n_estimators=85, max_samples=0.85, max_features=0.7, max_depth=4, criterion=mse, total=   0.1s
[CV] n_estimators=85, max_samples=0.85, max_features=0.7, max_depth=4, criterion=mse 
[CV]  n_estimators=85, max_samples=0.85, max_features=0.7, max_depth=4, criterion=mse, total=   0.1s
[CV] n_estimators=40, max_samples=0.5, max_features=0.7, max_depth=5, criterion=mse 
[CV]  n_estimators=40, max_samples=0.5, max_features=0.7, max_depth=5, criterion=mse, total=   0.1s
[CV] n_estimators=40, max_samples=0.5, max_features=0.7, max_depth=5, criterion=mse 
[CV]  n_estimators=40, max_samples=0.5, max_features=0.7, max_depth=

[CV]  n_estimators=70, max_samples=0.5, max_features=0.6, max_depth=6, criterion=mse, total=   0.1s
[CV] n_estimators=25, max_samples=0.85, max_features=0.4, max_depth=5, criterion=mse 
[CV]  n_estimators=25, max_samples=0.85, max_features=0.4, max_depth=5, criterion=mse, total=   0.0s
[CV] n_estimators=25, max_samples=0.85, max_features=0.4, max_depth=5, criterion=mse 
[CV]  n_estimators=25, max_samples=0.85, max_features=0.4, max_depth=5, criterion=mse, total=   0.0s
[CV] n_estimators=200, max_samples=0.6, max_features=0.5, max_depth=5, criterion=mse 
[CV]  n_estimators=200, max_samples=0.6, max_features=0.5, max_depth=5, criterion=mse, total=   0.3s
[CV] n_estimators=200, max_samples=0.6, max_features=0.5, max_depth=5, criterion=mse 
[CV]  n_estimators=200, max_samples=0.6, max_features=0.5, max_depth=5, criterion=mse, total=   0.3s
[CV] n_estimators=190, max_samples=0.8, max_features=0.5, max_depth=6, criterion=mse 
[CV]  n_estimators=190, max_samples=0.8, max_features=0.5, max_dep

[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   15.0s finished


In [41]:
# Defining the method and the random search

# Gradient Boosting Machine 
from sklearn.ensemble import GradientBoostingRegressor

clf = GradientBoostingRegressor()



# Grid containing the possible parameters to be combined
param_grid = {
    
                'max_depth': [4, 5, 6, 7, 8, 9],
                'learning_rate': list(np.logspace(np.log10(0.005), np.log10(0.5), base = 10, num = 480)),
                'subsample': [0.5, 0.6, 0.7, 0.8],
                'max_features': [0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
                'criterion' : ['mse'],
                'n_estimators': list(range(10,190,5)),
             }


# Defining random search
rs_clf = RandomizedSearchCV(clf, param_grid, n_iter=50,
                        n_jobs=1, verbose=2, cv=2,
                        scoring='neg_mean_absolute_error', refit=False, random_state=42)
print("Pesquisa Aleatória...")
inicio = time.time()


# Fitting 
rs_clf.fit(x_train, y_train.ravel())
print("Tempo:", time.time() - inicio)
    

# Printing of the best parameters
best_score = rs_clf.best_score_
best_params = rs_clf.best_params_
print("Melhor Pontuação: {}".format(best_score))
print("Melhores parâmetros: ")
for param_name in sorted(best_params.keys()):
    print('%s = %r,' % (param_name, best_params[param_name]))


Pesquisa Aleatória...
Fitting 2 folds for each of 50 candidates, totalling 100 fits
[CV] subsample=0.7, n_estimators=95, max_features=0.4, max_depth=4, learning_rate=0.3062149371677256, criterion=mse 
[CV]  subsample=0.7, n_estimators=95, max_features=0.4, max_depth=4, learning_rate=0.3062149371677256, criterion=mse, total=   0.1s
[CV] subsample=0.7, n_estimators=95, max_features=0.4, max_depth=4, learning_rate=0.3062149371677256, criterion=mse 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV]  subsample=0.7, n_estimators=95, max_features=0.4, max_depth=4, learning_rate=0.3062149371677256, criterion=mse, total=   0.1s
[CV] subsample=0.5, n_estimators=145, max_features=0.7, max_depth=9, learning_rate=0.30917312603272035, criterion=mse 
[CV]  subsample=0.5, n_estimators=145, max_features=0.7, max_depth=9, learning_rate=0.30917312603272035, criterion=mse, total=   0.3s
[CV] subsample=0.5, n_estimators=145, max_features=0.7, max_depth=9, learning_rate=0.30917312603272035, criterion=mse 
[CV]  subsample=0.5, n_estimators=145, max_features=0.7, max_depth=9, learning_rate=0.30917312603272035, criterion=mse, total=   0.2s
[CV] subsample=0.7, n_estimators=80, max_features=0.3, max_depth=7, learning_rate=0.3931749776071209, criterion=mse 
[CV]  subsample=0.7, n_estimators=80, max_features=0.3, max_depth=7, learning_rate=0.3931749776071209, criterion=mse, total=   0.1s
[CV] subsample=0.7, n_estimators=80, max_features=0.3, max_depth=7, learning_rate=0.3931749776071209, criterion=m

[CV]  subsample=0.8, n_estimators=20, max_features=0.2, max_depth=9, learning_rate=0.13139965394214864, criterion=mse, total=   0.0s
[CV] subsample=0.8, n_estimators=20, max_features=0.2, max_depth=9, learning_rate=0.13139965394214864, criterion=mse 
[CV]  subsample=0.8, n_estimators=20, max_features=0.2, max_depth=9, learning_rate=0.13139965394214864, criterion=mse, total=   0.0s
[CV] subsample=0.6, n_estimators=65, max_features=0.5, max_depth=4, learning_rate=0.009162585094546236, criterion=mse 
[CV]  subsample=0.6, n_estimators=65, max_features=0.5, max_depth=4, learning_rate=0.009162585094546236, criterion=mse, total=   0.1s
[CV] subsample=0.6, n_estimators=65, max_features=0.5, max_depth=4, learning_rate=0.009162585094546236, criterion=mse 
[CV]  subsample=0.6, n_estimators=65, max_features=0.5, max_depth=4, learning_rate=0.009162585094546236, criterion=mse, total=   0.1s
[CV] subsample=0.6, n_estimators=115, max_features=0.7, max_depth=4, learning_rate=0.14746827684708114, criter

[CV]  subsample=0.8, n_estimators=180, max_features=0.7, max_depth=7, learning_rate=0.01320328215026029, criterion=mse, total=   0.4s
[CV] subsample=0.8, n_estimators=185, max_features=0.2, max_depth=4, learning_rate=0.016629939620340584, criterion=mse 
[CV]  subsample=0.8, n_estimators=185, max_features=0.2, max_depth=4, learning_rate=0.016629939620340584, criterion=mse, total=   0.1s
[CV] subsample=0.8, n_estimators=185, max_features=0.2, max_depth=4, learning_rate=0.016629939620340584, criterion=mse 
[CV]  subsample=0.8, n_estimators=185, max_features=0.2, max_depth=4, learning_rate=0.016629939620340584, criterion=mse, total=   0.2s
[CV] subsample=0.7, n_estimators=45, max_features=0.3, max_depth=7, learning_rate=0.08047307355151191, criterion=mse 
[CV]  subsample=0.7, n_estimators=45, max_features=0.3, max_depth=7, learning_rate=0.08047307355151191, criterion=mse, total=   0.1s
[CV] subsample=0.7, n_estimators=45, max_features=0.3, max_depth=7, learning_rate=0.08047307355151191, cr

[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   13.9s finished


In [42]:
# Defining the method and the random search

# LightGBM
from lightgbm import LGBMRegressor
#import lightgbm

clf = LGBMRegressor()


# Grid containing the possible parameters to be combined
param_grid = {
                'max_depth': [8,9,10,11,12,13],
                'learning_rate': list(np.logspace(np.log10(0.005), np.log10(0.5), base = 10, num = 800)),
                'bagging_fraction': [0.7],
                'feature_fraction': [0.6],
                'objective' :['regression_l1'],
                'eval_metric': ['mae'],
                'n_estimators': [45],
                'min_data_in_leaf': [28],
             }


# Defining random search
rs_clf = RandomizedSearchCV(clf, param_grid, n_iter=50,
                        n_jobs=1, verbose=2, cv=2,
                        scoring='neg_mean_absolute_error', refit=False, random_state=42)
print("Pesquisa Aleatória...")
inicio = time.time()


# Fitting 
rs_clf.fit(x_train, y_train.ravel())
print("Tempo:", time.time() - inicio)
    
    
# Printing of the best parameters
best_score = rs_clf.best_score_
best_params = rs_clf.best_params_
print("Melhor Pontuação: {}".format(best_score))
print("Melhores parâmetros: ")
for param_name in sorted(best_params.keys()):
    print('%s = %r,' % (param_name, best_params[param_name]))

Pesquisa Aleatória...
Fitting 2 folds for each of 50 candidates, totalling 100 fits
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=10, learning_rate=0.008846668759298213, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7 
[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=10, learning_rate=0.008846668759298213, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=10, learning_rate=0.008846668759298213, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7 
[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=10, learning_rate=0.008846668759298213, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s



[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=12, learning_rate=0.12683097278788996, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7 
[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=12, learning_rate=0.12683097278788996, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=12, learning_rate=0.12683097278788996, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7 
[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=12, learning_rate=0.12683097278788996, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=8, learning_rate=0.09344573647661855, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7 
[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=8

[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=12, learning_rate=0.04599118875500305, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=12, learning_rate=0.04599118875500305, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7 
[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=12, learning_rate=0.04599118875500305, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=13, learning_rate=0.006188505397311934, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7 
[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=13, learning_rate=0.006188505397311934, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf

[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=10, learning_rate=0.03019577805857187, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=9, learning_rate=0.07420519911228642, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7 
[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=9, learning_rate=0.07420519911228642, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=9, learning_rate=0.07420519911228642, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7 
[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=9, learning_rate=0.07420519911228642, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=28, m

[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=12, learning_rate=0.008745275766350964, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=12, learning_rate=0.008745275766350964, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7 
[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=12, learning_rate=0.008745275766350964, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=9, learning_rate=0.02384064161560107, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7 
[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=9, learning_rate=0.02384064161560107, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
[CV] objective=regression_l1, n_estimators=45, min_data_in_leaf=

[CV]  objective=regression_l1, n_estimators=45, min_data_in_leaf=28, max_depth=9, learning_rate=0.029507587862705346, feature_fraction=0.6, eval_metric=mae, bagging_fraction=0.7, total=   0.0s
Tempo: 2.8213775157928467
Melhor Pontuação: -0.15634682456365362
Melhores parâmetros: 
bagging_fraction = 0.7,
eval_metric = 'mae',
feature_fraction = 0.6,
learning_rate = 0.15788621811594675,
max_depth = 9,
min_data_in_leaf = 28,
n_estimators = 45,
objective = 'regression_l1',


[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    2.7s finished


In [43]:
# Defining the method and the random search

# XGBoost 
import xgboost as xgb

clf = xgb.XGBRegressor()


# Grid containing the possible parameters to be combined
param_grid = {
    
        'max_depth': [4, 5, 6, 7, 8, 9],
        'learning_rate': list(np.logspace(np.log10(0.005), np.log10(0.5), base = 10, num = 600)),
        'colsample_bytree': [0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
        'colsample_bylevel': [0.4, 0.5, 0.6, 0.7,0.8],
        'booster' : ['gbtree'],
        'objective' :['reg:logistic'],
        'eval_metric': ['mae'],
        'colsample_bynode': [0.4, 0.5, 0.6, 0.7, 0.8],
        'n_estimators': list(range(10,350,5)),
    }


# Defining random search
rs_clf = RandomizedSearchCV(clf, param_grid, n_iter=50,
                        n_jobs=1, verbose=2, cv=2,
                        scoring='neg_mean_absolute_error', refit=False, random_state=42)
print("Pesquisa Aleatória...")
inicio = time.time()



# Fitting 
rs_clf.fit(x_train, y_train)
print("Tempo:", time.time() - inicio)



# Printing of the best parameters    
best_score = rs_clf.best_score_
best_params = rs_clf.best_params_
print("Melhor Pontuação: {}".format(best_score))
print("Melhores parâmetros: ")
for param_name in sorted(best_params.keys()):
    print('%s = %r,' % (param_name, best_params[param_name]))

Pesquisa Aleatória...
Fitting 2 folds for each of 50 candidates, totalling 100 fits
[CV] objective=reg:logistic, n_estimators=110, max_depth=4, learning_rate=0.008630416875102158, eval_metric=mae, colsample_bytree=0.5, colsample_bynode=0.8, colsample_bylevel=0.6, booster=gbtree 
[CV]  objective=reg:logistic, n_estimators=110, max_depth=4, learning_rate=0.008630416875102158, eval_metric=mae, colsample_bytree=0.5, colsample_bynode=0.8, colsample_bylevel=0.6, booster=gbtree, total=   0.1s
[CV] objective=reg:logistic, n_estimators=110, max_depth=4, learning_rate=0.008630416875102158, eval_metric=mae, colsample_bytree=0.5, colsample_bynode=0.8, colsample_bylevel=0.6, booster=gbtree 
[CV]  objective=reg:logistic, n_estimators=110, max_depth=4, learning_rate=0.008630416875102158, eval_metric=mae, colsample_bytree=0.5, colsample_bynode=0.8, colsample_bylevel=0.6, booster=gbtree, total=   0.0s
[CV] objective=reg:logistic, n_estimators=70, max_depth=4, learning_rate=0.030451667056341832, eval_me

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
xgboost.core.XGBoostError: [16:58:12] src/objective/regression_obj.cu:101: label must be in [0,1] for logistic regression

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s



[CV] objective=reg:logistic, n_estimators=340, max_depth=9, learning_rate=0.013376699446991864, eval_metric=mae, colsample_bytree=0.4, colsample_bynode=0.7, colsample_bylevel=0.7, booster=gbtree 
[CV]  objective=reg:logistic, n_estimators=340, max_depth=9, learning_rate=0.013376699446991864, eval_metric=mae, colsample_bytree=0.4, colsample_bynode=0.7, colsample_bylevel=0.7, booster=gbtree, total=   0.0s
[CV] objective=reg:logistic, n_estimators=220, max_depth=6, learning_rate=0.0383511170112182, eval_metric=mae, colsample_bytree=0.8, colsample_bynode=0.6, colsample_bylevel=0.7, booster=gbtree 
[CV]  objective=reg:logistic, n_estimators=220, max_depth=6, learning_rate=0.0383511170112182, eval_metric=mae, colsample_bytree=0.8, colsample_bynode=0.6, colsample_bylevel=0.7, booster=gbtree, total=   0.0s
[CV] objective=reg:logistic, n_estimators=220, max_depth=6, learning_rate=0.0383511170112182, eval_metric=mae, colsample_bytree=0.8, colsample_bynode=0.6, colsample_bylevel=0.7, booster=gbt

[CV]  objective=reg:logistic, n_estimators=65, max_depth=8, learning_rate=0.11514377270595029, eval_metric=mae, colsample_bytree=0.5, colsample_bynode=0.8, colsample_bylevel=0.7, booster=gbtree, total=   0.0s
[CV] objective=reg:logistic, n_estimators=280, max_depth=6, learning_rate=0.05944224631784197, eval_metric=mae, colsample_bytree=0.7, colsample_bynode=0.6, colsample_bylevel=0.5, booster=gbtree 
[CV]  objective=reg:logistic, n_estimators=280, max_depth=6, learning_rate=0.05944224631784197, eval_metric=mae, colsample_bytree=0.7, colsample_bynode=0.6, colsample_bylevel=0.5, booster=gbtree, total=   0.0s
[CV] objective=reg:logistic, n_estimators=280, max_depth=6, learning_rate=0.05944224631784197, eval_metric=mae, colsample_bytree=0.7, colsample_bynode=0.6, colsample_bylevel=0.5, booster=gbtree 
[CV]  objective=reg:logistic, n_estimators=280, max_depth=6, learning_rate=0.05944224631784197, eval_metric=mae, colsample_bytree=0.7, colsample_bynode=0.6, colsample_bylevel=0.5, booster=gbt

[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.3s finished
