In [9]:
import pandas as pd
import xgboost as xgb


from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, RationalQuadratic
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.utils.fixes import loguniform
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV

from scipy.stats import uniform


In [2]:
df_concreto = pd.read_csv("./Bancos_utilizados/Bancos_apos_filtragem/train.csv")

In [3]:
X = df_concreto.drop(['Resistência'], axis=1)
y = df_concreto['Resistência']

In [4]:
kf = KFold(10, shuffle=True, random_state=451)

## XGBoost

In [5]:
my_pipeline = Pipeline(steps=[('preprocessor', StandardScaler()),
                              ('model', xgb.XGBRegressor())
                             ])

parameters = {'model__learning_rate': uniform(0,1),
              'model__max_depth': [1,2,3,4,5,6,7,8,9,10],
              'model__min_child_weight': uniform(0,10), 
              'model__subsample': uniform(0,1), 
              'model__colsample_bytree': uniform(0,1)} 


svr_grid = RandomizedSearchCV(estimator = my_pipeline,
                            param_distributions = parameters,
                            cv = kf,
                            n_iter = 100,
                            scoring='r2')

svr_grid.fit(X,y)

print(svr_grid.best_params_)

{'model__colsample_bytree': 0.9979435131354824, 'model__learning_rate': 0.373491988716558, 'model__max_depth': 5, 'model__min_child_weight': 5.642688303759642, 'model__subsample': 0.8000064806126039}


## SVR

In [10]:
my_pipeline = Pipeline(steps=[('preprocessor', StandardScaler()),
                              ('model', SVR(kernel='rbf'))
                             ])

parameters = { 'model__C': loguniform(1e-3, 1e3),
'model__gamma': loguniform(1e-3, 1e3)}


svr_grid = RandomizedSearchCV(estimator = my_pipeline,
                            param_distributions = parameters,
                            cv = kf,
                            n_iter = 100,
                            scoring='r2')

svr_grid.fit(X,y)

print(svr_grid.best_params_)

{'model__C': 27.192586103694374, 'model__gamma': 0.5215952561257536}


## GPR

In [None]:
kernel = ConstantKernel() * RationalQuadratic()
my_pipeline = Pipeline(steps=[('preprocessor', StandardScaler()),
                              ('model', GaussianProcessRegressor(kernel=kernel,random_state=0))
                             ])

parameters = { 'model__alpha': loguniform(1e-10, 1e3)}


svr_grid = RandomizedSearchCV(estimator = my_pipeline,
                            param_distributions = parameters,
                            cv = kf,
                            n_iter = 100,
                            scoring='r2')

svr_grid.fit(X,y)

print(svr_grid.best_params_)