# GRIDSEARCHCV

In [1]:
# Importar os módulos

import sklearn
from sklearn.datasets import load_iris

from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression

from sklearn.model_selection import GridSearchCV

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Versões dos módulos

!python --version
%reload_ext watermark
%watermark --iversions

Python 3.9.7
sklearn: 0.24.2



In [3]:
# Carregar o dataset

iris = load_iris()

In [4]:
# Dividir o dataset em treino e testes

xt, x_, yt, y_ = train_test_split(iris.data, iris.target, test_size=0.3, random_state=42)

### Exemplo I

In [5]:
# Criar um estimador

rf = RandomForestClassifier()

In [6]:
# Verificar os seus parametros

for vl in rf.get_params().keys():
  print(vl)

bootstrap
ccp_alpha
class_weight
criterion
max_depth
max_features
max_leaf_nodes
max_samples
min_impurity_decrease
min_impurity_split
min_samples_leaf
min_samples_split
min_weight_fraction_leaf
n_estimators
n_jobs
oob_score
random_state
verbose
warm_start


In [7]:
# Criar uma variável com os parametros

parameters = {'max_depth':[1, 2, 3, 4, 5, 10], 
              'bootstrap': [True, False] }

In [8]:
# Criar o Gridsearch

grid = GridSearchCV(rf, parameters, cv=5, scoring='accuracy')

In [9]:
# Fazer o treinamento do modelo

grid.fit(xt, yt)

GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid={'bootstrap': [True, False],
                         'max_depth': [1, 2, 3, 4, 5, 10]},
             scoring='accuracy')

In [10]:
# Verificar o resultado do melhor parametro

grid.best_params_

{'bootstrap': True, 'max_depth': 1}

In [11]:
# Verificar o score do modelo

grid.score(x_, y_)

1.0

### Exemplo II

In [12]:
# Criar o estimador

knn = KNeighborsClassifier()

In [13]:
# Verificar o seus paramentros

for vl in knn.get_params().keys():
  print(vl)

algorithm
leaf_size
metric
metric_params
n_jobs
n_neighbors
p
weights


In [14]:
# Criar uma variável com os parametros

parameters = {'n_neighbors': [2, 3, 5, 10], 
              'leaf_size': [20, 25, 30],
              'weights': ['uniform', 'distance']}

In [15]:
# Criar o Gridsearch

grid = GridSearchCV(knn, parameters, cv=5, scoring='accuracy')

In [16]:
# Treinar o modelo

grid.fit(xt, yt)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'leaf_size': [20, 25, 30],
                         'n_neighbors': [2, 3, 5, 10],
                         'weights': ['uniform', 'distance']},
             scoring='accuracy')

In [17]:
#  Verificar o resultado do melhor parametro

grid.best_params_

{'leaf_size': 20, 'n_neighbors': 2, 'weights': 'distance'}

In [18]:
# Verificar o score do modelo

grid.score(x_, y_)

1.0

### Exemplo III

In [19]:
# Criar os parametros para RF

RF_parameters = {'RF__max_depth':[1, 2, 3, 4, 5, 10], 
                 'RF__bootstrap': [True, False] }

In [20]:
# Cria os parametros para knn

knn_parameters = {'knn__n_neighbors': [2, 3, 5, 10], 
                 'knn__leaf_size': [20, 25, 30],
                 'knn__weights': ['uniform', 'distance']}

In [21]:
# Cria uma variável com os estimadores

estimadores = [('knn', KNeighborsClassifier(), knn_parameters ),
               ('RF', RandomForestClassifier(), RF_parameters ) ]

In [22]:
# Loop nos estimadores
for nome, estimador, parametros in estimadores:

  # Cria o pipeline
  pipe = Pipeline([('mms', MinMaxScaler()), 
                   (nome, estimador)] )
  
  # Cria o gridserch
  grid = GridSearchCV(pipe, param_grid=parametros, scoring='accuracy', cv=5)
  
  # Treina o modelo
  grid.fit(xt, yt)

  print(nome)
  print(grid.best_params_)
  print(grid.score(x_, y_))
  print()

knn
{'knn__leaf_size': 20, 'knn__n_neighbors': 3, 'knn__weights': 'uniform'}
1.0

RF
{'RF__bootstrap': True, 'RF__max_depth': 1}
1.0



### Exemplo IV

In [23]:
# Criar os parametros

parameters = [{'knn__n_neighbors': [2, 3, 5, 10], 
               'knn__leaf_size': [20, 25, 30],
               'knn__weights': ['uniform', 'distance']},
              {'RF__max_depth':[1, 2, 3, 4, 5, 10], 
                 'RF__bootstrap': [True, False] } ]

In [24]:
# Cria uma variável com os estimadores

estimadores = [('knn', KNeighborsClassifier()),
               ('RF', RandomForestClassifier()) ]

nome, estimador = zip(*estimadores)

In [25]:
# Loop nos estimadores
for nome, estimador, parametro in zip(nome, estimador, parameters):
  print(nome)

  # Cria o pipeline
  pipe = Pipeline([('mms', MinMaxScaler()), 
                   (nome, estimador)] )
  
  # Cria o gridserch
  grid = GridSearchCV(pipe, param_grid=parametro, scoring='accuracy', cv=5)
  
  # Treina o modelo
  grid.fit(xt, yt)

  print(nome)
  print(grid.best_params_)
  print(grid.score(x_, y_))
  print()

knn
knn
{'knn__leaf_size': 20, 'knn__n_neighbors': 3, 'knn__weights': 'uniform'}
1.0

RF
RF
{'RF__bootstrap': True, 'RF__max_depth': 3}
1.0

