# Utilizando GridSearchCV para selecionar os melhores parametros

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, recall_score, precision_score

In [2]:
transacoes = pd.read_csv('creditcard.csv')

### Seprando X e Y

In [3]:
X = transacoes.drop('Class',axis=1)
y = transacoes.Class

In [4]:
# Retirando temporariamente os warnings do nosso código
import warnings
warnings.filterwarnings('ignore')

### Treino e teste

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0, stratify=y)

# Podemos ajustar manualmente os hiperparametros para nosso modelo

Faremos um for para testar cada parametro do nosso modelo e selecionar os melhores

Testando 'newton-cg','lbfgs','liblinear' para o parametro SOLVER e os valores [1,100,1000] para C

In [6]:
# Treino e teste
X_train2, X_valid, y_train2, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=0, stratify=y_train)


# Parametros
C = [1,100,1000]
solver = ['newton-cg','lbfgs','liblinear']


# For para testar cada parametro
for i in C:
    for j in solver:
        clf_RL3 = LogisticRegression(random_state=42,
                                solver=j,C=i
                                ).fit(X_train2, y_train2)
        y_pred_RL3 = clf_RL3.predict(X_valid)
        print(i,j,recall_score(y_valid,y_pred_RL3))

1 newton-cg 0.6086956521739131
1 lbfgs 0.6666666666666666
1 liblinear 0.6376811594202898
100 newton-cg 0.6086956521739131
100 lbfgs 0.7246376811594203
100 liblinear 0.6376811594202898
1000 newton-cg 0.6086956521739131
1000 lbfgs 0.6956521739130435
1000 liblinear 0.6376811594202898


# Podemos automatizar essa seleção utilizando o `GridSearchCV`

In [7]:
from sklearn.model_selection import GridSearchCV

### Definindo os parâmetros que queremos testar

In [11]:
parametros = {
    'C': [1,100,1000],
    'solver': ['newton-cg','lbfgs','liblinear'],
}

### Selecionando o modelo: Regressão Logística 

In [12]:
LogReg = LogisticRegression(random_state=42)

### Criando um novo classificador usando os parâmetros que escolhemos anteriormente

In [13]:
clf_GS = GridSearchCV(LogReg, parametros,
                     scoring='recall')

### Fazendo o fit 

In [14]:
clf_GS = clf_GS.fit(X,y)

## Visualizando os melhores parâmetros definidos pelo GridSearchCV

In [15]:
 clf_GS.best_params_

{'C': 100, 'solver': 'lbfgs'}

## Usando esse modelo para fazer as previsões

In [16]:
y_pred_GS = clf_GS.predict(X_test)

## Analisando atraves das metricas

In [17]:
# Matriz de confusão
confusion_matrix(y_test,y_pred_GS)

array([[85248,    47],
       [   46,   102]], dtype=int64)

In [18]:
# Recall
recall_score(y_test,y_pred_GS)

0.6891891891891891

In [19]:
# Precisão
precision_score(y_test,y_pred_GS)

0.6845637583892618

## Podemos tambem visualizar tudo que foi feito

In [20]:
resultados = pd.DataFrame(clf_GS.cv_results_)
resultados

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,24.16476,4.142462,0.020953,0.00571,1,newton-cg,"{'C': 1, 'solver': 'newton-cg'}",0.79798,0.636364,0.428571,0.673469,0.459184,0.599114,0.137934,7
1,3.377363,0.169473,0.024169,0.002716,1,lbfgs,"{'C': 1, 'solver': 'lbfgs'}",0.959596,0.828283,0.581633,0.612245,0.153061,0.626964,0.274965,3
2,3.361018,0.846935,0.021702,0.000837,1,liblinear,"{'C': 1, 'solver': 'liblinear'}",0.919192,0.767677,0.459184,0.612245,0.234694,0.598598,0.238083,8
3,23.174236,3.130984,0.02274,0.003754,100,newton-cg,"{'C': 100, 'solver': 'newton-cg'}",0.777778,0.636364,0.438776,0.683673,0.469388,0.601196,0.128821,6
4,4.010283,0.572842,0.027197,0.005364,100,lbfgs,"{'C': 100, 'solver': 'lbfgs'}",0.949495,0.818182,0.561224,0.755102,0.316327,0.680066,0.220784,1
5,5.116999,1.678811,0.032029,0.009592,100,liblinear,"{'C': 100, 'solver': 'liblinear'}",0.919192,0.787879,0.44898,0.612245,0.336735,0.621006,0.213186,4
6,26.855522,4.246666,0.031442,0.013434,1000,newton-cg,"{'C': 1000, 'solver': 'newton-cg'}",0.79798,0.636364,0.438776,0.673469,0.469388,0.603195,0.133355,5
7,3.377253,0.145387,0.024232,0.002844,1000,lbfgs,"{'C': 1000, 'solver': 'lbfgs'}",0.959596,0.808081,0.591837,0.622449,0.204082,0.637209,0.254168,2
8,3.257097,0.552026,0.022652,0.000452,1000,liblinear,"{'C': 1000, 'solver': 'liblinear'}",0.919192,0.787879,0.459184,0.612245,0.183673,0.592435,0.256984,9
