## Preparação dos dados

In [14]:
import numpy as np

from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [15]:
import pickle
with open('credit.pkl', 'rb') as f:
    X_credit_train, y_credit_train, X_credit_test, y_credit_test = pickle.load(f)

In [3]:
X_credit_train.shape, y_credit_train.shape

((24435, 26), (24435,))

In [4]:
X_credit_test.shape, y_credit_test.shape

((8146, 26), (8146,))

In [16]:
X_credit = np.concatenate((X_credit_train, X_credit_test), axis = 0)
X_credit.shape

(32581, 26)

In [6]:
X_credit

array([[ 1.19308855e+00, -5.74015997e-02, -2.93499485e-01, ...,
        -2.22770397e+00, -7.51108864e-01, -6.91554445e-01],
       [ 1.19308855e+00, -5.74015997e-02, -2.93499485e-01, ...,
        -6.64057930e-01, -1.89206505e-01,  4.82840703e-02],
       [-8.38160758e-01, -5.74015997e-02, -2.93499485e-01, ...,
         9.44327625e-01,  1.02824861e+00, -4.44941606e-01],
       ...,
       [ 1.19308855e+00, -5.74015997e-02, -2.93499485e-01, ...,
         5.66278726e-01, -1.90571838e-03,  4.82840703e-02],
       [ 1.19308855e+00, -5.74015997e-02, -2.93499485e-01, ...,
        -4.64967089e-01, -7.51108864e-01, -9.38167283e-01],
       [-8.38160758e-01, -5.74015997e-02, -2.93499485e-01, ...,
        -2.22770397e+00,  1.30919979e+00, -4.44941606e-01]])

In [17]:
y_credit = np.concatenate((y_credit_train, y_credit_test), axis = 0)
y_credit.shape

(32581,)

In [8]:
y_credit

array([0, 0, 1, ..., 0, 0, 1], dtype=int64)

## Árvore de Decisão

In [9]:
parameters = {'criterion' : ["gini", "entropy"], 
             'splitter' : ["best", "random"],
             'min_samples_split' : [2, 5, 10],
             'min_samples_leaf' : [1, 5 ,10]}

In [10]:
grid_search = GridSearchCV(estimator = DecisionTreeClassifier(), param_grid = parameters, )
grid_search.fit(X_credit, y_credit)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_
melhor_estimativa = grid_search.best_estimator_

In [11]:
print(f'Melhores parametros: {melhores_parametros}')
print(f'Score: {melhor_resultado}')

Melhores parametros: {'criterion': 'entropy', 'min_samples_leaf': 10, 'min_samples_split': 10, 'splitter': 'random'}
Score: 0.9210275001594261


## Randon Forest

In [12]:
parameters = {'criterion': ['gini', 'entropy'],
              'n_estimators': [10, 50, 100],
              'min_samples_split': [2, 5, 10],
              'min_samples_leaf': [1, 5, 10]}

In [13]:
grid_search = GridSearchCV(estimator = RandomForestClassifier(), param_grid = parameters)
grid_search.fit(X_credit, y_credit)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_

In [14]:
print(f'Melhores parametros: {melhores_parametros}')
print(f'Score: {melhor_resultado}')

Melhores parametros: {'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Score: 0.9343481698194447


## KNN

In [15]:
parameters = {'n_neighbors' : [3, 5, 10, 15],
              'p' : [1, 2]}

In [16]:
grid_search = GridSearchCV(estimator=KNeighborsClassifier(), param_grid=parameters)
grid_search.fit(X_credit, y_credit)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_

In [17]:
print(f'Melhores parametros: {melhores_parametros}')
print(f'Score: {melhor_resultado}')

Melhores parametros: {'n_neighbors': 15, 'p': 1}
Score: 0.8980693549938288


## Regressão Logística

In [18]:
parameters = {'tol': [0.0001, 0.00001, 0.000001],
              'C': [1.0, 1.5, 2.0],
              'solver': ['lbfgs', 'sag', 'saga']}

In [19]:
grid_search = GridSearchCV(estimator=LogisticRegression(), param_grid=parameters)
grid_search.fit(X_credit, y_credit)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_



In [20]:
print(f'Melhores parametros: {melhores_parametros}')
print(f'Score: {melhor_resultado}')

Melhores parametros: {'C': 1.0, 'solver': 'lbfgs', 'tol': 0.0001}
Score: 0.8666400092763951


## SVM

In [21]:
parameters = {'tol': [0.001, 0.0001, 0.00001],
              'C': [1.0, 1.5, 2.0],
              'kernel': ['rbf', 'linear', 'poly', 'sigmoid']}

In [22]:
grid_search = GridSearchCV(estimator=SVC(), param_grid=parameters)
grid_search.fit(X_credit, y_credit)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_

In [23]:
print(f'Melhores parametros: {melhores_parametros}')
print(f'Score: {melhor_resultado}')

Melhores parametros: {'C': 2.0, 'kernel': 'rbf', 'tol': 0.001}
Score: 0.9147355977797315


## Redes Neurais

In [18]:
parameters = {'activation': ['logistic', 'tanh', 'relu'],
              'solver': ['sgd', 'adam'],
              'batch_size': [10, 56]}

In [19]:
grid_search = GridSearchCV(estimator=MLPClassifier(), param_grid=parameters)
grid_search.fit(X_credit, y_credit)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_





In [21]:
print(f'Melhores parametros: {melhores_parametros}')
print(f'Score: {melhor_resultado}')

Melhores parametros: {'activation': 'logistic', 'batch_size': 10, 'solver': 'sgd'}
Score: 0.9190632414086668
