In [11]:
import pickle
import numpy as np
import pandas as pd

from sklearn.model_selection import cross_val_score, KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [2]:
# Read the four pre-split arrays stored in the pickle file.
# NOTE: pickle.load can execute arbitrary code, so 'credit.pkl' must come
# from a trusted source.
with open('credit.pkl', 'rb') as arquivo:
    particoes = pickle.load(arquivo)

X_credit_train, y_credit_train, X_credit_test, y_credit_test = particoes

In [4]:
# Join the train and test partitions along the first axis (np.concatenate's
# default) so the whole dataset is available as a single array.
X_credit = np.concatenate([X_credit_train, X_credit_test])

In [5]:
# Dimensions of the concatenated train+test array X_credit.
X_credit.shape

(32581, 26)

In [6]:
# Join the train and test partitions along the first axis (np.concatenate's
# default), in the same train-then-test order used for X_credit.
y_credit = np.concatenate([y_credit_train, y_credit_test])

In [7]:
# Dimensions of the concatenated train+test array y_credit.
y_credit.shape

(32581,)

In [8]:
# One independent, initially empty accumulator per classifier; each one
# receives a mean cross-validation score per repetition.
(
    resultados_arvore,
    resultados_random_forest,
    resultados_knn,
    resultados_logistica,
    resultados_svm,
    resultados_rede_neural,
) = (list() for _ in range(6))

In [14]:
# Repeated 10-fold cross-validation: 30 repetitions, each with a differently
# shuffled fold assignment (random_state=i), producing one mean score per
# classifier per repetition.

# Empty the accumulators first so that re-running this cell does not append a
# second batch of 30 scores on top of the ones from a previous run.
for acumulador in (resultados_arvore, resultados_random_forest, resultados_knn,
                   resultados_logistica, resultados_svm, resultados_rede_neural):
    acumulador.clear()

for i in range(30):
    kfold = KFold(n_splits=10, shuffle=True, random_state=i)

    # The decision tree (splitter='random'), the random forest and the MLP all
    # draw random numbers while fitting. Seeding them with the repetition index
    # keeps the repetitions distinct while making the scores reproducible on a
    # fresh kernel.
    arvore = DecisionTreeClassifier(criterion='entropy', min_samples_leaf=10,
                                    min_samples_split=10, splitter='random',
                                    random_state=i)
    randon_forest = RandomForestClassifier(criterion='entropy', min_samples_leaf=1,
                                           min_samples_split=2, n_estimators=50,
                                           random_state=i)
    knn = KNeighborsClassifier(n_neighbors=15, p=1)
    logistica = LogisticRegression(C=1.0, solver='lbfgs', tol=0.0001)
    svm = SVC(C=2.0, kernel='rbf', tol=0.001)
    rede_neural = MLPClassifier(activation='relu', batch_size=56, solver='adam',
                                random_state=i)

    # Evaluate every classifier on the same folds and record its mean score.
    # The evaluation order matches the original cell.
    for modelo, resultados_modelo in (
        (arvore, resultados_arvore),
        (randon_forest, resultados_random_forest),
        (knn, resultados_knn),
        (logistica, resultados_logistica),
        (svm, resultados_svm),
        (rede_neural, resultados_rede_neural),
    ):
        scores = cross_val_score(modelo, X_credit, y_credit, cv=kfold)
        resultados_modelo.append(scores.mean())















In [38]:
# Collect the per-repetition mean scores into one table: one row per
# repetition, one column per classifier.
resultados = pd.DataFrame({
    'Random forest': resultados_random_forest,
    'KNN': resultados_knn,
    'Logistica': resultados_logistica,
    'SVM': resultados_svm,
    'Rede neural': resultados_rede_neural,
    # Fixed: this column previously reused resultados_logistica, which made
    # 'Arvore' a duplicate of 'Logistica' instead of the decision tree scores.
    'Arvore': resultados_arvore,
})

In [46]:
# Show the results table: one row per repetition, one column per classifier.
resultados

Unnamed: 0,Random forest,KNN,Logistica,SVM,Rede neural,Arvore
0,0.93398,0.899727,0.866487,0.914214,0.917099,0.920445
1,0.933704,0.899359,0.866057,0.914306,0.919861,0.919278
2,0.933704,0.900034,0.866364,0.914644,0.91937,0.92158
3,0.933704,0.89942,0.866211,0.914245,0.91713,0.91888
4,0.93312,0.899543,0.86664,0.914551,0.917958,0.916731
5,0.933704,0.900095,0.866426,0.914398,0.919493,0.920721
6,0.934195,0.900126,0.866394,0.914674,0.917651,0.919309
7,0.933642,0.900065,0.86661,0.914306,0.917774,0.920506
8,0.933918,0.899972,0.866303,0.914306,0.916853,0.921273
9,0.933612,0.899451,0.866916,0.914889,0.917467,0.920444


In [47]:
# Summary statistics (count, mean, std, min, quartiles, max) of each
# classifier's scores across the repetitions.
resultados.describe()

Unnamed: 0,Random forest,KNN,Logistica,SVM,Rede neural,Arvore
count,30.0,30.0,30.0,30.0,30.0,30.0
mean,0.933814,0.899748,0.866484,0.914464,0.918223,0.920051
std,0.000343,0.000329,0.0002,0.000237,0.000778,0.00106
min,0.93312,0.898959,0.866057,0.91406,0.916853,0.916731
25%,0.933619,0.899566,0.866341,0.914306,0.91782,0.919355
50%,0.93375,0.899742,0.866487,0.914413,0.918097,0.920337
75%,0.934003,0.899965,0.866632,0.914605,0.918603,0.920713
max,0.934502,0.900525,0.866916,0.91495,0.919861,0.921887


In [48]:
# Variance of each classifier's scores across the repetitions
# (pandas default, i.e. the sample variance with ddof=1).
resultados.var()

Random forest    1.176210e-07
KNN              1.083169e-07
Logistica        3.990895e-08
SVM              5.637967e-08
Rede neural      6.051081e-07
Arvore           1.124050e-06
dtype: float64

In [49]:
# Coefficient of variation per classifier: the standard deviation of its
# scores expressed as a percentage of their mean.
resultados.std().div(resultados.mean()).mul(100)

Random forest    0.036727
KNN              0.036579
Logistica        0.023056
SVM              0.025965
Rede neural      0.084717
Arvore           0.115234
dtype: float64