In [30]:
# Load the Wine dataset bundled with scikit-learn. Later cells read its
# `data`, `target` and `target_names` attributes.
from sklearn.datasets import load_wine

wine = load_wine()

In [31]:
# Split the dataset into model inputs and expected outputs:
#   X -> every row and every column of the feature matrix
#   y -> the true class label of each row
X, y = wine.data[:, :], wine.target

In [32]:
# Train/test split: 33% of the samples are held out for testing, and the
# random_state is pinned so the same split is produced on every run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [33]:
# Fit a Random Forest classifier on the training split and predict the
# held-out test split.
from sklearn.ensemble import RandomForestClassifier

# Seed the forest (same seed as the train/test split) so that repeated runs
# build the same trees and the metrics reported below are reproducible.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)

In [34]:
# Metrics for the Random Forest: accuracy, recall and precision, each rounded
# to 6 decimal places. Recall and precision use the 'weighted' average over
# the classes.
# y_pred must still hold the Random Forest predictions when this cell runs;
# the KNN cell further down rebinds the same name.
from sklearn.metrics import accuracy_score, recall_score, precision_score

rfc_acc, rfc_rec, rfc_pre = (
    round(raw_score, 6)
    for raw_score in (
        accuracy_score(y_test, y_pred),
        recall_score(y_test, y_pred, average='weighted'),
        precision_score(y_test, y_pred, average='weighted'),
    )
)

In [35]:
# Report the Random Forest metrics, one "label value" line per metric.
print('=== Random Forest Classifier ===')
for label, value in (
    ('Acurácia: ', rfc_acc),
    ('Recall: ', rfc_rec),
    ('Precision: ', rfc_pre),
):
    print(label, value)

=== Random Forest Classifier ===
Acurácia:  1.0
Recall:  1.0
Precision:  1.0


In [36]:
# Fit a 3-nearest-neighbours classifier on the training split and predict the
# held-out test split.
# NOTE(review): the features are passed in unscaled; KNN is distance-based, so
# standardising them first may be worth trying — confirm.
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=3)
# This rebinds y_pred, replacing the Random Forest predictions made earlier.
y_pred = knn.fit(X_train, y_train).predict(X_test)

In [37]:
# Metrics for KNN: accuracy, recall and precision, each rounded to 6 decimal
# places. Recall and precision use the 'weighted' average over the classes.
# y_pred is expected to hold the KNN predictions at this point.
knn_acc, knn_rec, knn_pre = (
    round(raw_score, 6)
    for raw_score in (
        accuracy_score(y_test, y_pred),
        recall_score(y_test, y_pred, average='weighted'),
        precision_score(y_test, y_pred, average='weighted'),
    )
)

In [38]:
# Report the KNN metrics, one "label value" line per metric.
print('=== KNN ===')
for label, value in (
    ('Acurácia: ', knn_acc),
    ('Recall: ', knn_rec),
    ('Precision: ', knn_pre),
):
    print(label, value)

=== KNN ===
Acurácia:  0.694915
Recall:  0.694915
Precision:  0.698231


In [39]:
# Side-by-side comparison; on every line the KNN score comes first and the
# Random Forest score second.
print('KNN vs Random Forest\n')
print('Classes: {0}\n'.format(wine.target_names))
for template, knn_score, rfc_score in (
    ('Acurácia: {0} vs {1}', knn_acc, rfc_acc),
    ('Recall: {0} vs {1}', knn_rec, rfc_rec),
    ('Precision: {0} vs {1}', knn_pre, rfc_pre),
):
    print(template.format(knn_score, rfc_score))

KNN vs Random Forest

Classes: ['class_0' 'class_1' 'class_2']

Acurácia: 0.694915 vs 1.0
Recall: 0.694915 vs 1.0
Precision: 0.698231 vs 1.0


In [40]:
# Cross-validation: score both models on the full dataset and report the mean
# score of each one.
from sklearn.model_selection import cross_val_score

# The fold count is passed explicitly so it does not depend on the default of
# the installed scikit-learn version.
cv_rfc = cross_val_score(rfc, X, y, cv=5)
cv_knn = cross_val_score(knn, X, y, cv=5)
# Per-fold scores, KNN first and Random Forest second.
print('\nValidação Cruzada:\n{0} vs \n{1} '.format(cv_knn, cv_rfc))

# Average over the number of scores actually returned rather than dividing by
# a hard-coded 5, so the mean stays correct if the fold count changes.
sum_cv_rfc = cv_rfc.sum()
print('\nResultado Random Forest: {0}'.format(sum_cv_rfc / len(cv_rfc)))

sum_cv_knn = cv_knn.sum()
print('\nResultado KNN: {0}'.format(sum_cv_knn / len(cv_knn)))


Validação Cruzada:
[0.63888889 0.69444444 0.66666667 0.65714286 0.85714286] vs 
[0.97222222 0.94444444 0.97222222 0.97142857 1.        ] 

Resultado Random Forest: 0.9720634920634922

Resultado KNN: 0.7028571428571428


In [41]:
# Hyperparameter search
from sklearn.model_selection import GridSearchCV

# Random Forest: try every integer min_samples_split from 2 to 6.
# A (2, 6) tuple is read by GridSearchCV as the two candidates 2 and 6, not as
# a range, so the values in between were never evaluated.
parameters = {'min_samples_split': list(range(2, 7))}
rfc_hps = GridSearchCV(rfc, parameters)
rfc_hps.fit(X, y)
print('Melhor valor para min_samples_split: {0}'.format(rfc_hps.best_params_['min_samples_split']))

Melhor valor para min_samples_split: 2


In [26]:
# KNN: try every n_neighbors from 1 to 20.
# A (1, 20) tuple is read by GridSearchCV as the two candidates 1 and 20, not
# as a range, so the values in between were never evaluated.
parameters = {'n_neighbors': list(range(1, 21))}
knn_hps = GridSearchCV(knn, parameters)
knn_hps.fit(X, y)
print('Melhor valor para n_neighbors: {0}'.format(knn_hps.best_params_['n_neighbors']))

Melhor valor para n_neighbors: 1
