In [1]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Location of the UCI Wine dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'

# Column names in Brazilian Portuguese; the first one is the class label,
# the remaining thirteen are the features.
column_names_pt_br = [
    'classe',
    'alcool', 'acido_malico', 'cinzas', 'alcalinidade_de_cinzas',
    'magnesio', 'fenois_totais', 'flavanoides', 'fenois_nao_flavanoides',
    'proantocianinas', 'intensidade_de_cor', 'matiz',
    'od280_od315_de_vinhos_diluidos', 'prolina',
]

# Load the CSV using the names above. 'classe' is forced to object dtype so
# the label is kept as text rather than parsed as a number.
vinhos = pd.read_csv(
    url,
    names=column_names_pt_br,
    dtype={'classe': object},
)

# Split into feature matrix (every column except the label) and target vector
X = vinhos.drop(columns='classe')
y = vinhos['classe']

# Run shuffled k-fold cross-validation and score KNN for several values of k
def avaliar_knn_kfold(random_state, k_values=(3, 5), n_splits=10):
    """Evaluate KNN classifiers with shuffled k-fold cross-validation.

    Uses the module-level ``X`` (features) and ``y`` (labels). For every fold,
    one ``KNeighborsClassifier`` per entry of ``k_values`` is fitted on the
    training rows and scored with ``accuracy_score`` on the held-out rows.
    A summary is printed and the same numbers are returned.

    NOTE(review): the features are handed to KNN exactly as stored in ``X``,
    with no scaling step. KNN is distance based, so confirm whether the
    features should be standardised before fitting.

    Args:
        random_state: Seed for the ``KFold`` shuffle.
        k_values: Neighbour counts to evaluate. Defaults to ``(3, 5)``.
        n_splits: Number of folds. Defaults to 10.

    Returns:
        Dict mapping each k to ``(mean_accuracy, std_accuracy)`` computed over
        the folds. The standard deviation is ``np.std`` with its defaults.
    """
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # One list of per-fold accuracies for each k, in the order given.
    fold_accuracies = {k: [] for k in k_values}

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Every k is scored on the same split so the results are comparable.
        for k in fold_accuracies:
            knn = KNeighborsClassifier(n_neighbors=k)
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)
            fold_accuracies[k].append(accuracy_score(y_test, y_pred))

    # Mean and standard deviation of the fold accuracies for each k
    results = {
        k: (np.mean(accuracies), np.std(accuracies))
        for k, accuracies in fold_accuracies.items()
    }

    print(f'Random State: {random_state}')
    for k, (mean_accuracy, std_accuracy) in results.items():
        print(f'k={k}: Mean Accuracy = {mean_accuracy}, Std = {std_accuracy}')
    print('-' * 40)

    return results

# Repeat the evaluation with several shuffle seeds
random_states = [42, 17, 24]
for state in random_states:
    avaliar_knn_kfold(random_state=state)


Random State: 42
k=3: Mean Accuracy = 0.70359477124183, Std = 0.13918691592617136
k=5: Mean Accuracy = 0.6640522875816993, Std = 0.10007472964927165
----------------------------------------
Random State: 17
k=3: Mean Accuracy = 0.6986928104575163, Std = 0.10403044405660643
k=5: Mean Accuracy = 0.6875816993464052, Std = 0.11211770116307625
----------------------------------------
Random State: 24
k=3: Mean Accuracy = 0.7127450980392157, Std = 0.09681644876344142
k=5: Mean Accuracy = 0.7127450980392157, Std = 0.09397285976504524
----------------------------------------
