# Importação de bibliotecas e da base

In [54]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Location of the Wine dataset file on the UCI archive server.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'

# Column labels (Brazilian Portuguese), applied positionally to the file's columns.
# The first column is the class label; the remaining thirteen are the features.
column_names_pt_br = [
    'classe',
    'alcool', 'acido_malico', 'cinzas', 'alcalinidade_de_cinzas',
    'magnesio', 'fenois_totais', 'flavanoides', 'fenois_nao_flavanoides',
    'proantocianinas', 'intensidade_de_cor', 'matiz',
    'od280_od315_de_vinhos_diluidos', 'prolina',
]

# Load the data. header=None only spells out what pandas already does when
# names= is supplied without header=: the first line of the file is read as data.
# 'classe' is kept as object so the label is treated as categorical, not numeric.
vinhos = pd.read_csv(
    url,
    header=None,
    names=column_names_pt_br,
    dtype={'classe': object},
)

# Função de avaliação do random state e do valor de k do KNN com Cross-Validation

In [56]:
def cross_validate_knn(random_state, vinhos, k_values=(3, 5), n_splits=10):
    """Compare KNN classifiers for several values of k with shuffled K-fold CV.

    For every fold produced by ``KFold(n_splits, shuffle=True, random_state)``
    one ``KNeighborsClassifier`` per value in ``k_values`` is fitted on the
    training part and scored with ``accuracy_score`` on the held-out part.
    The mean and (population) standard deviation of the fold accuracies are
    printed and returned.

    Notes:
        * Features are passed to KNN exactly as they appear in ``vinhos``;
          no scaling is applied here, so columns with large numeric ranges
          dominate the distance computation.
        * Folds come from plain ``KFold``; they are not stratified by class.

    Args:
        random_state: Seed forwarded to ``KFold`` to control the shuffling.
        vinhos: DataFrame holding a ``'classe'`` target column; every other
            column is used as a feature.
        k_values: Iterable of neighbour counts to compare. Duplicates are
            ignored (first occurrence wins). Defaults to ``(3, 5)``.
        n_splits: Number of cross-validation folds. Defaults to ``10``.

    Returns:
        dict with ``"random_state"``, then ``"mean_accuracy_k{k}"`` and
        ``"std_accuracy_k{k}"`` for each k (in the order given), and finally
        ``"best_k"``: the k with the strictly highest mean accuracy, or the
        string ``"Empate"`` when the highest mean is not unique.

    Raises:
        ValueError: If ``k_values`` is empty.
    """
    # De-duplicate while preserving order so each k is evaluated once per fold.
    k_values = tuple(dict.fromkeys(k_values))
    if not k_values:
        raise ValueError("k_values must contain at least one value of k")

    print(f"\nCross-validation com random_state={random_state}")

    # Split the frame into features and target.
    X = vinhos.drop('classe', axis=1)
    y = vinhos['classe']

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # Per-k list of fold accuracies.
    accuracies = {k: [] for k in k_values}

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Every k is scored on the very same folds, which keeps the comparison fair.
        for k in k_values:
            knn = KNeighborsClassifier(n_neighbors=k)
            knn.fit(X_train, y_train)
            accuracies[k].append(accuracy_score(y_test, knn.predict(X_test)))

    # Aggregate fold scores; insertion order of `summary` defines the returned key order.
    summary = {"random_state": random_state}
    means = {}
    for k in k_values:
        mean_k = np.mean(accuracies[k])
        std_k = np.std(accuracies[k])
        means[k] = mean_k
        summary[f"mean_accuracy_k{k}"] = mean_k
        summary[f"std_accuracy_k{k}"] = std_k
        print(f"Média de acurácia (k={k}): {mean_k:.4f} ± {std_k:.4f}")

    # A single k must hold the top mean to be declared best; anything else is a tie.
    top_mean = max(means.values())
    winners = [k for k in k_values if means[k] == top_mean]
    best_k = winners[0] if len(winners) == 1 else "Empate"

    print(f"Melhor valor de k: {best_k}")

    summary["best_k"] = best_k
    return summary

# Comparando resultados

In [59]:
# Run the cross-validation experiment once per shuffle seed and keep each summary dict.
random_states = [42, 17, 24]
results = [cross_validate_knn(random_state=rs, vinhos=vinhos) for rs in random_states]


Cross-validation com random_state=42
Média de acurácia (k=3): 0.7036 ± 0.1392
Média de acurácia (k=5): 0.6641 ± 0.1001
Melhor valor de k: 3

Cross-validation com random_state=17
Média de acurácia (k=3): 0.6987 ± 0.1040
Média de acurácia (k=5): 0.6876 ± 0.1121
Melhor valor de k: 3

Cross-validation com random_state=24
Média de acurácia (k=3): 0.7127 ± 0.0968
Média de acurácia (k=5): 0.7127 ± 0.0940
Melhor valor de k: Empate


In [61]:
# Print a heading followed by each per-seed summary dict on its own line.
print("\nResultados finais:", *results, sep="\n")


Resultados finais:
{'random_state': 42, 'mean_accuracy_k3': 0.70359477124183, 'std_accuracy_k3': 0.13918691592617136, 'mean_accuracy_k5': 0.6640522875816993, 'std_accuracy_k5': 0.10007472964927165, 'best_k': 3}
{'random_state': 17, 'mean_accuracy_k3': 0.6986928104575163, 'std_accuracy_k3': 0.10403044405660643, 'mean_accuracy_k5': 0.6875816993464052, 'std_accuracy_k5': 0.11211770116307625, 'best_k': 3}
{'random_state': 24, 'mean_accuracy_k3': 0.7127450980392157, 'std_accuracy_k3': 0.09681644876344142, 'mean_accuracy_k5': 0.7127450980392157, 'std_accuracy_k5': 0.09397285976504524, 'best_k': 'Empate'}
