In [1]:
import pandas as pd

# Location of the Wine dataset file on the UCI archive
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'

# Column names in Brazilian Portuguese. They are handed to read_csv as `names`,
# so they label the file's columns in exactly this order ('classe' comes first).
column_names_pt_br = [
    'classe',
    'alcool',
    'acido_malico',
    'cinzas',
    'alcalinidade_de_cinzas',
    'magnesio',
    'fenois_totais',
    'flavanoides',
    'fenois_nao_flavanoides',
    'proantocianinas',
    'intensidade_de_cor',
    'matiz',
    'od280_od315_de_vinhos_diluidos',
    'prolina'
]

# Read the CSV using the names above; 'classe' is forced to dtype object so the
# class label is not parsed as a number.
vinhos = pd.read_csv(url, names=column_names_pt_br, dtype={'classe': object})

In [2]:
# Split the frame into the target vector (class label) and the feature matrix
# (every remaining column).
y = vinhos['classe']
X = vinhos.drop(columns='classe')

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Seeds used for the train/test split and the neighbour counts to compare.
random_states = [42, 17, 24]
k_values = [3, 5]

# NOTE(review): the features go into KNN unscaled. KNN is distance-based, so
# columns with large magnitudes may dominate; consider standardising the
# features (fit on the training split only) - confirm before changing, since
# it will alter the accuracies reported below.
for random_state in random_states:
    # The split depends only on the seed, not on k, so it is built once per
    # seed and shared by every k (same 70/30 stratified partition as before).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=random_state, stratify=y
    )

    for k in k_values:
        # Fit on the training split and score on the held-out split.
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        y_pred = knn.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        print(f"Experimento com random_state={random_state}, k={k}:")
        print(f"Acurácia: {accuracy}")
        print("-" * 20)

Experimento com random_state=42, k=3:
Acurácia: 0.6851851851851852
--------------------
Experimento com random_state=42, k=5:
Acurácia: 0.7222222222222222
--------------------
Experimento com random_state=17, k=3:
Acurácia: 0.7592592592592593
--------------------
Experimento com random_state=17, k=5:
Acurácia: 0.7592592592592593
--------------------
Experimento com random_state=24, k=3:
Acurácia: 0.7037037037037037
--------------------
Experimento com random_state=24, k=5:
Acurácia: 0.6851851851851852
--------------------


In [6]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# X (features) and y (class labels) are reused from the data-loading cells at
# the top of the notebook; the dataset is not downloaded and split a second
# time here.

# Seeds used to shuffle the folds and the neighbour counts to compare.
random_states = [42, 17, 24]
k_values = [3, 5]

# NOTE(review): plain KFold does not look at the class labels, so folds may be
# class-imbalanced; a stratified splitter is the usual choice for
# classification. The features are also unscaled. Both changes would alter the
# numbers reported below - confirm before changing.
for random_state in random_states:
    # The fold layout depends only on the seed, so one splitter is shared by
    # every k. With an integer seed each split() call yields the same folds.
    kf = KFold(n_splits=10, shuffle=True, random_state=random_state)

    for k in k_values:
        accuracies = []
        for train_index, test_index in kf.split(X):
            # kf.split yields positional indices, hence .iloc.
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]

            knn = KNeighborsClassifier(n_neighbors=k)
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            accuracies.append(accuracy)

        # Mean and standard deviation over the 10 folds (np.std uses its
        # default ddof=0, i.e. the population standard deviation).
        mean_accuracy = np.mean(accuracies)
        std_accuracy = np.std(accuracies)
        print(f"Experimento com random_state={random_state}, k={k}:")
        print(f"Acurácia Média: {mean_accuracy}")
        print(f"Desvio Padrão da Acurácia: {std_accuracy}")
        print("-" * 20)

Experimento com random_state=42, k=3:
Acurácia Média: 0.70359477124183
Desvio Padrão da Acurácia: 0.13918691592617136
--------------------
Experimento com random_state=42, k=5:
Acurácia Média: 0.6640522875816993
Desvio Padrão da Acurácia: 0.10007472964927165
--------------------
Experimento com random_state=17, k=3:
Acurácia Média: 0.6986928104575163
Desvio Padrão da Acurácia: 0.10403044405660643
--------------------
Experimento com random_state=17, k=5:
Acurácia Média: 0.6875816993464052
Desvio Padrão da Acurácia: 0.11211770116307625
--------------------
Experimento com random_state=24, k=3:
Acurácia Média: 0.7127450980392157
Desvio Padrão da Acurácia: 0.09681644876344142
--------------------
Experimento com random_state=24, k=5:
Acurácia Média: 0.7127450980392157
Desvio Padrão da Acurácia: 0.09397285976504524
--------------------


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
import numpy as np

# X (features) and y (class labels) are reused from the data-loading cells at
# the top of the notebook; the dataset is not downloaded and split again here.

# Seeds used to shuffle the folds and the neighbour counts to compare.
random_states = [42, 17, 24]
k_values = [3, 5]

# results[random_state][k] -> (mean accuracy, std of accuracy) over the folds.
results = {}

# NOTE(review): plain KFold does not look at the class labels and the features
# are unscaled; a stratified splitter and feature standardisation are the usual
# choices here. Both would alter the reported numbers - confirm before changing.
for random_state in random_states:
    results[random_state] = {}

    # The fold layout depends only on the seed, so one splitter is shared by
    # every k. With an integer seed each split() call yields the same folds.
    kf = KFold(n_splits=10, shuffle=True, random_state=random_state)

    for k in k_values:
        accuracies = []
        for train_index, test_index in kf.split(X):
            # kf.split yields positional indices, hence .iloc.
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]

            knn = KNeighborsClassifier(n_neighbors=k)
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            accuracies.append(accuracy)

        # Mean and standard deviation over the 10 folds (np.std uses its
        # default ddof=0, i.e. the population standard deviation).
        mean_accuracy = np.mean(accuracies)
        std_accuracy = np.std(accuracies)
        results[random_state][k] = (mean_accuracy, std_accuracy)
        print(f"Experimento com random_state={random_state}, k={k}:")
        print(f"Acurácia Média: {mean_accuracy}")
        print(f"Desvio Padrão da Acurácia: {std_accuracy}")
        print("-" * 20)

# Pick the best k for each random_state by mean accuracy.
# `results` maps random_state -> {k: (mean_accuracy, std_accuracy)}.
for random_state, k_results in results.items():
    # -1 is the fallback when no k was evaluated for this seed; it matches the
    # sentinel the comparison previously started from.
    best_accuracy = max(
        (mean_accuracy for mean_accuracy, _ in k_results.values()),
        default=-1,
    )

    # Every k whose mean accuracy equals the maximum, in evaluation order.
    # Exact float equality is intentional: only identical means count as a tie.
    tied_ks = [
        k for k, (mean_accuracy, _) in k_results.items()
        if mean_accuracy == best_accuracy
    ]

    if not tied_ks:
        best_k = None
    elif len(tied_ks) == 1:
        best_k = tied_ks[0]
    else:
        # Build the tie message from the k values that actually tied, rather
        # than a fixed "k=3 e k=5" text that is wrong for other k_values.
        best_k = "Empate entre " + " e ".join(f"k={k}" for k in tied_ks)

    print(f"Melhor valor de k para random_state={random_state}: {best_k} (Acurácia: {best_accuracy})")

Experimento com random_state=42, k=3:
Acurácia Média: 0.70359477124183
Desvio Padrão da Acurácia: 0.13918691592617136
--------------------
Experimento com random_state=42, k=5:
Acurácia Média: 0.6640522875816993
Desvio Padrão da Acurácia: 0.10007472964927165
--------------------
Experimento com random_state=17, k=3:
Acurácia Média: 0.6986928104575163
Desvio Padrão da Acurácia: 0.10403044405660643
--------------------
Experimento com random_state=17, k=5:
Acurácia Média: 0.6875816993464052
Desvio Padrão da Acurácia: 0.11211770116307625
--------------------
Experimento com random_state=24, k=3:
Acurácia Média: 0.7127450980392157
Desvio Padrão da Acurácia: 0.09681644876344142
--------------------
Experimento com random_state=24, k=5:
Acurácia Média: 0.7127450980392157
Desvio Padrão da Acurácia: 0.09397285976504524
--------------------
Melhor valor de k para random_state=42: 3 (Acurácia: 0.70359477124183)
Melhor valor de k para random_state=17: 3 (Acurácia: 0.6986928104575163)
Melhor valor