In [None]:
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.colors import ListedColormap
from sklearn.datasets import load_wine
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the wine dataset bundle and pull out the feature matrix, the labels
# and the class names.
dset = load_wine()
X, y = dset.data, dset.target
wine = dset.target_names

# Show the shape of the features and of the labels, one per line.
print(X.shape, y.shape, sep="\n")

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
class kNN:
    """Minimal k-nearest-neighbours classifier using Euclidean distance.

    Parameters
    ----------
    k : int, default=3
        Number of nearest training samples that vote on each prediction.
        If ``k`` exceeds the number of training samples, every training
        sample votes.

    Raises
    ------
    ValueError
        If ``k`` is not an integer greater than or equal to 1.
    """

    def __init__(self, k=3):
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X, y):
        """Memorise the training samples (kNN has no real training step).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training feature matrix.
        y : array-like of shape (n_samples,)
            Training labels.

        Returns
        -------
        kNN
            The instance itself, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not a non-empty 2-D array, or ``y`` is not a 1-D
            array with one label per row of ``X``.
        """
        X = np.array(X)
        y = np.array(y)

        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError(
                f"X must be a non-empty 2-D array, got shape {X.shape}"
            )
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError(
                f"y must be 1-D with {X.shape[0]} labels, got shape {y.shape}"
            )

        self.X_train = X
        self.y_train = y
        return self

    def predict(self, X_test):
        """Predict a label for every query sample by majority vote.

        Parameters
        ----------
        X_test : array-like of shape (n_queries, n_features) or (n_features,)
            Query samples. A flat vector is treated as a single sample
            (or, for a one-feature model, as one sample per entry).

        Returns
        -------
        numpy.ndarray of shape (n_queries,)
            Predicted labels. When several labels tie, the label that
            appears first among the neighbours sorted by distance wins.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        ValueError
            If the number of features differs from the training data.
        """
        if not hasattr(self, "X_train"):
            raise AttributeError("kNN is not fitted yet; call fit() first")

        X_test = np.array(X_test)
        if X_test.size == 0:
            return np.array([], dtype=self.y_train.dtype)

        n_features = self.X_train.shape[1]
        if X_test.ndim == 1:
            # Without this, iterating a flat vector yields scalars that are
            # silently broadcast against every training column.
            if n_features == 1:
                X_test = X_test.reshape(-1, 1)
            else:
                X_test = X_test.reshape(1, -1)
        if X_test.ndim != 2 or X_test.shape[1] != n_features:
            raise ValueError(
                f"X_test must have {n_features} features per sample, "
                f"got shape {X_test.shape}"
            )

        predictions = []
        for x in X_test:
            distances = np.linalg.norm(self.X_train - x, axis=1)
            # A stable sort makes equidistant neighbours keep their
            # training order, so predictions are reproducible.
            k_indices = np.argsort(distances, kind="stable")[:self.k]
            k_labels = self.y_train[k_indices]
            most_common = Counter(k_labels).most_common(1)[0][0]
            predictions.append(most_common)

        return np.array(predictions)

In [None]:
# Train the hand-written classifier on the scaled training split and predict
# the labels of the scaled test split.
model_manual = kNN(k=3)
model_manual.fit(X=X_train_scaled, y=y_train)
y_pred_manual = model_manual.predict(X_test=X_test_scaled)

In [None]:
# Fraction of test labels the hand-written classifier predicted correctly.
acc_manual = accuracy_score(y_true=y_test, y_pred=y_pred_manual)
print(f"acurácia: {acc_manual:.2f}")

In [None]:
# Reference implementation: scikit-learn's kNN with the same neighbour count,
# fitted and evaluated on the same scaled splits.
model_sklearn = KNeighborsClassifier(n_neighbors=3).fit(X_train_scaled, y_train)
y_pred_sklearn = model_sklearn.predict(X_test_scaled)

In [None]:
# Fraction of test labels the scikit-learn classifier predicted correctly.
acc_sklearn = accuracy_score(y_true=y_test, y_pred=y_pred_sklearn)
print(f"acurácia: {acc_sklearn:.2f}")

In [None]:
# Candidate neighbour counts to compare.
k_values = range(1, 21)
metric_values = []

# Choose k with cross-validation on the training split only. Scoring each
# candidate on the test split would tune the hyper-parameter on the same data
# later used to report the final accuracy, making that number optimistic.
n_folds = 5

for k in k_values:
    # Scaling lives inside the pipeline so each fold's scaler is fitted only
    # on that fold's training part.
    candidate = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=k))
    fold_scores = cross_val_score(
        candidate, X_train, y_train, cv=n_folds, scoring="accuracy"
    )
    metric_values.append(fold_scores.mean())

# np.argmax returns the first maximum, so ties go to the smallest k.
best_k = k_values[int(np.argmax(metric_values))]
best_acc = max(metric_values)

print(f"\n o melhor valor de k é {best_k}, com acurácia {best_acc:.2f}")

In [None]:
# Line chart of the accuracy recorded for each candidate k, with a dashed
# vertical marker on the selected value.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(k_values, metric_values, marker='o', linewidth=2, markersize=8)
ax.axvline(best_k, color="r", linestyle="--", label=f"o melhor k = {best_k}")

ax.set_xticks(k_values)
ax.set_xlabel("valores de k", fontsize=12)
ax.set_ylabel("acurácia", fontsize=12)
ax.set_title("acurácia em função de k", fontsize=14)
ax.grid(True, alpha=0.3)
ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Refit on the scaled training split with the selected k, then score the
# predictions made for the scaled test split.
final_model = KNeighborsClassifier(n_neighbors=best_k).fit(X_train_scaled, y_train)
y_pred_final = final_model.predict(X_test_scaled)
acc_final = accuracy_score(y_test, y_pred_final)

print(f"\n acurácia final: {acc_final:.2f}")

In [None]:
# PCA keeps the directions of largest variance, so it is sensitive to the
# units of each column: on raw features, any column with a large numeric
# range would dominate both components and inflate the explained-variance
# figure. Standardise first so every feature contributes on equal footing.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(StandardScaler().fit_transform(X))

print(f"variância 2 componentes principais: {sum(pca.explained_variance_ratio_):.2%}")

In [None]:
# Classifier for the 2-D projection, reusing the selected neighbour count.
model_pca = KNeighborsClassifier(n_neighbors=best_k)
k = best_k

In [None]:
# Fit on every projected sample: this model is only used to visualise the
# projected data, not to measure accuracy.
model_pca.fit(X=X_pca, y=y)

In [None]:
# Plot limits: the range of each of the two components, padded by one unit on
# every side.
x_min, y_min = X_pca[:, :2].min(axis=0) - 1
x_max, y_max = X_pca[:, :2].max(axis=0) + 1

In [None]:
target_names = dset.target_names

# Saturated colours for the samples, pale versions of the same hues for the
# predicted regions behind them.
cmap_bold = ["#ff1493", "#8a2be2", "#00ced1"]
cmap_light = ListedColormap(["#ffb6c1", "#d8b8ff", "#b2ffff"])

# Evaluate the classifier on a regular grid covering the plotted area; the
# colour changes between neighbouring grid cells are the decision boundaries.
# A fixed number of points per axis keeps the cost bounded whatever the range.
grid_points_per_axis = 300
grid_x, grid_y = np.meshgrid(
    np.linspace(x_min, x_max, grid_points_per_axis),
    np.linspace(y_min, y_max, grid_points_per_axis),
)
grid_pred = model_pca.predict(np.c_[grid_x.ravel(), grid_y.ravel()])
grid_pred = grid_pred.reshape(grid_x.shape)

fig, ax = plt.subplots()

# vmin/vmax pin label i to colour i even if some class never wins a region.
ax.pcolormesh(
    grid_x, grid_y, grid_pred,
    cmap=cmap_light, vmin=0, vmax=len(target_names) - 1, shading="auto",
)

for i, target_name in enumerate(target_names):
    ax.scatter(X_pca[y == i, 0], X_pca[y == i, 1],
               color=cmap_bold[i], edgecolor="black",
               label=target_name, s=50, alpha=0.9)

ax.set_xlim(x_min, x_max)
ax.set_ylim(y_min, y_max)
ax.set_title("fronteiras de decisão")
ax.set_xlabel("componente 1")
ax.set_ylabel("componente 2")
ax.legend(title="classe do vinho")
plt.show()