In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

# 1. 유클리드 거리 함수
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        x1: First point; any array-like of numbers (list, tuple, ndarray).
        x2: Second point; must be broadcast-compatible with ``x1``.

    Returns:
        The distance as a NumPy float scalar.
    """
    # Convert to float arrays up front so plain lists/tuples are accepted and
    # unsigned-integer inputs cannot wrap around during the subtraction.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

# 2. KNN 예측 함수
def knn_predict(x, X_train, y_train, k):
    """Predict the label of ``x`` by majority vote among its k nearest neighbours.

    Args:
        x: Query point (array-like of feature values).
        X_train: Training points, one row per sample.
        y_train: Training labels, aligned with the rows of ``X_train``.
        k: Number of neighbours that vote; must be at least 1. A value larger
            than the training set size uses every training sample.

    Returns:
        The most frequent label among the k nearest training samples. When
        several labels share the top count, the first one in ``np.unique``
        (sorted) order is returned.

    Raises:
        ValueError: If ``k`` is less than 1 or the training set is empty.
    """
    if k < 1:
        # A negative k would otherwise slice "all but the last |k|" neighbours.
        raise ValueError(f"k must be at least 1, got {k}")

    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train)  # allows plain lists to be fancy-indexed
    if len(X_train) == 0:
        raise ValueError("X_train must contain at least one sample")

    # Euclidean distance from the query to every training row in one
    # vectorised step instead of a Python-level loop.
    samples = X_train.reshape(len(X_train), -1)
    query = np.asarray(x, dtype=float).reshape(-1)
    distances = np.sqrt(np.sum((samples - query) ** 2, axis=1))

    # A stable sort makes the choice among equidistant neighbours
    # deterministic: the one that appears first in the training set wins.
    nearest = np.argsort(distances, kind="stable")[:k]

    labels, counts = np.unique(y_train[nearest], return_counts=True)
    return labels[np.argmax(counts)]

# 3. 정확도 평가 함수
def compute_accuracy(X_val, y_val, X_train, y_train, k):
    """Return the fraction of validation samples that KNN labels correctly.

    Each validation sample is classified with ``knn_predict`` against the
    training data, and the prediction is compared with its true label.

    Args:
        X_val: Validation samples to classify.
        y_val: True labels, paired with ``X_val`` element by element.
        X_train: Training samples passed through to ``knn_predict``.
        y_train: Training labels passed through to ``knn_predict``.
        k: Number of neighbours passed through to ``knn_predict``.

    Returns:
        Number of correct predictions divided by ``len(y_val)``.

    Raises:
        ZeroDivisionError: If ``y_val`` is empty.
    """
    hits = sum(
        1
        for sample, truth in zip(X_val, y_val)
        if knn_predict(sample, X_train, y_train, k) == truth
    )
    return hits / len(y_val)

# 4. 데이터 불러오기 및 전처리
# Load Iris and keep only the first 130 samples, using petal length and
# petal width (feature columns 2 and 3) as the inputs.
iris = load_iris()
y = iris.target[:130]
X = iris.data[:130, [2, 3]]

# 5. Train/validation split: shuffle the sample indices with a fixed seed,
# then cut at 75% (97 training samples, 33 validation samples).
np.random.seed(10000)
indices = np.random.permutation(len(X))
split = int(0.75 * len(X))  # position in `indices` where validation starts
train_idx, val_idx = indices[:split], indices[split:]

# Materialise both partitions from the shuffled index arrays.
X_train, X_val = X[train_idx], X[val_idx]
y_train, y_val = y[train_idx], y[val_idx]

# 6. k값 최적화
# Score every candidate k on the validation set and remember the first one
# that reaches the highest accuracy.
k_candidates = range(1, 16)
best_k, best_acc = None, 0.0

print("k값별 정확도:")
for k in k_candidates:
    acc = compute_accuracy(X_val, y_val, X_train, y_train, k)
    print(f"k = {k} → 정확도: {acc:.2f}")
    if acc <= best_acc:
        continue  # only a strict improvement replaces the current best
    best_k, best_acc = k, acc

print(f"\n✅ 최적의 k값은: {best_k} (정확도: {best_acc:.2f})")

# Classify the two samples just past the 130 used above (rows 130 and 131)
# with the selected k, and print each prediction next to its true label.
pred_131 = knn_predict(iris.data[130, [2, 3]], X_train, y_train, best_k)
print(f"DATA#131 prediction : {pred_131}, actual : {iris.target[130]}")
pred_132 = knn_predict(iris.data[131, [2, 3]], X_train, y_train, best_k)
print(f"DATA#132 prediction : {pred_132}, actual : {iris.target[131]}")

k값별 정확도:
k = 1 → 정확도: 0.91
k = 2 → 정확도: 0.94
k = 3 → 정확도: 0.94
k = 4 → 정확도: 0.94
k = 5 → 정확도: 0.94
k = 6 → 정확도: 0.94
k = 7 → 정확도: 0.94
k = 8 → 정확도: 0.94
k = 9 → 정확도: 0.94
k = 10 → 정확도: 0.94
k = 11 → 정확도: 0.94
k = 12 → 정확도: 0.94
k = 13 → 정확도: 0.94
k = 14 → 정확도: 0.97
k = 15 → 정확도: 0.97

✅ 최적의 k값은: 14 (정확도: 0.97)
DATA#131 prediction : 2, actual : 2
DATA#132 prediction : 2, actual : 2
