In [1]:
# ========== IMPORT LIBRARY ==========
import numpy as np  # Library untuk operasi matematika dan array
from collections import Counter  # Untuk menghitung frekuensi/voting
import matplotlib.pyplot as plt  # Library untuk visualisasi/grafik

class KNN:
    def __init__(self, k=3):
        """
        Inisialisasi KNN classifier
        k: jumlah tetangga terdekat yang digunakan
        """
        self.k = k

    def fit(self, X, y):
        """
        Menyimpan data training
        X: fitur training data (array 2D)
        y: label training data (array 1D)
        """
        self.X_train = X
        self.y_train = y

    def euclidean_distance(self, x1, x2):
        """
        Menghitung jarak Euclidean antara dua titik
        """
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def predict(self, X):
        """
        Memprediksi label untuk data baru
        X: data yang akan diprediksi (array 2D)
        """
        predictions = [self._predict_single(x) for x in X]
        return np.array(predictions)

    def _predict_single(self, x):
        """
        Memprediksi label untuk satu data point
        """
        # Hitung jarak ke semua training data
        distances = [self.euclidean_distance(x, x_train) for x_train in self.X_train]

        # Dapatkan k tetangga terdekat
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        # Voting: pilih label yang paling sering muncul
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]


# ========== CONTOH PENGGUNAAN ==========

# 1. Buat dataset sederhana (contoh klasifikasi buah)
# Fitur: [berat (gram), diameter (cm)]
X_train = np.array([
    [150, 7],   # Apel
    [170, 7.5], # Apel
    [140, 6.8], # Apel
    [130, 6.5], # Apel
    [200, 9],   # Jeruk
    [210, 9.5], # Jeruk
    [190, 8.8], # Jeruk
    [220, 10],  # Jeruk
])

# Label: 0 = Apel, 1 = Jeruk
y_train = np.array([0, 0, 0, 0, 1, 1, 1, 1])

# 2. Data testing
X_test = np.array([
    [145, 7],   # Prediksi: Apel
    [205, 9.2], # Prediksi: Jeruk
    [160, 7.3], # Prediksi: ?
])

# 3. Training dan prediksi
knn = KNN(k=3)
knn.fit(X_train, y_train)
predictions = knn.predict(X_test)

# 4. Tampilkan hasil
print("=" * 50)
print("K-NEAREST NEIGHBORS (KNN) - HASIL PREDIKSI")
print("=" * 50)
print(f"Nilai K: {knn.k}")
print("\nData Training:")
print("Berat (g) | Diameter (cm) | Label")
print("-" * 40)
for i, (x, y) in enumerate(zip(X_train, y_train)):
    label_name = "Apel" if y == 0 else "Jeruk"
    print(f"   {x[0]:3d}    |     {x[1]:4.1f}      | {label_name}")

print("\n" + "=" * 50)
print("HASIL PREDIKSI:")
print("=" * 50)
for i, (x, pred) in enumerate(zip(X_test, predictions)):
    label_name = "Apel" if pred == 0 else "Jeruk"
    print(f"Data {i+1}: Berat={x[0]}g, Diameter={x[1]}cm")
    print(f"â†’ Prediksi: {label_name} (label: {pred})")
    print()

# 5. Visualisasi (opsional)
plt.figure(figsize=(10, 6))

# Plot training data
apel = X_train[y_train == 0]
jeruk = X_train[y_train == 1]

plt.scatter(apel[:, 0], apel[:, 1], c='red', marker='o', s=100,
            label='Apel (Training)', edgecolors='black')
plt.scatter(jeruk[:, 0], jeruk[:, 1], c='orange', marker='o', s=100,
            label='Jeruk (Training)', edgecolors='black')

# Plot testing data
for i, (x, pred) in enumerate(zip(X_test, predictions)):
    color = 'red' if pred == 0 else 'orange'
    plt.scatter(x[0], x[1], c=color, marker='*', s=300,
                edgecolors='blue', linewidths=2,
                label=f'Test Data {i+1}' if i < 3 else '')

plt.xlabel('Berat (gram)', fontsize=12)
plt.ylabel('Diameter (cm)', fontsize=12)
plt.title(f'K-Nearest Neighbors (K={knn.k})', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

print("\n" + "=" * 50)
print("CARA KERJA KNN:")
print("=" * 50)
print("1. Hitung jarak dari data baru ke semua data training")
print("2. Pilih K tetangga terdekat")
print("3. Voting: ambil label yang paling banyak muncul")
print("4. Label tersebut menjadi hasil prediksi")
print("=" * 50)

K-NEAREST NEIGHBORS (KNN) - HASIL PREDIKSI
Nilai K: 3

Data Training:
Berat (g) | Diameter (cm) | Label
----------------------------------------


ValueError: Unknown format code 'd' for object of type 'float'