In [1]:
from sklearn.datasets import load_breast_cancer
import pandas as pd

data = load_breast_cancer()


data_df = pd.DataFrame(data = data.data,
                       columns = data.feature_names)
data_df['target'] = data.target
data = data_df

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
import numpy as np

np.random.seed(42)

class GreyWolfOptimizer:
    def __init__(self):
        self.search_space = None
        self.population = None
        self.fitness = None
        self.leader_position = None
        self.leader_fitness = None
        self.best_solution = None

    def initialize_search_space(self, lower_bound, upper_bound):
        self.search_space = np.vstack((lower_bound, upper_bound))

    def initialize_population(self, population_size=10):
        num_features = self.search_space.shape[1]
        self.population = np.random.uniform(low=self.search_space[0], high=self.search_space[1], size=(population_size, num_features))
        self.fitness = np.zeros(population_size)

    def calculate_fitness(self, X_train, y_train):
        for i, wolf in enumerate(self.population):
            k = int(round(wolf[0]))
            k = max(1, k)

            knn = KNeighborsClassifier(n_neighbors=k)
            knn.fit(X_train, y_train)
            self.fitness[i] = knn.score(X_train, y_train)

    def update_leader_positions(self):
        leader_index = np.argmax(self.fitness)
        self.leader_position = self.population[leader_index]
        self.leader_fitness = self.fitness[leader_index]
        self.best_solution = self.leader_position.copy()

    def update_positions(self, iteration, max_iterations):
        a = 2 - iteration * ((2) / max_iterations)

        num_wolves = self.population.shape[0]
        for i in range(num_wolves):
            A1 = 2 * np.random.rand() - 1
            C1 = 2 * np.random.rand()
            D_alpha = np.abs(C1 * self.leader_position - self.population[i])
            X1 = self.leader_position - A1 * D_alpha

            r1 = np.random.rand()
            A2 = 2 * r1 - 1
            C2 = 2 * np.random.rand()
            D_beta = np.abs(C2 * self.leader_position - self.population[i])
            X2 = self.leader_position - A2 * D_beta

            r2 = np.random.rand()
            A3 = 2 * r2 - 1
            C3 = 2 * np.random.rand()
            D_delta = np.abs(C3 * self.population[i] - self.population[i])
            X3 = self.population[i] - A3 * D_delta

            updated_wolf = (X1 + X2 + X3) / 3
            updated_wolf = np.clip(updated_wolf, self.search_space[0], self.search_space[1])
            self.population[i] = updated_wolf

            # Increment k value
            self.population[i][0] = self.population[i][0] + a

    def get_best_solution(self):
        return self.best_solution


def knn_gwo(X_train, y_train, X_test, y_test, gwo_iterations=10):
    lower_bound = [1, 0.01]
    upper_bound = [10, 0.99]

    gwo = GreyWolfOptimizer()
    gwo.initialize_search_space(lower_bound, upper_bound)
    gwo.initialize_population()

    best_accuracy = 0.0
    best_k = 1

    for iteration in range(gwo_iterations):
        gwo.calculate_fitness(X_train, y_train)
        gwo.update_leader_positions()
        gwo.update_positions(iteration, gwo_iterations)

        best_solution = gwo.get_best_solution()
        k = int(round(best_solution[0]))
        k = max(1, k)

        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        y_pred = knn.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        if accuracy >= best_accuracy:
            best_accuracy = accuracy
            best_k = k

        print("Iterasi:", iteration, "| Akurasi:", accuracy, "| K terbaik:", best_k)

    return best_k


# Contoh penggunaan dengan dataset iris
iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

k_best = knn_gwo(X_train, y_train, X_test, y_test, gwo_iterations=10)
print("Nilai K terbaik:", k_best)


Iterasi: 0 | Akurasi: 1.0 | K terbaik: 1
Iterasi: 1 | Akurasi: 1.0 | K terbaik: 5
Iterasi: 2 | Akurasi: 1.0 | K terbaik: 10
Iterasi: 3 | Akurasi: 1.0 | K terbaik: 10
Iterasi: 4 | Akurasi: 1.0 | K terbaik: 10
Iterasi: 5 | Akurasi: 1.0 | K terbaik: 10
Iterasi: 6 | Akurasi: 1.0 | K terbaik: 10
Iterasi: 7 | Akurasi: 1.0 | K terbaik: 10
Iterasi: 8 | Akurasi: 1.0 | K terbaik: 10
Iterasi: 9 | Akurasi: 1.0 | K terbaik: 10
Nilai K terbaik: 10


In [3]:
X = data.iloc[:,:30].values
y = data['target'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
k_best = knn_gwo(X_train, y_train, X_test, y_test, gwo_iterations=10)
print("Nilai K terbaik:", k_best)

Iterasi: 0 | Akurasi: 0.9298245614035088 | K terbaik: 1
Iterasi: 1 | Akurasi: 0.9385964912280702 | K terbaik: 4
Iterasi: 2 | Akurasi: 0.956140350877193 | K terbaik: 7
Iterasi: 3 | Akurasi: 0.956140350877193 | K terbaik: 8
Iterasi: 4 | Akurasi: 0.956140350877193 | K terbaik: 8
Iterasi: 5 | Akurasi: 0.9824561403508771 | K terbaik: 11
Iterasi: 6 | Akurasi: 0.9824561403508771 | K terbaik: 11
Iterasi: 7 | Akurasi: 0.956140350877193 | K terbaik: 11
Iterasi: 8 | Akurasi: 0.956140350877193 | K terbaik: 11
Iterasi: 9 | Akurasi: 0.9298245614035088 | K terbaik: 11
Nilai K terbaik: 11


In [5]:
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
acc = accuracy_score(y_test, y_pred,)
print(acc)
print(roc_auc_score(y_test, y_pred))

0.9736842105263158
0.969701932525385


In [6]:
knn = KNeighborsClassifier(n_neighbors=11)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
acc = accuracy_score(y_test, y_pred,)
print(acc)
print(roc_auc_score(y_test, y_pred))

0.9824561403508771
0.9767441860465117
