In [19]:
import warnings

from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from bayes_opt import BayesianOptimization

from utils import preprocess_data

In [20]:
warnings.filterwarnings("ignore")

X_train, X_test, y_train, y_test, train_df, test_df = preprocess_data(standardise=True)

In [21]:
def optimize_knn(n_neighbors, leaf_size, p):
    model = KNeighborsClassifier(
        n_neighbors=int(n_neighbors),
        leaf_size=int(leaf_size),
        p=int(p)
    )
    return cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()


In [22]:
param_space = {
    'n_neighbors': (1, 30),
    'leaf_size': (20, 40),
    'p': (1, 2)  # Minkowski metric parameter (1 for Manhattan, 2 for Euclidean)
}


In [23]:
optimizer = BayesianOptimization(
    f=optimize_knn,
    pbounds=param_space,
    random_state=42
)

# Start the optimization process
optimizer.maximize(init_points=5, n_iter=55)


|   iter    |  target   | leaf_size | n_neig... |     p     |
-------------------------------------------------------------
| [39m1        [39m | [39m0.7637   [39m | [39m27.49    [39m | [39m28.57    [39m | [39m1.732    [39m |
| [35m2        [39m | [35m0.7901   [39m | [35m31.97    [39m | [35m5.525    [39m | [35m1.156    [39m |
| [39m3        [39m | [39m0.7698   [39m | [39m21.16    [39m | [39m26.12    [39m | [39m1.601    [39m |
| [39m4        [39m | [39m0.7475   [39m | [39m34.16    [39m | [39m1.597    [39m | [39m1.97     [39m |
| [35m5        [39m | [35m0.7962   [39m | [35m36.65    [39m | [35m7.158    [39m | [35m1.182    [39m |
| [39m6        [39m | [39m0.7901   [39m | [39m32.0     [39m | [39m5.591    [39m | [39m1.182    [39m |
| [39m7        [39m | [39m0.7962   [39m | [39m35.05    [39m | [39m9.67     [39m | [39m1.0      [39m |
| [35m8        [39m | [35m0.7983   [39m | [35m39.27    [39m | [35m10.27    [39m | [

In [24]:
best_params = optimizer.max['params']
best_model = KNeighborsClassifier(
    n_neighbors=int(best_params['n_neighbors']),
    leaf_size=int(best_params['leaf_size']),
    p=int(best_params['p'])
)
best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Optimized KNN Accuracy: {accuracy}")


Optimized KNN Accuracy: 0.7967479674796748


In [25]:
model = KNeighborsClassifier()

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Optimized KNN Accuracy: {accuracy}")

Optimized KNN Accuracy: 0.7560975609756098
