In [None]:
import numpy as np
import joblib  # for saving the model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
import optuna
import optuna.visualization as vis

In [None]:
# Dataset variant; selects which kryptonite-<n> files are loaded below.
n = 9

X = np.load(f'Datasets_Train_Test_Split/kryptonite-{n}-X.npy')
y = np.load(f'Datasets_Train_Test_Split/kryptonite-{n}-y.npy')

# Hold out 10% of all data for the final model evaluation; the remaining 90%
# is what the hyperparameter search splits into train/validation.
X_train_total, X_test, y_train_total, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# Trackers for the best trial seen so far. `objective` declares all four as
# globals, so all four are initialised here: reading any of them before a
# trial has completed then yields None instead of raising NameError.
best_model = None
best_scaler = None
best_pca = None
best_accuracy = -1

In [None]:
def objective(trial):
    """Optuna objective: validation accuracy of StandardScaler -> PCA -> KNN.

    Splits the module-level ``X_train_total`` / ``y_train_total`` into train
    and validation parts, fits a scaler, a PCA projection and a KNN classifier
    with hyperparameters suggested by ``trial``, and scores the classifier on
    the validation part.

    Side effects: when the trial beats ``best_accuracy``, the globals
    ``best_model``, ``best_scaler``, ``best_pca`` and ``best_accuracy`` are
    updated and the fitted estimators are written to disk with joblib.

    Args:
        trial: the ``optuna`` trial used to sample ``n_components``,
            ``n_neighbors`` and ``weights``.

    Returns:
        The validation accuracy of this trial.

    Raises:
        optuna.TrialPruned: if the study's pruner rejects the reported value.
    """
    global best_model, best_scaler, best_accuracy, best_pca

    # Fixed random_state: every trial is scored on the same split.
    # NOTE(review): test_size=0.8 fits on 20% of the data and validates on
    # 80% -- confirm this is intended rather than test_size=0.2.
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_total, y_train_total, test_size=0.8, random_state=42
    )

    # Fit the preprocessing on the training part only, then apply it to the
    # validation part, so no validation statistics leak into the fit.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    pca = PCA(n_components=trial.suggest_int('n_components', 1, X_train_scaled.shape[1]))
    X_train_reduced = pca.fit_transform(X_train_scaled)
    X_val_reduced = pca.transform(X_val_scaled)

    # KNN classifier parameters
    classifier = KNeighborsClassifier(
        n_neighbors=trial.suggest_int('n_neighbors', 1, 50),
        weights=trial.suggest_categorical('weights', ['uniform', 'distance']),
        n_jobs=-1,
    )
    classifier.fit(X_train_reduced, y_train)

    # Evaluate on the validation set
    val_accuracy = accuracy_score(y_val, classifier.predict(X_val_reduced))

    # A single value is reported, after the fit has already finished, so a
    # pruned trial saves no compute here; it is only marked as pruned.
    trial.report(val_accuracy, step=0)
    if trial.should_prune():
        raise optuna.TrialPruned()

    # If this is the best model so far, keep it and persist it
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        best_model = classifier
        best_scaler = scaler
        best_pca = pca

        # The classifier was fitted on scaled, PCA-projected features, so it
        # is only usable together with the matching scaler and PCA. Persist
        # all three; the classifier keeps its original file name.
        joblib.dump(best_model, 'best_knn_model.joblib')
        joblib.dump(best_scaler, 'best_knn_scaler.joblib')
        joblib.dump(best_pca, 'best_knn_pca.joblib')
        print(f"New best model saved with accuracy: {best_accuracy:.4f}")

    return val_accuracy

In [None]:
# `objective` tracks the best estimators in module-level globals. Reset them
# so that re-running this cell cannot leave a model from a previous study in
# place (which would no longer match `study.best_params`).
best_model = None
best_scaler = None
best_pca = None
best_accuracy = -1

# Create a study object with a pruner. The sampler is seeded so that the
# sequence of suggested hyperparameters is reproducible across runs; TPE is
# Optuna's default sampler, so only the seeding is new.
pruner = optuna.pruners.MedianPruner(n_startup_trials=10)
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction="maximize", sampler=sampler, pruner=pruner)

# Optimize the objective function
study.optimize(objective, n_trials=100)

print("Best hyperparameters:", study.best_params)
print("Best value:", study.best_value)

# Evaluate the best model on the held-out test set, applying the same
# preprocessing (scaler, then PCA) that was fitted during its trial.
X_test_scaled = best_pca.transform(best_scaler.transform(X_test))
y_test_pred = best_model.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f'accuracy on test set:{test_accuracy}')

# Optional: Visualize optimization results
vis.plot_optimization_history(study).show()
vis.plot_param_importances(study).show()
vis.plot_slice(study).show()

[I 2024-11-13 22:30:20,830] A new study created in memory with name: no-name-ee940110-43e1-44c6-ad90-f8557f63d72f
[I 2024-11-13 22:30:21,107] Trial 0 finished with value: 0.8283179012345679 and parameters: {'n_components': 4, 'n_neighbors': 3, 'weights': 'uniform'}. Best is trial 0 with value: 0.8283179012345679.
[I 2024-11-13 22:30:21,198] Trial 1 finished with value: 0.6993055555555555 and parameters: {'n_components': 3, 'n_neighbors': 27, 'weights': 'distance'}. Best is trial 0 with value: 0.8283179012345679.
[I 2024-11-13 22:30:21,253] Trial 2 finished with value: 0.7043981481481482 and parameters: {'n_components': 3, 'n_neighbors': 18, 'weights': 'distance'}. Best is trial 0 with value: 0.8283179012345679.


New best model saved with accuracy: 0.8283


[I 2024-11-13 22:30:21,575] Trial 3 finished with value: 0.6345679012345679 and parameters: {'n_components': 3, 'n_neighbors': 41, 'weights': 'uniform'}. Best is trial 0 with value: 0.8283179012345679.
[I 2024-11-13 22:30:22,049] Trial 4 finished with value: 0.2169753086419753 and parameters: {'n_components': 9, 'n_neighbors': 21, 'weights': 'uniform'}. Best is trial 0 with value: 0.8283179012345679.
[I 2024-11-13 22:30:22,294] Trial 5 finished with value: 0.8517746913580246 and parameters: {'n_components': 7, 'n_neighbors': 19, 'weights': 'distance'}. Best is trial 5 with value: 0.8517746913580246.


New best model saved with accuracy: 0.8518


[I 2024-11-13 22:30:22,724] Trial 6 finished with value: 0.3090277777777778 and parameters: {'n_components': 7, 'n_neighbors': 26, 'weights': 'uniform'}. Best is trial 5 with value: 0.8517746913580246.
[I 2024-11-13 22:30:23,040] Trial 7 finished with value: 0.625 and parameters: {'n_components': 3, 'n_neighbors': 49, 'weights': 'uniform'}. Best is trial 5 with value: 0.8517746913580246.
[I 2024-11-13 22:30:23,370] Trial 8 finished with value: 0.6787808641975308 and parameters: {'n_components': 5, 'n_neighbors': 20, 'weights': 'uniform'}. Best is trial 5 with value: 0.8517746913580246.
[I 2024-11-13 22:30:23,639] Trial 9 finished with value: 0.5528549382716049 and parameters: {'n_components': 2, 'n_neighbors': 9, 'weights': 'uniform'}. Best is trial 5 with value: 0.8517746913580246.
[I 2024-11-13 22:30:23,924] Trial 10 finished with value: 0.7659722222222223 and parameters: {'n_components': 7, 'n_neighbors': 33, 'weights': 'distance'}. Best is trial 5 with value: 0.8517746913580246.
[I

New best model saved with accuracy: 0.9340


[I 2024-11-13 22:30:24,210] Trial 14 finished with value: 0.9232253086419753 and parameters: {'n_components': 9, 'n_neighbors': 1, 'weights': 'distance'}. Best is trial 11 with value: 0.9339506172839506.
[I 2024-11-13 22:30:24,481] Trial 15 finished with value: 0.8863425925925926 and parameters: {'n_components': 9, 'n_neighbors': 11, 'weights': 'distance'}. Best is trial 11 with value: 0.9339506172839506.
[I 2024-11-13 22:30:24,596] Trial 16 finished with value: 0.9305555555555556 and parameters: {'n_components': 8, 'n_neighbors': 6, 'weights': 'distance'}. Best is trial 11 with value: 0.9339506172839506.
[I 2024-11-13 22:30:24,751] Trial 17 finished with value: 0.9202160493827161 and parameters: {'n_components': 8, 'n_neighbors': 8, 'weights': 'distance'}. Best is trial 11 with value: 0.9339506172839506.
[I 2024-11-13 22:30:24,859] Trial 18 finished with value: 0.8471450617283951 and parameters: {'n_components': 5, 'n_neighbors': 14, 'weights': 'distance'}. Best is trial 11 with value

New best model saved with accuracy: 0.9363


[I 2024-11-13 22:30:25,634] Trial 24 finished with value: 0.9255401234567902 and parameters: {'n_components': 8, 'n_neighbors': 7, 'weights': 'distance'}. Best is trial 22 with value: 0.9362654320987654.
[I 2024-11-13 22:30:25,897] Trial 25 pruned. 
[I 2024-11-13 22:30:25,997] Trial 26 finished with value: 0.9330246913580247 and parameters: {'n_components': 7, 'n_neighbors': 6, 'weights': 'distance'}. Best is trial 22 with value: 0.9362654320987654.
[I 2024-11-13 22:30:26,092] Trial 27 finished with value: 0.937037037037037 and parameters: {'n_components': 7, 'n_neighbors': 5, 'weights': 'distance'}. Best is trial 27 with value: 0.937037037037037.
[I 2024-11-13 22:30:26,233] Trial 28 finished with value: 0.8925154320987654 and parameters: {'n_components': 6, 'n_neighbors': 13, 'weights': 'distance'}. Best is trial 27 with value: 0.937037037037037.


New best model saved with accuracy: 0.9370


[I 2024-11-13 22:30:26,293] Trial 29 pruned. 
[I 2024-11-13 22:30:26,368] Trial 30 finished with value: 0.9010802469135802 and parameters: {'n_components': 5, 'n_neighbors': 4, 'weights': 'distance'}. Best is trial 27 with value: 0.937037037037037.
[I 2024-11-13 22:30:26,445] Trial 31 finished with value: 0.9368827160493827 and parameters: {'n_components': 7, 'n_neighbors': 4, 'weights': 'distance'}. Best is trial 27 with value: 0.937037037037037.
[I 2024-11-13 22:30:26,514] Trial 32 finished with value: 0.939429012345679 and parameters: {'n_components': 7, 'n_neighbors': 3, 'weights': 'distance'}. Best is trial 32 with value: 0.939429012345679.
[I 2024-11-13 22:30:26,669] Trial 33 finished with value: 0.908641975308642 and parameters: {'n_components': 7, 'n_neighbors': 10, 'weights': 'distance'}. Best is trial 32 with value: 0.939429012345679.


New best model saved with accuracy: 0.9394


[I 2024-11-13 22:30:26,751] Trial 34 pruned. 
[I 2024-11-13 22:30:26,981] Trial 35 pruned. 
[I 2024-11-13 22:30:27,365] Trial 36 pruned. 
[I 2024-11-13 22:30:27,464] Trial 37 finished with value: 0.9361882716049382 and parameters: {'n_components': 9, 'n_neighbors': 4, 'weights': 'distance'}. Best is trial 32 with value: 0.939429012345679.
[I 2024-11-13 22:30:27,566] Trial 38 pruned. 
[I 2024-11-13 22:30:28,055] Trial 39 pruned. 
[I 2024-11-13 22:30:28,343] Trial 40 pruned. 
[I 2024-11-13 22:30:28,437] Trial 41 finished with value: 0.9361882716049382 and parameters: {'n_components': 9, 'n_neighbors': 4, 'weights': 'distance'}. Best is trial 32 with value: 0.939429012345679.
[I 2024-11-13 22:30:28,549] Trial 42 finished with value: 0.9362654320987654 and parameters: {'n_components': 9, 'n_neighbors': 5, 'weights': 'distance'}. Best is trial 32 with value: 0.939429012345679.
[I 2024-11-13 22:30:28,786] Trial 43 pruned. 
[I 2024-11-13 22:30:28,845] Trial 44 finished with value: 0.923225308

New best model saved with accuracy: 0.9400


[I 2024-11-13 22:30:29,404] Trial 47 pruned. 
[I 2024-11-13 22:30:29,529] Trial 48 pruned. 
[I 2024-11-13 22:30:29,650] Trial 49 finished with value: 0.9277006172839506 and parameters: {'n_components': 7, 'n_neighbors': 7, 'weights': 'distance'}. Best is trial 46 with value: 0.9399691358024691.
[I 2024-11-13 22:30:29,718] Trial 50 finished with value: 0.9399691358024691 and parameters: {'n_components': 8, 'n_neighbors': 3, 'weights': 'distance'}. Best is trial 46 with value: 0.9399691358024691.
[I 2024-11-13 22:30:29,787] Trial 51 finished with value: 0.9399691358024691 and parameters: {'n_components': 8, 'n_neighbors': 3, 'weights': 'distance'}. Best is trial 46 with value: 0.9399691358024691.
[I 2024-11-13 22:30:29,857] Trial 52 finished with value: 0.9218364197530864 and parameters: {'n_components': 8, 'n_neighbors': 2, 'weights': 'distance'}. Best is trial 46 with value: 0.9399691358024691.
[I 2024-11-13 22:30:29,926] Trial 53 finished with value: 0.939429012345679 and parameters: 

Best hyperparameters: {'n_components': 8, 'n_neighbors': 3, 'weights': 'distance'}
Best value: 0.9399691358024691
accuracy on test set:0.9383333333333334
