In [17]:
import numpy as np
import joblib  # for saving the model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.decomposition import KernelPCA
from sklearn.neighbors import KNeighborsClassifier
import optuna
import optuna.visualization as vis

# Dataset variant identifier; it is interpolated into the file names below.
n = 9

X = np.load(f'Datasets/kryptonite-{n}-X.npy')
y = np.load(f'Datasets/kryptonite-{n}-y.npy')

# Hold out 10% of all data for the final model evaluation. The remaining 90%
# (X_train_total / y_train_total) is what the tuning objective splits further.
X_train_total, X_test, y_train_total, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# Best-so-far state shared with `objective` through `global`: the winning
# classifier, the scaler it was fitted with, and its validation accuracy.
# `best_scaler` is initialised here so that every name `objective` declares
# global actually exists at module level before the study starts.
best_model = None
best_scaler = None
best_accuracy = -1

def objective(trial):
    """Train and score one KNN configuration for an Optuna trial.

    Splits ``X_train_total`` / ``y_train_total`` with a fixed seed,
    standardises the features with a scaler fitted on the training part only,
    fits a ``KNeighborsClassifier`` using the trial-suggested ``n_neighbors``
    and ``weights``, and scores it on the validation part.

    Whenever the score beats ``best_accuracy``, the module-level
    ``best_model``, ``best_scaler`` and ``best_accuracy`` are updated and both
    the classifier and its fitted scaler are written to disk, so the saved
    model can later be applied to raw (unscaled) features.

    Args:
        trial: The Optuna trial used to suggest hyperparameters and to report
            the validation accuracy.

    Returns:
        The validation accuracy of the fitted classifier.

    Raises:
        optuna.TrialPruned: If the study's pruner rejects the reported value.
    """
    global best_model, best_scaler, best_accuracy

    # The seed is fixed, so every trial is scored on the same split.
    # NOTE(review): test_size=0.8 leaves only 20% of the rows for fitting and
    # validates on the other 80% -- confirm this ratio is intended.
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_total, y_train_total, test_size=0.8, random_state=42
    )

    # Fit the scaler on the training part only, then reuse it for validation.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    # Disabled experiment: optional KernelPCA step, kept for reference.
    # # Suggest dimensionality reduction and kernel parameters
    # use_pca = trial.suggest_categorical('use_pca', ['True', 'False'])

    # if use_pca == 'True':
    #     kernel_pca = trial.suggest_categorical('pca_kernel', ['linear', 'poly', 'rbf', 'sigmoid', 'cosine'])
    #     params_pca = {}

    #     if kernel_pca == 'poly':
    #         params_pca['degree'] = trial.suggest_int('degree_pca', 1, 10)
    #     if kernel_pca in ['poly', 'rbf', 'sigmoid']:
    #         params_pca['gamma'] = trial.suggest_float('gamma_pca', 0.1, 10)

    #     params_pca['n_jobs'] = -1
    #     params_pca['n_components'] = trial.suggest_int('n_pca_components', 1, max(1, X_train_scaled.shape[1]))

    #     pca = KernelPCA(kernel=kernel_pca, **params_pca)
    #     X_train_scaled = pca.fit_transform(X_train_scaled)
    #     X_val_scaled = pca.transform(X_val_scaled)

    # KNN classifier with trial-suggested hyperparameters
    classifier = KNeighborsClassifier(
        n_neighbors=trial.suggest_int('n_neighbors', 1, 50),
        weights=trial.suggest_categorical('weights', ['uniform', 'distance']),
        n_jobs=-1
    )

    # Train the classifier
    classifier.fit(X_train_scaled, y_train)

    # Evaluate on the validation set
    y_val_pred = classifier.predict(X_val_scaled)
    val_accuracy = accuracy_score(y_val, y_val_pred)

    # Report the validation accuracy so the pruner can judge this trial.
    # The single report happens after the model is already fully evaluated,
    # so pruning here saves no compute; it only marks the trial as pruned.
    trial.report(val_accuracy, step=0)

    if trial.should_prune():
        raise optuna.TrialPruned()

    # If this is the best model so far, remember it and persist it
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        best_model = classifier
        best_scaler = scaler
        joblib.dump(best_model, 'best_knn_model.joblib')
        # The classifier was trained on standardised features, so its scaler
        # must be persisted too or the saved model is unusable on raw data.
        joblib.dump(best_scaler, 'best_knn_scaler.joblib')
        print(f"New best model saved with accuracy: {best_accuracy:.4f}")

    return val_accuracy

# Create a study that maximises the objective's return value, with a median pruner
pruner = optuna.pruners.MedianPruner()
study = optuna.create_study(direction="maximize", pruner=pruner)

# Optimize the objective function
study.optimize(objective, n_trials=100)

print("Best hyperparameters:", study.best_params)
print("Best value:", study.best_value)
print("Best model saved as 'best_knn_model.joblib'.")

# Optional: Visualize optimization results
vis.plot_optimization_history(study).show()
vis.plot_param_importances(study).show()
vis.plot_slice(study).show()

# Final evaluation on the held-out test set (not the validation split used
# during tuning), using the scaler that was fitted together with the best model.
X_test_scaled = best_scaler.transform(X_test)
y_test_pred = best_model.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_test_pred)
# Legacy name kept so any later code that reads `val_accuracy` still works.
val_accuracy = test_accuracy
print(f"Test accuracy: {test_accuracy:.4f}")


[I 2024-11-12 18:57:22,475] A new study created in memory with name: no-name-89367883-5d49-4b42-a7cd-90fe849b3569
[I 2024-11-12 18:57:23,167] Trial 0 finished with value: 0.25887345679012347 and parameters: {'n_neighbors': 33, 'weights': 'uniform'}. Best is trial 0 with value: 0.25887345679012347.


New best model saved with accuracy: 0.2589


[I 2024-11-12 18:57:23,786] Trial 1 finished with value: 0.49691358024691357 and parameters: {'n_neighbors': 13, 'weights': 'uniform'}. Best is trial 1 with value: 0.49691358024691357.


New best model saved with accuracy: 0.4969


[I 2024-11-12 18:57:24,503] Trial 2 finished with value: 0.2740740740740741 and parameters: {'n_neighbors': 35, 'weights': 'uniform'}. Best is trial 1 with value: 0.49691358024691357.
[I 2024-11-12 18:57:24,642] Trial 3 finished with value: 0.9238425925925926 and parameters: {'n_neighbors': 7, 'weights': 'distance'}. Best is trial 3 with value: 0.9238425925925926.


New best model saved with accuracy: 0.9238


[I 2024-11-12 18:57:25,280] Trial 4 finished with value: 0.3358024691358025 and parameters: {'n_neighbors': 16, 'weights': 'uniform'}. Best is trial 3 with value: 0.9238425925925926.
[I 2024-11-12 18:57:25,901] Trial 5 pruned. 
[I 2024-11-12 18:57:26,501] Trial 6 finished with value: 0.6261574074074074 and parameters: {'n_neighbors': 11, 'weights': 'uniform'}. Best is trial 3 with value: 0.9238425925925926.
[I 2024-11-12 18:57:26,628] Trial 7 finished with value: 0.9238425925925926 and parameters: {'n_neighbors': 7, 'weights': 'distance'}. Best is trial 3 with value: 0.9238425925925926.
[I 2024-11-12 18:57:26,785] Trial 8 finished with value: 0.8006944444444445 and parameters: {'n_neighbors': 19, 'weights': 'distance'}. Best is trial 3 with value: 0.9238425925925926.
[I 2024-11-12 18:57:26,944] Trial 9 finished with value: 0.7631944444444444 and parameters: {'n_neighbors': 23, 'weights': 'distance'}. Best is trial 3 with value: 0.9238425925925926.
[I 2024-11-12 18:57:27,157] Trial 10 f

New best model saved with accuracy: 0.9301


[I 2024-11-12 18:57:27,986] Trial 18 finished with value: 0.9361882716049382 and parameters: {'n_neighbors': 4, 'weights': 'distance'}. Best is trial 18 with value: 0.9361882716049382.


New best model saved with accuracy: 0.9362


[I 2024-11-12 18:57:28,188] Trial 19 pruned. 
[I 2024-11-12 18:57:28,232] Trial 20 finished with value: 0.9232253086419753 and parameters: {'n_neighbors': 1, 'weights': 'distance'}. Best is trial 18 with value: 0.9361882716049382.
[I 2024-11-12 18:57:28,421] Trial 21 pruned. 
[I 2024-11-12 18:57:28,519] Trial 22 finished with value: 0.9362654320987654 and parameters: {'n_neighbors': 5, 'weights': 'distance'}. Best is trial 22 with value: 0.9362654320987654.
[I 2024-11-12 18:57:28,585] Trial 23 finished with value: 0.9361882716049382 and parameters: {'n_neighbors': 4, 'weights': 'distance'}. Best is trial 22 with value: 0.9362654320987654.


New best model saved with accuracy: 0.9363


[I 2024-11-12 18:57:28,737] Trial 24 pruned. 
[I 2024-11-12 18:57:28,816] Trial 25 finished with value: 0.9398148148148148 and parameters: {'n_neighbors': 3, 'weights': 'distance'}. Best is trial 25 with value: 0.9398148148148148.
[I 2024-11-12 18:57:28,956] Trial 26 pruned. 


New best model saved with accuracy: 0.9398


[I 2024-11-12 18:57:29,021] Trial 27 finished with value: 0.9361882716049382 and parameters: {'n_neighbors': 4, 'weights': 'distance'}. Best is trial 25 with value: 0.9398148148148148.
[I 2024-11-12 18:57:29,207] Trial 28 pruned. 
[I 2024-11-12 18:57:29,844] Trial 29 pruned. 
[I 2024-11-12 18:57:30,042] Trial 30 pruned. 
[I 2024-11-12 18:57:30,109] Trial 31 finished with value: 0.9361882716049382 and parameters: {'n_neighbors': 4, 'weights': 'distance'}. Best is trial 25 with value: 0.9398148148148148.
[I 2024-11-12 18:57:30,176] Trial 32 finished with value: 0.9361882716049382 and parameters: {'n_neighbors': 4, 'weights': 'distance'}. Best is trial 25 with value: 0.9398148148148148.
[I 2024-11-12 18:57:30,307] Trial 33 pruned. 
[I 2024-11-12 18:57:30,899] Trial 34 pruned. 
[I 2024-11-12 18:57:30,955] Trial 35 finished with value: 0.9361882716049382 and parameters: {'n_neighbors': 4, 'weights': 'distance'}. Best is trial 25 with value: 0.9398148148148148.
[I 2024-11-12 18:57:31,509] Tr

Best hyperparameters: {'n_neighbors': 3, 'weights': 'distance'}
Best value: 0.9398148148148148
Best model saved as 'best_knn_model.joblib'.
