In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.manifold import TSNE
from sklearn.neighbors import KNeighborsClassifier
import optuna
import optuna.visualization as vis

n = 9

# Load dataset
X = np.load(f'Datasets/kryptonite-{n}-X.npy')
y = np.load(f'Datasets/kryptonite-{n}-y.npy')

def objective(trial):
    ### ========== Shuffle and Split Data ========== ###
    
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.8, random_state=42)
    
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    
    # Suggest whether to use t-SNE and its parameters
    use_tsne = trial.suggest_categorical('use_tsne', ['True', 'False'])
    
    if use_tsne == 'True':
        tsne_perplexity = trial.suggest_float('tsne_perplexity', 5, 50)
        tsne_learning_rate = trial.suggest_float('tsne_learning_rate', 10, 1000)
        
        # Perform t-SNE dimensionality reduction to 2 components
        tsne = TSNE(
            n_components=2,
            perplexity=tsne_perplexity,
            learning_rate=tsne_learning_rate,
            max_iter=250,
            random_state=42
        )
        
        X_train_scaled = tsne.fit_transform(X_train_scaled)
        X_val_scaled = tsne.fit_transform(X_val_scaled)

    ### ========== KNN Classifier ========== ###
    
    # KNN Classifier parameters
    classifier = KNeighborsClassifier(
        n_neighbors=trial.suggest_int('n_neighbors', 5, 50),
        weights=trial.suggest_categorical('weights', ['uniform', 'distance']),
        n_jobs=-1
    )
    
    # Train the classifier
    classifier.fit(X_train_scaled, y_train)
    
    # Evaluate on the validation set
    y_val_pred = classifier.predict(X_val_scaled)
    val_accuracy = accuracy_score(y_val, y_val_pred)
    
    # Report validation accuracy for pruning
    trial.report(val_accuracy, step=0)
    
    # Check if the trial should be pruned
    if trial.should_prune():
        raise optuna.TrialPruned()
    
    return val_accuracy

# Create a study object with a pruner
pruner = optuna.pruners.MedianPruner()
study = optuna.create_study(direction="maximize", pruner=pruner)

# Optimize the objective function
study.optimize(objective, n_trials=100)

print("Best hyperparameters:", study.best_params)
print("Best value:", study.best_value)

# Visualize the optimization history and parameter importances
vis.plot_optimization_history(study).show()
vis.plot_param_importances(study).show()
vis.plot_slice(study).show()


[I 2024-11-11 19:55:25,311] A new study created in memory with name: no-name-8328c0bb-d1c4-48e3-aa95-55714923acaa
[I 2024-11-11 19:55:34,953] Trial 0 finished with value: 0.4957638888888889 and parameters: {'use_tsne': 'True', 'tsne_perplexity': 24.990599854162944, 'tsne_learning_rate': 816.6523540618197, 'n_neighbors': 48, 'weights': 'uniform'}. Best is trial 0 with value: 0.4957638888888889.
[I 2024-11-11 19:55:35,319] Trial 1 finished with value: 0.7863194444444445 and parameters: {'use_tsne': 'False', 'n_neighbors': 9, 'weights': 'uniform'}. Best is trial 1 with value: 0.7863194444444445.
[I 2024-11-11 19:55:35,702] Trial 2 finished with value: 0.6763194444444445 and parameters: {'use_tsne': 'False', 'n_neighbors': 11, 'weights': 'uniform'}. Best is trial 1 with value: 0.7863194444444445.
[I 2024-11-11 19:55:45,331] Trial 3 finished with value: 0.5169444444444444 and parameters: {'use_tsne': 'True', 'tsne_perplexity': 11.95286250331376, 'tsne_learning_rate': 774.7383861740881, 'n_n

Best hyperparameters: {'use_tsne': 'False', 'n_neighbors': 5, 'weights': 'distance'}
Best value: 0.9518055555555556
