In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings, os, sys, shutil
from typing import Tuple
from utility import *
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score, log_loss
from sklearn.neighbors import KNeighborsClassifier

warnings.filterwarnings("ignore")

In [6]:
# Read the pre-computed feature tables; the training rows are shuffled with a
# fixed seed so the row order is repeatable between runs.
train = shuffle(
    pd.read_csv('../preprocessed/combined_features_256.csv'),
    random_state=42,
)
test = pd.read_csv('../preprocessed/combined_features_256_test.csv')

# Separate the 'label' column (target) from the remaining columns (features).
x_train, y_train = train.drop(columns='label'), train['label']
X_test, Y_test = test.drop(columns='label'), test['label']

In [7]:
import optuna
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from optuna.samplers import TPESampler

In [None]:
# Objective function for the Optuna hyper-parameter search.
def objective(trial):
    """Score one KNN hyper-parameter configuration proposed by Optuna.

    The trial proposes ``n_neighbors`` (1-11), ``weights``
    (uniform/distance), the Minkowski power ``p`` (1-7), the neighbour
    search ``algorithm`` (ball_tree/kd_tree) and ``leaf_size`` (1-30).

    Args:
        trial: The Optuna trial used to sample the hyper-parameters.

    Returns:
        The mean of the 5-fold cross-validation scores obtained on
        ``x_train`` / ``y_train`` (the estimator's default score).
    """
    # Keyword arguments are evaluated left to right, so the parameters are
    # requested from the trial in a fixed order.
    knn = KNeighborsClassifier(
        n_neighbors=trial.suggest_int("n_neighbors", 1, 11),
        weights=trial.suggest_categorical("weights", ["uniform", "distance"]),
        p=trial.suggest_int("p", 1, 7),
        algorithm=trial.suggest_categorical("algorithm", ["ball_tree", "kd_tree"]),
        leaf_size=trial.suggest_int("leaf_size", 1, 30),
    )

    # 5-fold cross-validation on the training data; the study maximises the mean.
    fold_scores = cross_val_score(knn, x_train, y_train, cv=5)
    return np.mean(fold_scores)

if __name__ == "__main__":
    # A seeded TPE sampler makes the sequence of proposed parameters repeatable.
    sampler = TPESampler(seed=42)
    # NOTE(review): objective() does not report intermediate values, so this
    # pruner presumably never stops a trial early -- confirm whether it is needed.
    pruner = optuna.pruners.MedianPruner(n_warmup_steps=10)
    study = optuna.create_study(
        direction="maximize",
        sampler=sampler,
        pruner=pruner,
    )
    study.optimize(objective, n_trials=50)

    # Summarise the search: trial count first, then the best trial's score and parameters.
    print("Number of finished trials: {}".format(len(study.trials)), "Best trial:", sep="\n")
    trial = study.best_trial
    print("  Value: {}".format(trial.value), "  Params: ", sep="\n")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

In [None]:
# Create the Optuna study and run the search. The sampler is seeded so that the
# best parameters bound to `trial` below are reproducible from run to run.
study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study.optimize(objective, n_trials=50)

# Report the best trial: its mean cross-validated accuracy and its parameters.
print('Best trial:')
trial = study.best_trial
print(f'  Average CV Accuracy: {trial.value:.3f}')
print('  Params: ')
for key, value in trial.params.items():
    print(f'    {key}: {value}')



In [None]:
# Evaluate the best hyper-parameters on the held-out test set.
# The search tuned KNeighborsClassifier parameters (n_neighbors, weights, p,
# algorithm, leaf_size), so the final model must be a KNeighborsClassifier;
# it has no random_state parameter.
best_model = KNeighborsClassifier(**trial.params)
best_model.fit(x_train, y_train)
y_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(Y_test, y_pred)
print(f'Accuracy on Test set: {test_accuracy:.3f}')