In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC
import xgboost as xgb
from hyperopt import fmin, tpe, hp, Trials

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

# Load the iris dataset and separate features from labels
iris = datasets.load_iris()
X_iris, y_iris = iris.data, iris.target

# Load the handwritten-digits dataset and separate features from labels
digits = datasets.load_digits()
X_digits, y_digits = digits.data, digits.target

# Hold out 20% of each dataset as a test split (fixed seed so the split is repeatable)
(X_iris_train, X_iris_test,
 y_iris_train, y_iris_test) = train_test_split(
    X_iris, y_iris, random_state=42, test_size=0.2
)
(X_digits_train, X_digits_test,
 y_digits_train, y_digits_test) = train_test_split(
    X_digits, y_digits, random_state=42, test_size=0.2
)

# SVM hyperparameter optimisation (classification)
def optimize_svm(X_train, y_train, X_test, y_test):
    """Compare grid search, random search and TPE (hyperopt) for tuning an SVC.

    All three strategies choose hyperparameters by cross-validation on the
    training data only. The test split is scored exactly once per strategy,
    with the model refitted on the full training data, so the three reported
    accuracies are comparable and none of them is tuned on the test data.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Held-out features, used only for the final accuracy.
        y_test: Held-out labels, used only for the final accuracy.

    Returns:
        tuple: ``(grid_search_time, grid_search_acc, random_search_time,
        random_search_acc, bayesian_time, bayesian_acc)``. Each ``*_time`` is
        the wall-clock duration in seconds of the whole search (including the
        final refit); each ``*_acc`` is the accuracy on the test split.
    """
    # Imported locally so the function does not rely on names that the
    # surrounding cell may not have imported.
    import time
    from hyperopt import space_eval
    from sklearn.model_selection import cross_val_score

    param_grid = {'C': [0.1, 1, 10], 'gamma': [0.1, 1, 10]}

    # Grid search: time the complete search, not the fit of a single candidate.
    start = time.perf_counter()
    grid_search = GridSearchCV(SVC(), param_grid=param_grid)
    grid_search.fit(X_train, y_train)
    grid_search_time = time.perf_counter() - start
    grid_search_acc = grid_search.score(X_test, y_test)

    # Random search: seeded so the sampled candidates are the same on every run.
    start = time.perf_counter()
    random_search = RandomizedSearchCV(
        SVC(), param_distributions=param_grid, n_iter=3, random_state=42
    )
    random_search.fit(X_train, y_train)
    random_search_time = time.perf_counter() - start
    random_search_acc = random_search.score(X_test, y_test)

    # Bayesian optimisation (TPE) over the same range as the grid, on a log scale.
    space = {'C': hp.loguniform('C', np.log(0.1), np.log(10)),
             'gamma': hp.loguniform('gamma', np.log(0.1), np.log(10))}

    def objective(params):
        # hyperopt minimises, so return the negated mean cross-validated
        # accuracy. Only training data is used here.
        cv_acc = cross_val_score(SVC(**params), X_train, y_train).mean()
        return -cv_acc

    trials = Trials()
    start = time.perf_counter()
    best = fmin(objective, space, algo=tpe.suggest, max_evals=10, trials=trials)
    # Refit the winning configuration on all training data, mirroring the
    # refit that the two search classes perform inside fit().
    best_clf = SVC(**space_eval(space, best))
    best_clf.fit(X_train, y_train)
    bayesian_time = time.perf_counter() - start
    bayesian_acc = best_clf.score(X_test, y_test)

    return grid_search_time, grid_search_acc, random_search_time, random_search_acc, bayesian_time, bayesian_acc

# XGBoost hyperparameter optimisation (classification)
def optimize_xgboost(X_train, y_train, X_test, y_test):
    """Compare grid search, random search and TPE (hyperopt) for tuning an XGBClassifier.

    All three strategies choose hyperparameters by cross-validation on the
    training data only. The test split is scored exactly once per strategy,
    with the model refitted on the full training data, so the three reported
    accuracies are comparable and none of them is tuned on the test data.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Held-out features, used only for the final accuracy.
        y_test: Held-out labels, used only for the final accuracy.

    Returns:
        tuple: ``(grid_search_time, grid_search_acc, random_search_time,
        random_search_acc, bayesian_time, bayesian_acc)``. Each ``*_time`` is
        the wall-clock duration in seconds of the whole search (including the
        final refit); each ``*_acc`` is the accuracy on the test split.
    """
    # Imported locally so the function does not rely on names that the
    # surrounding cell may not have imported.
    import time
    from hyperopt import space_eval
    from sklearn.model_selection import cross_val_score

    param_grid = {'max_depth': [3, 6, 9], 'learning_rate': [0.1, 0.01, 0.001]}

    # Grid search: time the complete search, not the fit of a single candidate.
    start = time.perf_counter()
    grid_search = GridSearchCV(xgb.XGBClassifier(), param_grid=param_grid)
    grid_search.fit(X_train, y_train)
    grid_search_time = time.perf_counter() - start
    grid_search_acc = grid_search.score(X_test, y_test)

    # Random search: seeded so the sampled candidates are the same on every run.
    start = time.perf_counter()
    random_search = RandomizedSearchCV(
        xgb.XGBClassifier(), param_distributions=param_grid, n_iter=3, random_state=42
    )
    random_search.fit(X_train, y_train)
    random_search_time = time.perf_counter() - start
    random_search_acc = random_search.score(X_test, y_test)

    # Bayesian optimisation (TPE): same depth choices, learning rate on a log scale.
    space = {'max_depth': hp.choice('max_depth', [3, 6, 9]),
             'learning_rate': hp.loguniform('learning_rate', np.log(0.001), np.log(0.1))}

    def objective(params):
        # hyperopt minimises, so return the negated mean cross-validated
        # accuracy. Only training data is used here.
        cv_acc = cross_val_score(xgb.XGBClassifier(**params), X_train, y_train).mean()
        return -cv_acc

    trials = Trials()
    start = time.perf_counter()
    best = fmin(objective, space, algo=tpe.suggest, max_evals=10, trials=trials)
    # For hp.choice, fmin reports the index of the selected option rather than
    # its value; space_eval maps the assignment back to real parameter values.
    best_params = space_eval(space, best)
    best_clf = xgb.XGBClassifier(**best_params)
    best_clf.fit(X_train, y_train)
    bayesian_time = time.perf_counter() - start
    bayesian_acc = best_clf.score(X_test, y_test)

    return grid_search_time, grid_search_acc, random_search_time, random_search_acc, bayesian_time, bayesian_acc

# Run both optimisers on both classification datasets and print one report per run.
# Each job is (header line, optimiser function, (X_train, y_train, X_test, y_test)).
iris_splits = (X_iris_train, y_iris_train, X_iris_test, y_iris_test)
digits_splits = (X_digits_train, y_digits_train, X_digits_test, y_digits_test)

classification_jobs = (
    ("鸢尾花数据集 - SVM超参数优化结果：", optimize_svm, iris_splits),
    ("\n鸢尾花数据集 - XGBoost超参数优化结果：", optimize_xgboost, iris_splits),
    ("\n手写数字数据集 - SVM超参数优化结果：", optimize_svm, digits_splits),
    ("\n手写数字数据集 - XGBoost超参数优化结果：", optimize_xgboost, digits_splits),
)

for header, optimizer, splits in classification_jobs:
    # The header is printed before the (potentially slow) search starts.
    print(header)
    (grid_search_time, grid_search_acc,
     random_search_time, random_search_acc,
     bayesian_time, bayesian_acc) = optimizer(*splits)
    print("网格搜索 - 时间：{:.4f}s, 准确率：{:.4f}".format(grid_search_time, grid_search_acc))
    print("随机搜索 - 时间：{:.4f}s, 准确率：{:.4f}".format(random_search_time, random_search_acc))
    print("贝叶斯优化 - 时间：{:.4f}s, 准确率：{:.4f}".format(bayesian_time, bayesian_acc))


SyntaxError: invalid syntax (1044996071.py, line 103)

In [2]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVR
import xgboost as xgb
from hyperopt import fmin, tpe, hp, Trials
import time
from sklearn.metrics import mean_squared_error

# Load the diabetes dataset and separate features from targets
diabetes = datasets.load_diabetes()
X_diabetes, y_diabetes = diabetes.data, diabetes.target

# Load the California housing dataset and separate features from targets
california = datasets.fetch_california_housing()
X_california, y_california = california.data, california.target

# Hold out 20% of the diabetes data as a test split (fixed seed for a repeatable split)
(X_diabetes_train, X_diabetes_test,
 y_diabetes_train, y_diabetes_test) = train_test_split(
    X_diabetes, y_diabetes, random_state=42, test_size=0.2
)

# Hold out 20% of the California housing data as a test split (same seed)
(X_california_train, X_california_test,
 y_california_train, y_california_test) = train_test_split(
    X_california, y_california, random_state=42, test_size=0.2
)

# SVM hyperparameter optimisation (regression)
def optimize_svm(X_train, y_train, X_test, y_test):
    """Compare grid search, random search and TPE (hyperopt) for tuning an SVR.

    All three strategies choose hyperparameters by cross-validation on the
    training data only. The test split is scored exactly once per strategy,
    with the model refitted on the full training data, so the three reported
    errors are comparable and none of them is tuned on the test data.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Held-out features, used only for the final error.
        y_test: Held-out targets, used only for the final error.

    Returns:
        tuple: ``(grid_search_time, grid_search_mse, random_search_time,
        random_search_mse, bayesian_time, bayesian_mse)``. Each ``*_time`` is
        the wall-clock duration in seconds of the whole search (including the
        final refit); each ``*_mse`` is the mean squared error on the test split.
    """
    # Imported locally so the function does not rely on names that the
    # surrounding cell does not import.
    from hyperopt import space_eval
    from sklearn.model_selection import cross_val_score

    param_grid = {'C': [0.1, 1, 10], 'gamma': [0.1, 1, 10]}

    # Grid search (perf_counter is monotonic, so it is safe for measuring intervals).
    start_time = time.perf_counter()
    grid_search = GridSearchCV(SVR(), param_grid=param_grid)
    grid_search.fit(X_train, y_train)
    grid_search_time = time.perf_counter() - start_time
    grid_search_mse = mean_squared_error(y_test, grid_search.predict(X_test))

    # Random search: seeded so the sampled candidates are the same on every run.
    start_time = time.perf_counter()
    random_search = RandomizedSearchCV(
        SVR(), param_distributions=param_grid, n_iter=3, random_state=42
    )
    random_search.fit(X_train, y_train)
    random_search_time = time.perf_counter() - start_time
    random_search_mse = mean_squared_error(y_test, random_search.predict(X_test))

    # Bayesian optimisation (TPE) over the same range as the grid, on a log scale.
    space = {'C': hp.loguniform('C', np.log(0.1), np.log(10)),
             'gamma': hp.loguniform('gamma', np.log(0.1), np.log(10))}

    def objective(params):
        # The scorer returns negated MSE (higher is better); flip the sign so
        # hyperopt minimises the cross-validated MSE. Only training data is used.
        neg_mse = cross_val_score(
            SVR(**params), X_train, y_train, scoring='neg_mean_squared_error'
        ).mean()
        return -neg_mse

    trials = Trials()
    start_time = time.perf_counter()
    best = fmin(objective, space, algo=tpe.suggest, max_evals=10, trials=trials)
    # Refit the winning configuration on all training data, mirroring the
    # refit that the two search classes perform inside fit().
    best_model = SVR(**space_eval(space, best))
    best_model.fit(X_train, y_train)
    bayesian_time = time.perf_counter() - start_time
    bayesian_mse = mean_squared_error(y_test, best_model.predict(X_test))

    return grid_search_time, grid_search_mse, random_search_time, random_search_mse, bayesian_time, bayesian_mse

# XGBoost hyperparameter optimisation (regression)
def optimize_xgboost(X_train, y_train, X_test, y_test):
    """Compare grid search, random search and TPE (hyperopt) for tuning an XGBRegressor.

    All three strategies choose hyperparameters by cross-validation on the
    training data only. The test split is scored exactly once per strategy,
    with the model refitted on the full training data, so the three reported
    errors are comparable and none of them is tuned on the test data.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Held-out features, used only for the final error.
        y_test: Held-out targets, used only for the final error.

    Returns:
        tuple: ``(grid_search_time, grid_search_mse, random_search_time,
        random_search_mse, bayesian_time, bayesian_mse)``. Each ``*_time`` is
        the wall-clock duration in seconds of the whole search (including the
        final refit); each ``*_mse`` is the mean squared error on the test split.
    """
    # Imported locally so the function does not rely on names that the
    # surrounding cell does not import.
    from hyperopt import space_eval
    from sklearn.model_selection import cross_val_score

    param_grid = {'max_depth': [3, 6, 9], 'learning_rate': [0.001, 0.01, 0.1]}

    # Grid search (perf_counter is monotonic, so it is safe for measuring intervals).
    start_time = time.perf_counter()
    grid_search = GridSearchCV(xgb.XGBRegressor(), param_grid=param_grid)
    grid_search.fit(X_train, y_train)
    grid_search_time = time.perf_counter() - start_time
    grid_search_mse = mean_squared_error(y_test, grid_search.predict(X_test))

    # Random search: seeded so the sampled candidates are the same on every run.
    start_time = time.perf_counter()
    random_search = RandomizedSearchCV(
        xgb.XGBRegressor(), param_distributions=param_grid, n_iter=3, random_state=42
    )
    random_search.fit(X_train, y_train)
    random_search_time = time.perf_counter() - start_time
    random_search_mse = mean_squared_error(y_test, random_search.predict(X_test))

    # Bayesian optimisation (TPE): same depth choices, learning rate on a log scale.
    space = {'max_depth': hp.choice('max_depth', [3, 6, 9]),
             'learning_rate': hp.loguniform('learning_rate', np.log(0.001), np.log(0.1))}

    def objective(params):
        # The scorer returns negated MSE (higher is better); flip the sign so
        # hyperopt minimises the cross-validated MSE. Only training data is used.
        neg_mse = cross_val_score(
            xgb.XGBRegressor(**params), X_train, y_train, scoring='neg_mean_squared_error'
        ).mean()
        return -neg_mse

    trials = Trials()
    start_time = time.perf_counter()
    best = fmin(objective, space, algo=tpe.suggest, max_evals=10, trials=trials)
    # For hp.choice, fmin reports the index of the selected option rather than
    # its value; space_eval maps the assignment back to real parameter values.
    best_params = space_eval(space, best)
    best_model = xgb.XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    bayesian_time = time.perf_counter() - start_time
    bayesian_mse = mean_squared_error(y_test, best_model.predict(X_test))

    return grid_search_time, grid_search_mse, random_search_time, random_search_mse, bayesian_time, bayesian_mse

# Run the optimisers on the regression datasets and print one report per run.
# Each job is (header line, optimiser function, (X_train, y_train, X_test, y_test)).
diabetes_splits = (X_diabetes_train, y_diabetes_train, X_diabetes_test, y_diabetes_test)
california_splits = (X_california_train, y_california_train, X_california_test, y_california_test)

regression_jobs = (
    ("糖尿病数据集 - SVM 模型超参数优化结果：", optimize_svm, diabetes_splits),
    ("\n加州房价预测数据集 - SVM 模型超参数优化结果：", optimize_svm, california_splits),
    ("\n加州房价预测数据集 - XGBoost 模型超参数优化结果：", optimize_xgboost, california_splits),
)

for header, optimizer, splits in regression_jobs:
    # The header is printed before the (potentially slow) search starts.
    print(header)
    (grid_search_time, grid_search_mse,
     random_search_time, random_search_mse,
     bayesian_time, bayesian_mse) = optimizer(*splits)
    print("网格搜索 - 时间：{:.4f}s，MSE：{:.4f}".format(grid_search_time, grid_search_mse))
    print("随机搜索 - 时间：{:.4f}s，MSE：{:.4f}".format(random_search_time, random_search_mse))
    print("贝叶斯优化 - 时间：{:.4f}s，MSE：{:.4f}".format(bayesian_time, bayesian_mse))


糖尿病数据集 - SVM 模型超参数优化结果：


AttributeError: module 'sys' has no attribute 'setcheckinterval'