In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow import keras

# 导入训练集和测试集
train_data = pd.read_excel(r'C:\Users\k\Desktop\硕士论文\ORP\ORP4\ORP_function_train.xlsx')
test_data = pd.read_excel(r'C:\Users\k\Desktop\硕士论文\ORP\ORP4\ORP_function_test.xlsx')

# 分离特征和目标变量
X_train, y_train = train_data.drop(['E','File'], axis=1), train_data['E']
X_test, y_test = test_data.drop(['E','File'], axis=1), test_data['E']

# 构建神经网络模型
def build_model(input_shape):
    model = keras.Sequential([
        keras.layers.Input(shape=(input_shape,)),
        keras.layers.Dense(128, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
        keras.layers.Dense(1)
    ])
    model.compile(optimizer='sgd', loss='mean_squared_error')
    return model

# k折交叉验证
kf = KFold(n_splits=5)
fold_no = 1
for train_index, val_index in kf.split(X_train):
    print(f'Fold {fold_no}')

    # 分割训练集和验证集
    X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
    y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]
    
    # 构建模型
    model = build_model(X_train.shape[1])
    
    # 训练模型
    model.fit(X_train_fold, y_train_fold, epochs=2000, batch_size=20, validation_data=(X_val_fold, y_val_fold), verbose=0)  # verbose设置为0，减少输出
    
    # 预测
    y_train_pred = model.predict(X_train_fold)
    y_test_pred = model.predict(X_test)
    
    # 计算RMSE和R2
    train_rmse = np.sqrt(mean_squared_error(y_train_fold, y_train_pred))
    test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
    
    train_r2 = r2_score(y_train_fold, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)
    
    print(f"Fold {fold_no} 训练集RMSE: {train_rmse}, 训练集R2: {train_r2}")
    print(f"Fold {fold_no} 测试集RMSE: {test_rmse}, 测试集R2: {test_r2}")
    
    fold_no += 1


Fold 1
Fold 1 训练集RMSE: 0.14739952907244208, 训练集R2: 0.970065792806136
Fold 1 测试集RMSE: 0.41672755077870427, 测试集R2: 0.7435949677920212
Fold 2
Fold 2 训练集RMSE: 0.15772732603513964, 训练集R2: 0.965774832513743
Fold 2 测试集RMSE: 0.4339801778088852, 测试集R2: 0.7219250267677393
Fold 3
Fold 3 训练集RMSE: 0.15239444293413698, 训练集R2: 0.9673664680725699
Fold 3 测试集RMSE: 0.3956737753012719, 测试集R2: 0.7688485346956094
Fold 4
Fold 4 训练集RMSE: 0.16056511006191537, 训练集R2: 0.9609573541065475
Fold 4 测试集RMSE: 0.41054007742668663, 测试集R2: 0.7511525257964689
Fold 5
Fold 5 训练集RMSE: 0.15752610210494283, 训练集R2: 0.9442687937100619
Fold 5 测试集RMSE: 0.6325051378711345, 测试集R2: 0.4093227659294194


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, r2_score
from bayes_opt import BayesianOptimization
import tensorflow as tf
from tensorflow import keras

# 导入训练集和测试集
train_data = pd.read_excel(r'C:\Users\k\Desktop\硕士论文\ORP\ORP4\ORP_function_train.xlsx')
test_data = pd.read_excel(r'C:\Users\k\Desktop\硕士论文\ORP\ORP4\ORP_function_test.xlsx')

# 分离特征和目标变量
X_train, y_train = train_data.drop(['E','File'], axis=1), train_data['E']
X_test, y_test = test_data.drop(['E','File'], axis=1), test_data['E']

# 定义神经网络模型
def build_model(learning_rate, dropout_rate, l2_reg):
    model = keras.Sequential([
        keras.layers.Input(shape=(X_train.shape[1],)),
        keras.layers.Dense(128, activation='relu', kernel_regularizer=keras.regularizers.l2(l2_reg)),
        keras.layers.Dropout(dropout_rate),
        keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(l2_reg)),
        keras.layers.Dense(1)
    ])
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

# 定义贝叶斯优化目标函数
def bayesian_optimization(learning_rate, dropout_rate, l2_reg):
    model = build_model(learning_rate, dropout_rate, l2_reg)

    kf = KFold(n_splits=5)
    fold_no = 1
    total_rmse = 0.0

    for train_index, val_index in kf.split(X_train):
        X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
        y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]

        model.fit(X_train_fold, y_train_fold, epochs=2000, batch_size=20, verbose=0)
        y_val_pred = model.predict(X_val_fold)

        fold_rmse = np.sqrt(mean_squared_error(y_val_fold, y_val_pred))
        total_rmse += fold_rmse

    return -total_rmse  # Minimize the negative RMSE

# 设置贝叶斯优化的搜索范围
param_bounds = {'learning_rate': (0.001, 0.1), 'dropout_rate': (0.1, 0.5), 'l2_reg': (0.001, 0.1)}

# 初始化贝叶斯优化对象
optimizer = BayesianOptimization(f=bayesian_optimization, pbounds=param_bounds, random_state=1)

# 运行贝叶斯优化
optimizer.maximize(init_points=5, n_iter=10)

# 输出最佳超参数
best_params = optimizer.max['params']
print("Best Hyperparameters:", best_params)

# 使用最佳超参数构建最终模型
best_model = build_model(best_params['learning_rate'], best_params['dropout_rate'], best_params['l2_reg'])

# 训练最终模型
best_model.fit(X_train, y_train, epochs=2000, batch_size=20, verbose=0)

# 使用最终模型进行预测和评估
y_train_pred = best_model.predict(X_train)
y_test_pred = best_model.predict(X_test)

train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))

train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

print(f"训练集RMSE: {train_rmse}, 训练集R2: {train_r2}")
print(f"测试集RMSE: {test_rmse}, 测试集R2: {test_r2}")


|   iter    |  target   | dropou... |  l2_reg   | learni... |
-------------------------------------------------------------
| [0m1        [0m | [0m-2.302   [0m | [0m0.2668   [0m | [0m0.07231  [0m | [0m0.001011 [0m |
| [0m2        [0m | [0m-2.452   [0m | [0m0.2209   [0m | [0m0.01553  [0m | [0m0.01014  [0m |
| [0m3        [0m | [0m-2.575   [0m | [0m0.1745   [0m | [0m0.03521  [0m | [0m0.04028  [0m |
| [0m4        [0m | [0m-2.581   [0m | [0m0.3155   [0m | [0m0.0425   [0m | [0m0.06884  [0m |
| [0m5        [0m | [0m-2.341   [0m | [0m0.1818   [0m | [0m0.08793  [0m | [0m0.003711 [0m |
| [0m6        [0m | [0m-2.496   [0m | [0m0.2503   [0m | [0m0.0792   [0m | [0m0.07767  [0m |
| [0m7        [0m | [0m-2.401   [0m | [0m0.3129   [0m | [0m0.07895  [0m | [0m0.01832  [0m |
| [95m8        [0m | [95m-2.296   [0m | [95m0.2833   [0m | [95m0.08234  [0m | [95m0.001    [0m |
| [0m9        [0m | [0m-2.693   [0m | [0m0.191    