In [1]:
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

# Load the data.
# NOTE(review): absolute, machine-specific location — point DATA_DIR at the
# local copy of the spreadsheets before running on another machine.
DATA_DIR = 'C:/Users/k/Desktop/硕士论文/ORP/ORP4'


def _load_features_and_target(file_path, sheet_name):
    """Read one Excel sheet and split it into features and target.

    Args:
        file_path: Path of the Excel workbook to read.
        sheet_name: Name of the worksheet inside the workbook.

    Returns:
        A ``(data, X, y)`` tuple: the full DataFrame, the feature columns
        (every column except the first and the last) and the target (the
        last column).
    """
    data = pd.read_excel(file_path, sheet_name=sheet_name)
    # The first column is skipped — presumably an ID/label column; TODO confirm.
    return data, data.iloc[:, 1:-1], data.iloc[:, -1]


file_path_train = f'{DATA_DIR}/ORP_function_train.xlsx'
sheet_name_train = 'Sheet1'
data_train, X_train, y_train = _load_features_and_target(file_path_train, sheet_name_train)

file_path_test = f'{DATA_DIR}/ORP_function_test.xlsx'
sheet_name_test = 'Sheet1'
data_test, X_test, y_test = _load_features_and_target(file_path_test, sheet_name_test)

# Feature scaling.
# NOTE(review): no scaling is actually applied here — the "*_scaled" names are
# plain aliases of the raw feature frames and the imported StandardScaler is
# not used in this cell. The elastic-net penalty is sensitive to feature scale,
# so confirm the spreadsheet columns are already standardized; otherwise fit a
# StandardScaler on X_train and use it to transform both sets.
X_train_scaled = X_train
X_test_scaled = X_test

# Elastic-net regression with default settings; alpha and l1_ratio are tuned below.
elastic_net = ElasticNet()

# Candidate hyper-parameters (adjust as needed).
param_grid = {
    'alpha': [0.05, 0.1, 0.5, 1, 2, 5, 8, 10],
    'l1_ratio': [0.01, 0.03, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 1],
}

# Exhaustive grid search with 5-fold cross-validation.
grid_search = GridSearchCV(elastic_net, param_grid, cv=5)
grid_search.fit(X_train_scaled, y_train)

# Best hyper-parameters found by the search.
best_alpha = grid_search.best_params_['alpha']
best_l1_ratio = grid_search.best_params_['l1_ratio']

# GridSearchCV refits the best configuration on the whole training set by
# default (refit=True), so the fitted model is reused instead of training an
# identical ElasticNet a second time.
elastic_net_best = grid_search.best_estimator_

# Predictions on both splits.
y_train_pred = elastic_net_best.predict(X_train_scaled)
y_test_pred = elastic_net_best.predict(X_test_scaled)

# Model evaluation (root mean squared error).
rmse_train = sqrt(mean_squared_error(y_train, y_train_pred))
rmse_test = sqrt(mean_squared_error(y_test, y_test_pred))

print(f'RMSE (Training): {rmse_train}')
print(f'RMSE (Testing): {rmse_test}')

# Fitted coefficients, one per feature column.
coef = elastic_net_best.coef_

# Render the fitted linear model as a readable equation; X1..Xn follow the
# column order of X_train.
equation = "y = " + str(elastic_net_best.intercept_) + "".join(
    f" + ({c}) * X{i}" for i, c in enumerate(coef, start=1)
)

print("Equation of the model:")
print(equation)

# Number of features the model actually uses (non-zero coefficients).
print("Number of features used:", int((coef != 0).sum()))

RMSE (Training): 0.42380484995641254
RMSE (Testing): 0.4901827006503472
Equation of the model:
y = 2.697800758560091 + (-0.14907502724869118) * X1 + (-0.024662143312068535) * X2 + (-0.09600144100109997) * X3 + (-0.11999921364376458) * X4 + (-0.004936855922813607) * X5 + (-0.41105273546215054) * X6 + (0.02588570193849684) * X7 + (0.08829825079686399) * X8 + (-0.08576542090642188) * X9 + (-0.2038577329437094) * X10 + (0.06504921129294565) * X11 + (0.12223562816692972) * X12 + (0.13504435097994874) * X13 + (-0.0453013763261941) * X14 + (0.02662730725394642) * X15 + (0.13575486745812657) * X16 + (-0.12324700904669593) * X17 + (0.08291470677887791) * X18 + (0.0) * X19 + (0.04222491047496284) * X20 + (0.0) * X21 + (-0.06829551104486184) * X22 + (-0.07517317814828603) * X23 + (0.0) * X24 + (-0.11486111417495824) * X25 + (-0.0017139004573443234) * X26 + (-0.0) * X27 + (-0.0) * X28 + (0.15099192451319493) * X29 + (0.0) * X30 + (0.004478794781683056) * X31
Number of features used: 25


In [2]:
from sklearn.metrics import r2_score, mean_absolute_percentage_error

# Coefficient of determination (R²) for the training and test predictions.
r2_train = r2_score(y_true=y_train, y_pred=y_train_pred)
r2_test = r2_score(y_true=y_test, y_pred=y_test_pred)
print(
    f'R2 (Training): {r2_train}',
    f'R2 (Testing): {r2_test}',
    sep='\n',
)

# Mean absolute percentage error (reported as a fraction, not in percent).
mape_train = mean_absolute_percentage_error(y_true=y_train, y_pred=y_train_pred)
mape_test = mean_absolute_percentage_error(y_true=y_test, y_pred=y_test_pred)
print(
    f'MAPE (Training): {mape_train}',
    f'MAPE (Testing): {mape_test}',
    sep='\n',
)

R2 (Training): 0.726319983352018
R2 (Testing): 0.6452371969387255
MAPE (Training): 0.1691329442147229
MAPE (Testing): 0.19020099026252826


In [3]:
import numpy as np

def adjusted_r_squared(r_squared, n, k):
    """Return the adjusted R² for a model with ``k`` predictors.

    Computes ``1 - (1 - r_squared) * (n - 1) / (n - k - 1)``.

    Args:
        r_squared: Ordinary coefficient of determination.
        n: Number of samples the R² was computed on.
        k: Number of predictors used by the model.

    Returns:
        The adjusted R² value.

    Raises:
        ValueError: If ``n - k - 1`` is not positive. The statistic is
            undefined there: the denominator is zero, or negative, which
            would silently flip the sign of the correction term.
    """
    residual_dof = n - k - 1
    if residual_dof <= 0:
        raise ValueError(
            f"adjusted R² is undefined for n={n}, k={k}: need n - k - 1 > 0"
        )
    return 1 - (1 - r_squared) * (n - 1) / residual_dof

def calc_r_squared(y, y_pred):
    """Compute the coefficient of determination (R²).

    Args:
        y: Observed target values (array-like supporting element-wise
            arithmetic, e.g. a NumPy array or pandas Series).
        y_pred: Predicted values matching ``y`` element for element.

    Returns:
        ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of squared
        residuals and ``SS_tot`` the sum of squared deviations of ``y``
        from its mean.
    """
    residuals = y - y_pred
    deviations = y - np.mean(y)
    ss_res = np.sum(np.square(residuals))
    ss_tot = np.sum(np.square(deviations))
    return 1 - (ss_res / ss_tot)

# Sample counts of each split.
n_train = len(X_train)
n_test = len(X_test)

# Number of features the fitted model uses (non-zero coefficients).
num_features = sum(weight != 0 for weight in elastic_net_best.coef_)

# Plain R² on each split.
r_squared_train = calc_r_squared(y_train, y_train_pred)
r_squared_test = calc_r_squared(y_test, y_test_pred)

# Adjusted R², penalising by the number of features in use.
adj_r_squared_train = adjusted_r_squared(r_squared_train, n_train, num_features)
adj_r_squared_test = adjusted_r_squared(r_squared_test, n_test, num_features)

print(
    f'Adjusted R² (Training): {adj_r_squared_train}',
    f'Adjusted R² (Testing): {adj_r_squared_test}',
    sep='\n',
)


Adjusted R² (Training): 0.6788060915728544
Adjusted R² (Testing): 0.22290052662768445


In [4]:
# Display the count of non-zero model coefficients computed earlier.
num_features

25

In [5]:
# Display the R² obtained on the training set.
r_squared_train

0.726319983352018

In [6]:
# Display the R² obtained on the test set.
r_squared_test

0.6452371969387255

In [7]:
# Display the alpha value selected by the grid search.
best_alpha


0.1

In [8]:
# Display the l1_ratio value selected by the grid search.
best_l1_ratio

0.01

In [10]:
from sklearn.model_selection import cross_val_score
import numpy as np
from sklearn.model_selection import KFold

# Shuffled 10-fold splitter with a fixed seed so the folds are reproducible.
kf = KFold(
    n_splits=10,
    shuffle=True,
    random_state=99,
)

# Run the 10-fold cross-validation; scikit-learn reports the MSE negated so
# that larger scores are always better.
mse_scores = cross_val_score(
    estimator=elastic_net_best,
    X=X_train,
    y=y_train,
    scoring='neg_mean_squared_error',
    cv=kf,
)

# Flip the sign back and take the square root to obtain one RMSE per fold.
rmse_scores = np.sqrt(np.negative(mse_scores))

# Report the RMSE of each of the 10 folds.
print("10次交叉验证的RMSE：", rmse_scores)

# Report the mean RMSE across the 10 folds.
print("平均RMSE：", rmse_scores.mean())

5次交叉验证的RMSE： [0.46043097 0.50420065 0.60264137 0.47076497 0.42547837 0.43308072
 0.40578195 0.37116875 0.48632666 0.48535609]
平均RMSE： 0.46452305065149674
