In [35]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

In [36]:
# 加载CSV数据
data = pd.read_csv("./new_data.csv", header=None)
data

Unnamed: 0,0,1,2,3,4,5,6
0,date(日期),Q(径流),E(蒸发),P(降水),Year,Month,Day
1,1983/1/1,6.5,0.9,0.3,1983,1,1
2,1983/1/2,6.99,0,0.2,1983,1,2
3,1983/1/3,6.99,2.2,2.6,1983,1,3
4,1983/1/4,7.49,0.1,27.7,1983,1,4
...,...,...,...,...,...,...,...
1822,1987/12/27,4.52,0.1,2.4,1987,12,27
1823,1987/12/28,4.59,0.9,1.1,1987,12,28
1824,1987/12/29,1.38,1,1,1987,12,29
1825,1987/12/30,6.84,1.1,0,1987,12,30


In [37]:
data = data[1:]

In [38]:
# 将数据拆分为特征和标签
X = data.iloc[:, [2, 3, 4, 5, 6]].values
y = data.iloc[:, 1].values

In [39]:
# 将数据集拆分为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [40]:
# 定义基学习器
models = [RandomForestRegressor(n_estimators=100, random_state=0),
          GradientBoostingRegressor(n_estimators=100, random_state=0),
          LinearRegression()]

In [41]:
# 定义元学习器
meta_model = LinearRegression()

In [42]:
# 定义k-fold交叉验证的参数
kf = KFold(n_splits=5, shuffle=True, random_state=0)

# 定义元特征数组和元标签数组
meta_features = np.zeros((X.shape[0], len(models)))
meta_labels = np.zeros(X.shape[0])

In [43]:
# 对每个fold进行训练和预测
for fold, (train_index, test_index) in enumerate(kf.split(X)):
    print(f'Processing fold {fold+1}')

    # 分割训练和测试集
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # 对每个基学习器进行训练和预测
    for i, model in enumerate(models):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        meta_features[test_index, i] = y_pred

    # 对元学习器进行训练
    meta_model.fit(meta_features[train_index], y_train)

    # 对元学习器进行预测
    meta_labels[test_index] = meta_model.predict(meta_features[test_index])

Processing fold 1
Processing fold 2
Processing fold 3
Processing fold 4
Processing fold 5


In [44]:
np.set_printoptions(suppress = True)
float_y = np.array([float(x) for x in y])
float_y

array([6.5 , 6.99, 6.99, ..., 1.38, 6.84, 5.74])

In [45]:
# 计算集成预测的MAE和R2
ensemble_mae = mean_absolute_error(float_y, meta_labels)
ensemble_r2 = np.corrcoef(float_y, meta_labels)[0, 1]**2

print(f'Ensemble MAE: {ensemble_mae:.2f}')
print(f'Ensemble R2: {ensemble_r2:.2f}')

Ensemble MAE: 13.92
Ensemble R2: 0.24


In [46]:
# 相对误差绝对值的平均值（Mean Absolute Percentage Error，MAPE）
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [47]:
# Nash效率系数（Nash-Sutcliffe efficiency coefficient，NSE）
def nash_sutcliffe_efficiency(y_true, y_pred):
    numerator = np.sum((y_true - y_pred) ** 2)
    denominator = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1 - numerator / denominator

In [48]:
# 相对均方根误差（Root Mean Square Percentage Error，RMSPE）
def root_mean_square_percentage_error(y_true, y_pred):
    return np.sqrt(np.mean(np.square((y_true - y_pred) / y_true)))

In [49]:
# 合格率（Accuracy）
def accuracy(y_true, y_pred, tolerance=0.1):
    errors = np.abs((y_true - y_pred) / y_true)
    return np.sum(errors <= tolerance) / len(errors)

In [50]:
mape = mean_absolute_percentage_error(float_y, meta_labels)
nse = nash_sutcliffe_efficiency(float_y, meta_labels)
rmspe = root_mean_square_percentage_error(float_y, meta_labels)
acc = accuracy(float_y, meta_labels)
print('MAPE:', mape)
print('NSE:', nse)
print('RMSPE:', rmspe)
print('Accuracy:', acc)

MAPE: 167.04729332906422
NSE: 0.22456184305178517
RMSPE: 3.636224768781798
Accuracy: 0.07228915662650602
