In [1]:
# 导入必要的库
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

In [2]:
# 加载CSV数据
data = pd.read_csv("./new_data.csv", header=None)
data

Unnamed: 0,0,1,2,3,4,5,6
0,date(日期),Q(径流),E(蒸发),P(降水),Year,Month,Day
1,1983/1/1,6.5,0.9,0.3,1983,1,1
2,1983/1/2,6.99,0,0.2,1983,1,2
3,1983/1/3,6.99,2.2,2.6,1983,1,3
4,1983/1/4,7.49,0.1,27.7,1983,1,4
...,...,...,...,...,...,...,...
1822,1987/12/27,4.52,0.1,2.4,1987,12,27
1823,1987/12/28,4.59,0.9,1.1,1987,12,28
1824,1987/12/29,1.38,1,1,1987,12,29
1825,1987/12/30,6.84,1.1,0,1987,12,30


In [3]:
data = data[1:]

In [4]:
# 将数据拆分为特征和标签
X = data.iloc[:, [2, 3, 4, 5, 6]].values
y = data.iloc[:, 1].values

In [5]:
X_float = X.astype(np.float)
y_float = y.astype(np.float)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """Entry point for launching an IPython kernel.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  


In [6]:
# 将数据集拆分为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X_float, y_float, test_size=0.3, random_state=42)

In [7]:
# 创建随机森林回归模型，并训练模型
rf_reg = RandomForestRegressor(n_estimators=100, random_state=0)
rf_reg.fit(X_train, y_train)

RandomForestRegressor(random_state=0)

In [8]:
# 对测试集进行预测
rf_y_pred = rf_reg.predict(X_test)

In [9]:
# 输出模型的均方误差
rf_mse = mean_squared_error(y_test, rf_y_pred)
print('随机森林回归模型的均方误差：', rf_mse)

随机森林回归模型的均方误差： 432.2376318853466


In [10]:
# 创建多元线性回归模型，并训练模型
multi_linear_reg = LinearRegression()
multi_linear_reg.fit(X_train, y_train)

LinearRegression()

In [11]:
# 对测试集进行预测
multi_linear_y_pred = multi_linear_reg.predict(X_test)

In [12]:
# 输出模型的均方误差
multi_linear_mse = mean_squared_error(y_test, multi_linear_y_pred)
print('多元线性回归模型的均方误差：', multi_linear_mse)

多元线性回归模型的均方误差： 606.0780218365761


In [13]:
# 创建多层感知器模型，并训练模型
mlp_reg = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', solver='adam', max_iter=1000)
mlp_reg.fit(X_train, y_train)

MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=1000)

In [14]:
# 对测试集进行预测
mlp_y_pred = mlp_reg.predict(X_test)

In [15]:
# 输出模型的均方误差
mlp_mse = mean_squared_error(y_test, mlp_y_pred)
print('多层感知器模型的均方误差：', mlp_mse)

多层感知器模型的均方误差： 648.286875185217


In [16]:
# 创建支持向量机模型，并训练模型
svm_reg = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
svm_reg.fit(X_train, y_train)

SVR(C=100, gamma=0.1)

In [17]:
# 对测试集进行预测
svm_y_pred = svm_reg.predict(X_test)

In [18]:
# 输出模型的均方误差
svm_mse = mean_squared_error(y_test, svm_y_pred)
print('多层感知器模型的均方误差：', svm_mse)

多层感知器模型的均方误差： 496.64842679118556


In [19]:
# 相对误差绝对值的平均值（Mean Absolute Percentage Error，MAPE）
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [20]:
# Nash效率系数（Nash-Sutcliffe efficiency coefficient，NSE）
def nash_sutcliffe_efficiency(y_true, y_pred):
    numerator = np.sum((y_true - y_pred) ** 2)
    denominator = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1 - numerator / denominator

In [21]:
# 相对均方根误差（Root Mean Square Percentage Error，RMSPE）
def root_mean_square_percentage_error(y_true, y_pred):
    return np.sqrt(np.mean(np.square((y_true - y_pred) / y_true)))

In [22]:
# 合格率（Accuracy）
def accuracy(y_true, y_pred, tolerance=0.1):
    errors = np.abs((y_true - y_pred) / y_true)
    return np.sum(errors <= tolerance) / len(errors)

In [23]:
def index(y_pred = rf_y_pred):
    mape = mean_absolute_percentage_error(y_test, y_pred)
    nse = nash_sutcliffe_efficiency(y_test, y_pred)
    rmspe = root_mean_square_percentage_error(y_test, y_pred)
    acc = accuracy(y_test, y_pred)
    print('MAPE:', mape)
    print('NSE:', nse)
    print('RMSPE:', rmspe)
    print('Accuracy:', acc)

In [24]:
print("Random Forest index: ")
index(rf_y_pred)

Random Forest index: 
MAPE: 81.82744263727048
NSE: 0.3673663914697809
RMSPE: 1.7600697161776628
Accuracy: 0.17335766423357665


In [25]:
print("multi_linear index: ")
index(multi_linear_y_pred)

multi_linear index: 
MAPE: 215.45027694762524
NSE: 0.1129293293300393
RMSPE: 5.20058583895857
Accuracy: 0.06386861313868614


In [26]:
print("MLP index: ")
index(mlp_y_pred)

MLP index: 
MAPE: 142.25900031430947
NSE: 0.05115141543253621
RMSPE: 3.3136801748559463
Accuracy: 0.072992700729927


In [27]:
print("SVM index: ")
index(svm_y_pred)

SVM index: 
MAPE: 92.30887284991672
NSE: 0.2730931708993205
RMSPE: 1.7339562074738426
Accuracy: 0.13138686131386862
