In [54]:
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.metrics import r2_score, mean_absolute_error
from pandas import DataFrame
from pandas import concat
import copy

# Convert a time series into a supervised-learning table
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a (possibly multivariate) time series as a supervised-learning table.

    Each output row holds the ``n_in`` lagged observations followed by the
    current observation and ``n_out - 1`` future observations.

    Args:
        data: Anything ``pandas.DataFrame`` accepts: a 2-D array, a list of
            rows, a flat list / 1-D array (treated as one variable), a Series
            or a DataFrame. Rows are time steps, columns are variables.
        n_in: Number of lag steps (t-n_in ... t-1) placed first in each row.
        n_out: Number of forecast steps (t ... t+n_out-1) placed after the lags.
        dropnan: If True, rows containing NaN are removed. Shifting always
            creates NaN at the edges of the series.

    Returns:
        DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and
        ``var<j>(t+<i>)``, with variables numbered from 1.
    """
    df = DataFrame(data)
    # Take the variable count from the frame itself, so that lists of rows and
    # 1-D arrays get the right number of column names instead of a hard-coded
    # guess based on the input's Python type.
    n_vars = df.shape[1]
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg = agg.dropna()
    return agg

# RMSE (root mean squared error)
def rmse(y, yhat):
    """Return the root mean squared error between targets ``y`` and predictions ``yhat``.

    The mean squared error comes from ``sklearn.metrics.mean_squared_error``;
    this function only takes its square root.
    """
    mean_sq_err = metrics.mean_squared_error(y, yhat)
    return np.sqrt(mean_sq_err)

# SMAPE (symmetric mean absolute percentage error)
def smape(y, yhat):
    """Return the symmetric mean absolute percentage error, in percent.

    Computed as ``200 * mean(|yhat - y| / (|yhat| + |y|))``.

    Args:
        y: Actual values (array-like; converted with ``np.asarray``).
        yhat: Predicted values (array-like, broadcastable against ``y``).

    Returns:
        SMAPE as a float in the range [0, 200] for finite inputs.

    Notes:
        A term whose target and prediction are both zero has a 0/0 ratio.
        It is counted as zero error instead of turning the whole metric into
        NaN. NaN values in the inputs still propagate to the result.
    """
    y = np.asarray(y, dtype=float)
    yhat = np.asarray(yhat, dtype=float)
    abs_err = np.abs(yhat - y)
    scale = np.abs(yhat) + np.abs(y)
    # Divide only where the denominator is non-zero; the remaining (0/0)
    # positions keep the 0.0 pre-filled in `out`.
    ratio = np.divide(abs_err, scale, out=np.zeros_like(abs_err), where=scale != 0)
    return 2.0 * np.mean(ratio) * 100


# Load the SCADA records, using the first CSV column as the row index.
serScada=pd.read_csv('./data/serScada.csv', index_col=0)
# Convert the string index into a datetime index.
serScada.index = pd.to_datetime(serScada.index)
# Show the frame as the cell output.
serScada


Unnamed: 0,num,wind speed(m/s),wind direction,theoretical power(kW),active power(kW)
2018-01-01 00:00:00,1,5.311336,259.994904,416.328908,380.047791
2018-01-01 00:10:00,2,5.672167,268.641113,519.917511,453.769196
2018-01-01 00:20:00,3,5.216037,272.564789,390.900016,306.376587
2018-01-01 00:30:00,4,5.659674,271.258087,516.127569,419.645905
2018-01-01 00:40:00,5,5.577941,265.674286,491.702972,380.650696
...,...,...,...,...,...
2018-12-31 23:10:00,52556,11.404030,80.502724,3397.190793,2963.980957
2018-12-31 23:20:00,52557,7.332648,84.062599,1173.055771,1684.353027
2018-12-31 23:30:00,52558,8.435358,84.742500,1788.284755,2201.106934
2018-12-31 23:40:00,52559,9.421366,84.297913,2418.382503,2515.694092


In [56]:
# Select data columns 1-3 (0-based, not counting the index) as a NumPy array.
# Per the table displayed above these are wind speed, wind direction and
# theoretical power.
date_pro = serScada.iloc[:,[1,2,3]].values

# NOTE: no scaling is performed in this cell; the raw values are passed through
# unchanged (no scaler is fitted or stored here).
data_norm = date_pro

# to supervised learning
n_timesteps = 10  # number of past time steps used as inputs
n_features = data_norm.shape[1]  # number of variables per time step
data_supervised = series_to_supervised(data_norm, n_timesteps, 1)  # n_timesteps lagged steps plus step t; step t is the prediction target

# drop columns we don't want to predict
relain = np.array([3])  # 1-based index of the variable to keep at step t, i.e. the prediction target
data_supervised_droped = copy.deepcopy(data_supervised)  # work on a deep copy so data_supervised itself is left unchanged
remov = np.array([x for x in range(data_norm.shape[1])]) + n_timesteps*n_features  # array([30, 31, 32]): positions of all step-t columns
remov = np.delete(remov,relain-1)  # array([30, 31]): step-t columns to drop (everything except the target)
data_supervised_droped.drop(data_supervised_droped.columns[remov], axis=1, inplace=True)  # remove the step-t columns that are not predicted

# split into train and test sets
n_train_num = int(len(data_supervised_droped)*0.8)  # first 80% of the rows form the training set
train = data_supervised_droped.values[:n_train_num, :]  # rows 0 .. n_train_num-1 (chronological order, no shuffling)
test = data_supervised_droped.values[n_train_num:, :]  # remaining rows
# y = data_supervised_droped.values[:,n_timesteps*n_features][n_train_num:]  # test-set y, for later comparison of results

# split into input and outputs
X_train, y_train = train[:, :-1], train[:, -1]  # all columns but the last are inputs; the last column is the target
X_test, y_test = test[:, :-1], test[:, -1]  # X and y are both numpy.ndarray
print(len(y_test))
y_test

10510


array([   0.        ,    0.        ,    0.        , ..., 1788.28475526,
       2418.38250336, 2779.18409628])

In [64]:
# Score the predictions saved in SG_3_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_3_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 281.25
mae = 169.78
smape = 39.60
r2_score = 0.96


In [66]:
# Score the predictions saved in SG_5_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_5_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 115.52
mae = 70.76
smape = 32.27
r2_score = 0.99


In [67]:
# Score the predictions saved in SG_7_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_7_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 153.50
mae = 104.54
smape = 36.81
r2_score = 0.99


In [68]:
# Score the predictions saved in SG_9_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_9_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 170.99
mae = 103.11
smape = 34.70
r2_score = 0.98


In [69]:
# Score the predictions saved in SG_11_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_11_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 187.61
mae = 116.96
smape = 36.09
r2_score = 0.98


In [70]:
# Score the predictions saved in SG_13_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_13_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 202.78
mae = 122.27
smape = 38.39
r2_score = 0.98


In [71]:
# Score the predictions saved in SG_15_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_15_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 215.09
mae = 130.58
smape = 38.24
r2_score = 0.97


In [72]:
# Score the predictions saved in SG_17_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_17_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 226.97
mae = 139.54
smape = 37.13
r2_score = 0.97


In [73]:
# Score the predictions saved in SG_19_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_19_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 236.80
mae = 144.91
smape = 38.96
r2_score = 0.97


In [74]:
# Score the predictions saved in SG_21_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_21_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 248.04
mae = 151.02
smape = 40.06
r2_score = 0.97


In [75]:
# Score the predictions saved in SG_23_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_23_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 258.54
mae = 160.96
smape = 39.03
r2_score = 0.96


In [76]:
# Score the predictions saved in SG_25_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_25_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 266.47
mae = 161.88
smape = 40.73
r2_score = 0.96


In [77]:
# Score the predictions saved in SG_27_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_27_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 273.91
mae = 169.60
smape = 40.80
r2_score = 0.96


In [79]:
# Score the predictions saved in SG_29_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_29_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))

rmse = 282.26
mae = 182.46
smape = 40.31
r2_score = 0.95


In [78]:
# Score the predictions saved in SG_31_2.csv against the held-out targets.
result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_31_2.csv', index_col=False)
y_pred = result.values[:, 1]  # column 1 of the file is used as the prediction

print('rmse = %.2f' % rmse(y_test, y_pred))
print('mae = %.2f' % mean_absolute_error(y_test, y_pred))
print('smape = %.2f' % smape(y_test, y_pred))
print('r2_score = %.2f' % r2_score(y_test, y_pred))





rmse = 285.25
mae = 175.56
smape = 40.97
r2_score = 0.95


In [None]:
# Candidate w values: the odd numbers 3, 5, ..., 31.
x = (np.array(range(15))+1)*2+1 # w
# Candidate k values: the integers 1 .. 30 (up to one less than the largest w).
y = np.array(range(x[-1]-1))+1 # k
# After meshgrid, x[0, i] walks the w values and y[j, 0] walks the k values.
x,y = np.meshgrid(x, y)


# Visit every (w, k) pair and load the matching SG_<w>_<k>.csv file.
for i in range(x.shape[1]): # w
    for j in range(x.shape[0]): # k
        # Only pairs with k strictly smaller than w are loaded.
        if y[j,0]<x[0,i]: # k < w
            # NOTE(review): `result` is overwritten on every iteration and nothing
            # is computed from it in this cell, so only the last file read survives.
            # NOTE(review): index_col=0 is used here, while the evaluation cells
            # read the same kind of file with index_col=False; values[:, 1] would
            # then select a different column. Confirm which is intended.
            result = pd.read_csv('./predict-wind-turbine/compare_filter/SG_' + str(x[0,i]) + '_' + str(y[j,0]) + '.csv', index_col=0)

In [80]:
# Scratch arithmetic: 32.27 equals the SMAPE printed for SG_5_2.csv above;
# the origin of 56.34 is not shown in this notebook.
56.34-32.27

24.07