In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
from datetime import datetime,timedelta

from sklearn.metrics import  mean_squared_error as MSE
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import r2_score

def mbe(y_true, y_pred):

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    y_true = y_true.reshape( len(y_true),1 )
    y_pred = y_pred.reshape( len(y_pred),1 )
   
    diff = ( y_true - y_pred )
    mbe = diff.mean()
    return mbe

In [2]:
%pwd

'D:\\Data\\solar\\code'

In [3]:
#read solar_data
data = pd.read_csv('../data/processed/2019_bon_solar_data.csv')

In [4]:
SAVE_PATH = "../result/m2/0905/gs/"

if os.path.exists(SAVE_PATH) == 0:
    os.makedirs(SAVE_PATH)
    
LAGGED_HOUR = False
PREMODEL = True
maxVal = 1.5 # kt_maxVal


In [5]:
data_ = data[1:].copy()
print('data_.shape:',data_.shape)
data_.head()

data_.shape: (524918, 10)


Unnamed: 0,Time,zen,dw_solar,direct_n,diffuse,dw_ir,temp,rh,windspd,pressure
1,2019-01-01 00:01:00,105.28,-2.1,0.4,-0.1,336.2,5.5,93.1,10.7,978.8
2,2019-01-01 00:02:00,105.46,-2.1,0.4,-0.1,336.3,5.5,93.1,10.5,978.8
3,2019-01-01 00:03:00,105.65,-2.1,0.4,0.0,335.8,5.5,93.2,10.2,978.8
4,2019-01-01 00:04:00,105.83,-2.1,0.4,0.0,335.3,5.4,93.2,10.4,978.8
5,2019-01-01 00:05:00,106.01,-2.1,0.4,0.0,335.1,5.4,93.0,10.5,978.8


In [6]:
time = datetime(2019,1,1,0,1,)
time_= datetime(2020,1,1,0,0,)
date_range = pd.date_range(start=time, end=time_, freq="1min")  # freq="D"表示按天，可以按分钟，月，季度，年等

In [7]:
data_.loc[:,"Time"] = data_.loc[:,"Time"].astype('datetime64[s]')
data_ = data_.set_index("Time")

data_ = data_.reindex(index=date_range) 
print(data_.shape)

(525600, 9)


In [8]:
#
data_[data_<-1000] = np.NaN
data_[data_.loc[:,['dw_solar', 'direct_n', 'diffuse', 'dw_ir']]<0] = 0

In [9]:
#与m1不同的地方
mean_data = data_.resample('10T',origin='start').mean()

In [10]:
mean_data.head()


Unnamed: 0,zen,dw_solar,direct_n,diffuse,dw_ir,temp,rh,windspd,pressure
2019-01-01 00:01:00,106.104,0.0,0.4,0.09,335.19,5.43,92.77,10.55,978.84
2019-01-01 00:11:00,107.942,0.0,0.4,0.27,334.44,5.34,91.08,12.24,979.17
2019-01-01 00:21:00,109.792,0.0,0.4,0.15,333.67,5.13,91.66,12.83,979.57
2019-01-01 00:31:00,111.654,0.0,0.4,0.02,330.9,4.93,91.13,12.84,980.14
2019-01-01 00:41:00,113.526,0.0,0.4,0.02,331.74,4.85,91.06,11.94,980.43


In [11]:
time = datetime(2019,1,1,0,10,)
time_= datetime(2020,1,1,0,0,)
date_range_hour = pd.date_range(start=time, end=time_, freq="10min")  # freq="D"表示按天，可以按分钟，月，季度，年等

In [12]:
#clear_sky
cs = pd.read_csv("../data/bon_clear_sky_2019.csv",index_col = 0)
cs.index = cs.index.astype('datetime64[ns]')

In [13]:
#
cs=cs[1:]
print(cs.shape)

mean_cs = cs.resample('10T',origin='start').mean()
print(mean_cs.shape)
mean_cs.head()

(525599, 4)
(52560, 4)


Unnamed: 0_level_0,DNI_McClear,GHI_McClear,DNI_REST2,GHI_REST2
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-01 00:01:00,0.0,0.0,0.0,0.0
2019-01-01 00:11:00,0.0,0.0,0.0,0.0
2019-01-01 00:21:00,0.0,0.0,0.0,0.0
2019-01-01 00:31:00,0.0,0.0,0.0,0.0
2019-01-01 00:41:00,0.0,0.0,0.0,0.0


In [15]:
ghi_clc=mean_cs['GHI_McClear']
dni_clc=mean_cs['DNI_McClear']

In [16]:
mean_data['ghi_clc'] = ghi_clc.to_numpy()
mean_data['dni_clc'] = dni_clc.to_numpy()

mean_data['kt_ghi'] = mean_data['dw_solar']/(mean_data['ghi_clc']+0.01)
mean_data['kt_dni'] = mean_data['direct_n']/(mean_data['dni_clc']+0.01)

In [17]:
mean_data.loc[mean_data['kt_ghi']>= maxVal,'kt_ghi']= maxVal
mean_data.loc[mean_data['kt_dni']>= maxVal,'kt_dni']= maxVal

In [18]:
#ghi & dni
kt_ghi_target_cols = ['kt_ghi_target10','kt_ghi_target20','kt_ghi_target30','kt_ghi_target40','kt_ghi_target50','kt_ghi_target60','kt_ghi_target70', 'kt_ghi_target80','kt_ghi_target90','kt_ghi_target100','kt_ghi_target110','kt_ghi_target120','kt_ghi_target130','kt_ghi_target140','kt_ghi_target150','kt_ghi_target160','kt_ghi_target170','kt_ghi_target180']
kt_dni_target_cols = ['kt_dni_target10','kt_dni_target20','kt_dni_target30','kt_dni_target40','kt_dni_target50','kt_dni_target60','kt_dni_target70', 'kt_dni_target80','kt_dni_target90','kt_dni_target100','kt_dni_target110','kt_dni_target120','kt_dni_target130','kt_dni_target140','kt_dni_target150','kt_dni_target160','kt_dni_target170','kt_dni_target180']
ghi_lag_cols=['ghi_lag0','ghi_lag10','ghi_lag20','ghi_lag30','ghi_lag40','ghi_lag50','ghi_lag60','ghi_lag70','ghi_lag80','ghi_lag90','ghi_lag100','ghi_lag110','ghi_lag120','ghi_lag130','ghi_lag140','ghi_lag150','ghi_lag160','ghi_lag170','ghi_lag180']
dni_lag_cols=['dni_lag0','dni_lag10','dni_lag20','dni_lag30','dni_lag40','dni_lag50','dni_lag60','dni_lag70','dni_lag80','dni_lag90','dni_lag100','dni_lag110','dni_lag120','dni_lag130','dni_lag140','dni_lag150','dni_lag160','dni_lag170','dni_lag180']

In [19]:
kt_ghi = pd.DataFrame(np.full((mean_data.shape[0],len(kt_ghi_target_cols)), -9999), columns=kt_ghi_target_cols)
kt_dni = pd.DataFrame(np.full((mean_data.shape[0],len(kt_dni_target_cols)), -9999),columns=kt_dni_target_cols)
ghi_lag = pd.DataFrame(np.full((mean_data.shape[0],len(ghi_lag_cols)), -9999),columns=ghi_lag_cols)
dni_lag = pd.DataFrame(np.full((mean_data.shape[0],len(dni_lag_cols)), -9999),columns=dni_lag_cols)

In [20]:
# 与m1不同的地方
for num in range(len(kt_ghi_target_cols)):
  mean_data[kt_ghi_target_cols[num]] = mean_data.loc[:,'kt_ghi'].shift(periods=-1-num)

In [21]:
for num in range(len(kt_dni_target_cols)):
  mean_data[kt_dni_target_cols[num]] = mean_data.loc[:,'kt_dni'].shift(periods=-1-num)

In [22]:
for num in range(len(ghi_lag_cols)):
  mean_data[ghi_lag_cols[num]] = mean_data.loc[:,'kt_ghi'].shift(periods=num)

In [23]:
for num in range(len(dni_lag_cols)):
  mean_data[dni_lag_cols[num]] = mean_data.loc[:,'kt_dni'].shift(periods=num)

In [44]:
mean_data.to_csv(SAVE_PATH+'solar_mean_data.csv')

In [25]:
mean_data.loc[:,dni_lag_cols]

Unnamed: 0,dni_lag0,dni_lag10,dni_lag20,dni_lag30,dni_lag40,dni_lag50,dni_lag60,dni_lag70,dni_lag80,dni_lag90,dni_lag100,dni_lag110,dni_lag120,dni_lag130,dni_lag140,dni_lag150,dni_lag160,dni_lag170,dni_lag180
2019-01-01 00:01:00,1.5,,,,,,,,,,,,,,,,,,
2019-01-01 00:11:00,1.5,1.5,,,,,,,,,,,,,,,,,
2019-01-01 00:21:00,1.5,1.5,1.5,,,,,,,,,,,,,,,,
2019-01-01 00:31:00,1.5,1.5,1.5,1.5,,,,,,,,,,,,,,,
2019-01-01 00:41:00,1.5,1.5,1.5,1.5,1.5,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-31 23:11:00,1.5,1.5,1.5,1.5,0.0,0.0,0.0,0.0,0.0,0.000122,0.000163,0.000465,0.000434,0.007256,0.023464,0.000501,0.072720,0.023693,0.004613
2019-12-31 23:21:00,1.5,1.5,1.5,1.5,1.5,0.0,0.0,0.0,0.0,0.000000,0.000122,0.000163,0.000465,0.000434,0.007256,0.023464,0.000501,0.072720,0.023693
2019-12-31 23:31:00,1.5,1.5,1.5,1.5,1.5,1.5,0.0,0.0,0.0,0.000000,0.000000,0.000122,0.000163,0.000465,0.000434,0.007256,0.023464,0.000501,0.072720
2019-12-31 23:41:00,1.5,1.5,1.5,1.5,1.5,1.5,1.5,0.0,0.0,0.000000,0.000000,0.000000,0.000122,0.000163,0.000465,0.000434,0.007256,0.023464,0.000501


In [26]:
data_2 = mean_data.copy()

data_2['No'] = [i for i in range(len(data_2))]
data_2.index = mean_data.index

data_2.dropna(axis=0,inplace = True)

print(data_2.shape)

(51201, 88)


In [27]:
#drop zenith > 85
drop_zen_time = data_2[data_2.loc[:,'zen']>85].index.to_list()
len(drop_zen_time)

27643

In [28]:
#检索/保存目标zen_time在3小时内的索引
#时间太长，可优化
drop_zen_time_3h = list()
for i in tqdm(range(len(drop_zen_time))):
    start = drop_zen_time[i] +timedelta(hours=-3)
    end= drop_zen_time[i] + timedelta(hours=3)
    if (drop_zen_time[i] not in drop_zen_time_3h) or (start not in drop_zen_time_3h) or (end not in drop_zen_time_3h):
        range_hour = pd.date_range(start=start, end=end, freq="10min") #.to_list()
        for t in range_hour:
            if t not in drop_zen_time_3h:
                drop_zen_time_3h.append(t)

100%|████████████████████████████████████████████████████████████████████████████| 27643/27643 [25:48<00:00, 17.85it/s]


In [29]:
#drop drop_zen_time_3h
for i in tqdm(pd.DatetimeIndex(drop_zen_time_3h)):
    if  i in data_2.index:
      data_2.drop(i,axis=0,inplace = True)

data_3 = data_2.copy()
print(data_3.shape)

#检查是否存在空值
data_3.isna().any().sum()
print(data_3.shape)

100%|████████████████████████████████████████████████████████████████████████████| 40870/40870 [07:51<00:00, 86.63it/s]

(10910, 88)
(10910, 88)





In [30]:
data_4 = data_3.copy()
data_4['time']  = data_4.index

In [31]:
gm1 = []
for i in range(len(data_4[:])):
    if data_4.iloc[i,-1].month ==1:
        gm1.append(i)
gm2 = []
for i in range(len(data_4[:])):
    if data_4.iloc[i,-1].month ==2:
        gm2.append(i)
gm3 = []
for i in range(len(data_4[:])):
    if data_4.iloc[i,-1].month ==3:
        gm3.append(i)
gm4 = []
for i in range(len(data_4[:])):
    if data_4.iloc[i,-1].month ==4:
        gm4.append(i)
gm5 = []
for i in range(len(data_4[:])):
    if data_4.iloc[i,-1].month ==5:
        gm5.append(i)
gm6 = []
for i in range(len(data_4[:])):
    if data_4.iloc[i,-1].month ==6:
        gm6.append(i)
gm7 = []
for i in range(len(data_4[:])):
    if data_4.iloc[i,-1].month ==7:
        gm7.append(i)
gm8 = []
for i in range(len(data_4[:])):
    if data_4.iloc[i,-1].month ==8:
        gm8.append(i)
gm9 = []
for i in range(len(data_4[:])):
    if data_4.iloc[i,-1].month ==9:
        gm9.append(i)
gm10 = []
for i in range(len(data_4[:])):
    if data_4.iloc[i,-1].month ==10:
        gm10.append(i)
gm11 = []
for i in range(len(data_4[:])):
    if data_4.iloc[i,-1].month ==11:
        gm11.append(i)
gm12 = []
for i in range(len(data_4[:])):
    if data_4.iloc[i,-1].month ==12:
        gm12.append(i)

In [32]:
train_index = gm1[:int(len(gm1)*0.7)]+gm2[:int(len(gm2)*0.7)]+gm3[:int(len(gm3)*0.7)]+gm4[:int(len(gm4)*0.7)]+gm5[:int(len(gm5)*0.7)]+gm6[:int(len(gm6)*0.7)]+gm7[:int(len(gm7)*0.7)]+gm8[:int(len(gm8)*0.7)]+gm9[:int(len(gm9)*0.7)]+gm10[:int(len(gm10)*0.7)]+gm11[:int(len(gm11)*0.7)]+gm12[:int(len(gm12)*0.7)]
test_index = gm1[int(len(gm1)*0.7):]+gm2[int(len(gm2)*0.7):]+gm3[int(len(gm3)*0.7):]+gm4[int(len(gm4)*0.7):]+gm5[int(len(gm5)*0.7):]+gm6[int(len(gm6)*0.7):]+gm7[int(len(gm7)*0.7):]+gm8[int(len(gm8)*0.7):]+gm9[int(len(gm9)*0.7):]+gm10[int(len(gm10)*0.7):]+gm11[int(len(gm11)*0.7):]+gm12[int(len(gm12)*0.7):]

In [33]:


#save_InputandOutput
data_4.to_csv(SAVE_PATH+"bon_10m.csv",index=0)
data_4.iloc[train_index].to_csv(SAVE_PATH+"bon_train_10m.csv",index=0)
data_4.iloc[test_index].to_csv(SAVE_PATH+"bon_test_10m.csv",index=0)

In [55]:
#clc_target

In [35]:
ghi_clc_cols = ['ghi_clc1','ghi_clc2','ghi_clc3','ghi_clc4','ghi_clc5','ghi_clc6','ghi_clc7','ghi_clc8','ghi_clc9','ghi_clc10','ghi_clc11','ghi_clc12','ghi_clc13','ghi_clc14','ghi_clc15','ghi_clc16','ghi_clc17','ghi_clc18']
dni_clc_cols = ['dni_clc1','dni_clc2','dni_clc3','dni_clc4','dni_clc5','dni_clc6','dni_clc7','dni_clc8','dni_clc9','dni_clc10','dni_clc11','dni_clc12','dni_clc13','dni_clc14','dni_clc15','dni_clc16','dni_clc17','dni_clc18']

In [202]:
test_set = data_4.iloc[test_index]
train_set = data_4.iloc[train_index]

In [203]:
train_set.shape

(7631, 89)

In [204]:
data_4.head()

Unnamed: 0,zen,dw_solar,direct_n,diffuse,dw_ir,temp,rh,windspd,pressure,ghi_clc,...,dni_lag110,dni_lag120,dni_lag130,dni_lag140,dni_lag150,dni_lag160,dni_lag170,dni_lag180,No,time
2019-01-01 16:51:00,64.639,40.67,0.4,40.97,317.25,1.27,95.3,3.67,999.69,439.2384,...,0.000589,0.000613,0.000646,0.00069,0.000749,0.000832,0.000957,0.001167,101,2019-01-01 16:51:00
2019-01-01 17:01:00,64.159,41.97,0.4,42.09,317.13,1.21,95.66,3.75,999.45,448.1874,...,0.000571,0.000589,0.000613,0.000646,0.00069,0.000749,0.000832,0.000957,102,2019-01-01 17:01:00
2019-01-01 17:11:00,63.76,41.36,0.4,41.42,316.93,1.17,95.82,3.8,999.4,455.5284,...,0.000668,0.000571,0.000589,0.000613,0.000646,0.00069,0.000749,0.000832,103,2019-01-01 17:11:00
2019-01-01 17:21:00,63.442,42.36,0.4,42.72,316.92,1.2,95.95,3.71,999.4,461.244,...,0.000967,0.000668,0.000571,0.000589,0.000613,0.000646,0.00069,0.000749,104,2019-01-01 17:21:00
2019-01-01 17:31:00,63.212,46.31,0.4,46.37,316.91,1.24,95.71,3.78,999.25,465.3126,...,0.000922,0.000967,0.000668,0.000571,0.000589,0.000613,0.000646,0.00069,105,2019-01-01 17:31:00


In [205]:
mean_data = pd.read_csv(SAVE_PATH+'solar_mean_data.csv')

In [206]:
df_clc = pd.DataFrame()
for num in range(len(kt_ghi_target_cols)):
  df_clc[ghi_clc_cols[num]] = mean_data.loc[:,'ghi_clc'].shift(periods=-1-num)
for num in range(len(kt_dni_target_cols)):
  df_clc[dni_clc_cols[num]] = mean_data.loc[:,'dni_clc'].shift(periods=-1-num)

In [207]:
data_target_clc = df_clc.loc[test_set.No]
data_target_clc.to_csv(SAVE_PATH+'/target_clc_10m.csv')

In [184]:
s_data2 = pd.read_csv('../data/BON.csv')
s_data2.rename(columns={'Unnamed: 0': 'time'},inplace =True)
datetimes = s_data2["time"]
datetimes =pd.DatetimeIndex(datetimes)

In [208]:
#s_data2.to_csv("concatdata\\bon\\BON.csv",index = 0)
s_data2["time"] = pd.DatetimeIndex(datetimes)
s_data2.set_index('time')
s_data2.index = s_data2["time"]
s_data2.drop('time',axis = 1,inplace = True)

In [209]:
def ch_cols(Data,target_cols,name):
    target_ =list()
    for i in range(len(Data.columns)):
        col_name = Data.columns[i]
        col_split = col_name.split('_')[0]
        if col_split == name:
            target_.append(col_name)
    return target_

In [210]:
ch_01 = ch_cols(s_data2,s_data2.columns,'C01')
ch_02 = ch_cols(s_data2,s_data2.columns,'C02')
ch_03 = ch_cols(s_data2,s_data2.columns,'C03')
ch_04 = ch_cols(s_data2,s_data2.columns,'C04')
ch_05 = ch_cols(s_data2,s_data2.columns,'C05')

In [211]:
from sklearn.preprocessing import MinMaxScaler
def MinMax_Ch(data):
    scaler = MinMaxScaler()
    scaler.fit(data)
    return scaler.transform(data)
    

In [212]:
target_ch_cols = ch_01+ch_02+ch_03+ch_04+ch_05
s_data3 = s_data2.loc[:,target_ch_cols]

In [213]:
s_data3.loc[:,ch_01]= MinMax_Ch(s_data3.loc[:,ch_01])
s_data3.loc[:,ch_02]= MinMax_Ch(s_data3.loc[:,ch_02])
s_data3.loc[:,ch_03]= MinMax_Ch(s_data3.loc[:,ch_03])
s_data3.loc[:,ch_04]= MinMax_Ch(s_data3.loc[:,ch_04])
s_data3.loc[:,ch_05]= MinMax_Ch(s_data3.loc[:,ch_05])

In [214]:
#train_set = pd.read_csv("/content/drive/MyDrive/solar/test/0901_ini/gs/bon_train_10m_MC.csv",index_col = 'time')
#test_set = pd.read_csv('/content/drive/MyDrive/solar/test/0901_ini/gs/bon_test_10m_MC.csv',index_col = 'time')
#data_target_clc =  pd.read_csv("/content/drive/MyDrive/solar/test/0901_ini/gs/target_clc_10m_MC.csv",index_col = 'time')
persistence = pd.read_csv('../data/persistence_MC.csv')

#train_set.index = pd.DatetimeIndex(train_set.index)
#test_set.index = pd.DatetimeIndex(test_set.index)

In [215]:
s_data4 = s_data3.iloc[train_set.No,:]
s_data5 = s_data3.iloc[test_set.No,:]

In [216]:
Reindex_train = list(map(lambda x: x +timedelta(minutes=9) , train_set.index.to_list()))  # 列表每个元素+9min
Reindex_test = list(map(lambda x: x +timedelta(minutes=9) , test_set.index.to_list()))  # 列表每个元素+9min

In [217]:
train_set.index = Reindex_train
test_set.index = Reindex_test

In [218]:
train_set = pd.concat([train_set,s_data4],axis =1)
test_set = pd.concat([test_set,s_data5],axis =1)

In [219]:
print("train_set",train_set.shape)
print("test_set",test_set.shape)

train_set (7631, 694)
test_set (3279, 694)


In [221]:
print("train_set",train_set.shape)
print("test_set",test_set.shape)
train_set_drop = train_set.dropna(axis=0)
test_set_drop = test_set.dropna(axis=0)
test_set['No'] = range(len(test_set))

print("train_set",train_set_drop.shape)
print("test_set",test_set_drop.shape)

train_set (7631, 694)
test_set (3279, 694)
train_set (5746, 694)
test_set (2296, 694)


In [223]:
kt_ghi_target_cols = ['kt_ghi_target10','kt_ghi_target20','kt_ghi_target30','kt_ghi_target40','kt_ghi_target50','kt_ghi_target60','kt_ghi_target70', 'kt_ghi_target80','kt_ghi_target90','kt_ghi_target100','kt_ghi_target110','kt_ghi_target120','kt_ghi_target130','kt_ghi_target140','kt_ghi_target150','kt_ghi_target160','kt_ghi_target170','kt_ghi_target180']
kt_dni_target_cols = ['kt_dni_target10','kt_dni_target20','kt_dni_target30','kt_dni_target40','kt_dni_target50','kt_dni_target60','kt_dni_target70', 'kt_dni_target80','kt_dni_target90','kt_dni_target100','kt_dni_target110','kt_dni_target120','kt_dni_target130','kt_dni_target140','kt_dni_target150','kt_dni_target160','kt_dni_target170','kt_dni_target180']
ghi_lag_cols=['ghi_lag0','ghi_lag10','ghi_lag20','ghi_lag30','ghi_lag40','ghi_lag50','ghi_lag60','ghi_lag70','ghi_lag80','ghi_lag90','ghi_lag100','ghi_lag110','ghi_lag120','ghi_lag130','ghi_lag140','ghi_lag150','ghi_lag160','ghi_lag170','ghi_lag180']
dni_lag_cols=['dni_lag0','dni_lag10','dni_lag20','dni_lag30','dni_lag40','dni_lag50','dni_lag60','dni_lag70','dni_lag80','dni_lag90','dni_lag100','dni_lag110','dni_lag120','dni_lag130','dni_lag140','dni_lag150','dni_lag160','dni_lag170','dni_lag180']

In [224]:
print("train_set",train_set.shape)
print("test_set",test_set.shape)
test_set['No'] = range(len(test_set))
train_set_drop = train_set.dropna(axis=0)
test_set_drop = test_set.dropna(axis=0)

print("train_set",train_set_drop.shape)
print("test_set",test_set_drop.shape)

train_set (7631, 694)
test_set (3279, 694)
train_set (5746, 694)
test_set (2296, 694)


In [225]:
target_cols = kt_ghi_target_cols + kt_dni_target_cols
train_cols = ghi_lag_cols +dni_lag_cols +target_ch_cols

train_X = train_set_drop[train_cols]
train_Y = train_set_drop[target_cols]

test_X = test_set_drop[train_cols]
test_Y = test_set_drop[target_cols]


print("train_X",train_X.shape)
print("train_Y",train_Y.shape)

print("test_X",test_X.shape)
print("test_Y",test_Y.shape)


train_X (5746, 643)
train_Y (5746, 36)
test_X (2296, 643)
test_Y (2296, 36)


In [226]:
target_clc = data_target_clc.iloc[test_set_drop.No]

In [227]:
train_set_drop[target_cols+train_cols].to_csv(SAVE_PATH+'train_set_drop.csv')
test_set_drop[target_cols+train_cols].to_csv(SAVE_PATH+'test_set_drop.csv')

In [228]:
per_rmse = persistence['rmse']

In [229]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop, Adam

from sklearn.preprocessing import MinMaxScaler
#from keras.wrappers.scikit_learn import KerasRegressor
#import scikeras
#from scikeras.wrappers import KerasRegressor
#from sklearn.model_selection import GridSearchCV

In [230]:
target_cols = kt_ghi_target_cols + kt_dni_target_cols
lagged_cols = ghi_lag_cols +dni_lag_cols + target_ch_cols

df_for_training = train_set_drop[target_cols+lagged_cols]
df_for_testing = test_set_drop[target_cols+lagged_cols]

print("df_for_training",df_for_training.shape)
print("df_for_testing",df_for_testing.shape)



df_for_training (5746, 679)
df_for_testing (2296, 679)


In [231]:
scaler = MinMaxScaler(feature_range=(0,1))
df_for_training_scaled = scaler.fit_transform(df_for_training)
df_for_testing_scaled = scaler.transform(df_for_testing)

In [232]:
Just_ghi = False
if Just_ghi:
    target_num  = int(len(target_cols)/2)
else:
    target_num  = len(target_cols)

In [233]:
def createXY(dataset,n_past):
    dataX = []
    dataY = []
    for i in range(n_past,len(dataset)):
            dataX.append(dataset[i - n_past:i, len(target_cols):dataset.shape[1]])
            dataY.append(dataset[i,:target_num])
    return np.array(dataX),np.array(dataY)
trainX,trainY=createXY(df_for_training_scaled,1)
testX,testY=createXY(df_for_testing_scaled,1)

In [234]:
print("trainX",trainX.shape)
print("trainY",trainY.shape)
print("testX",testX.shape)
print("testY",testY.shape)

trainX (5745, 1, 643)
trainY (5745, 36)
testX (2295, 1, 643)
testY (2295, 36)


In [235]:
target_clc = data_target_clc.iloc[test_set_drop.No].iloc[:testY.shape[0],:target_num].to_numpy()
print(target_clc.shape)

(2295, 36)


In [236]:
grid_model = Sequential()
grid_model.add(LSTM(200,return_sequences=True,input_shape=(trainX.shape[1],trainX.shape[2])))
grid_model.add(LSTM(50))
#grid_model.add(Dropout(0.2))
grid_model.add(Dense(target_num))
    #adam=Adam(learning_rate=0.03)
grid_model.compile(loss = 'mse',optimizer = 'adam')
grid_model.fit(trainX,trainY,batch_size =1000,epochs = 200)

prediction=grid_model.predict(testX)
print("prediction\n", prediction)
print("\nPrediction Shape-",prediction.shape)
print(r2_score(testY,prediction))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [237]:
per_rmse = per_rmse[:target_num]

In [238]:
#1001
mbe_list = list() 
mae = list()
rmse = list()
r2 =  list()

y_pre_clc = prediction*target_clc
y_t_clc = testY*target_clc


for i in range(target_num):
    mae.append(MAE(y_t_clc[:,i],y_pre_clc[:,i]))
    rmse.append(np.sqrt(MSE(y_t_clc[:,i],y_pre_clc[:,i])))
    r2.append(r2_score(y_t_clc[:,i],y_pre_clc[:,i]))
    mbe_list.append(mbe(y_t_clc[:,i],y_pre_clc[:,i]))

s = 1-(np.array(rmse)/np.array(per_rmse))
data_lstm = pd.DataFrame([mbe_list,mae,rmse,r2,s],index=["mbe","mae", "rmse", "r2",'s'])
data_lstm=data_lstm.T
#s_score.append(data_lstm.loc[:,'s'])
data_lstm

Unnamed: 0,mbe,mae,rmse,r2,s
0,-11.1584,60.92733,87.113589,0.850821,0.223152
1,-29.379732,69.916724,97.407918,0.81349,0.295064
2,-30.321568,72.402429,100.986959,0.799824,0.319598
3,-31.506658,76.243952,105.284491,0.783457,0.324178
4,-25.338214,76.743586,106.158887,0.779953,0.352754
5,-32.701117,77.694756,106.743066,0.755563,0.376442
6,-34.721633,78.984216,108.052859,0.749999,0.389467
7,-39.351484,81.537531,111.469415,0.734325,0.376429
8,-43.151733,82.272128,113.62212,0.724738,0.374014
9,-36.154595,81.594189,113.295669,0.727122,0.379611


In [None]:
data_lstm.to_csv(SAVE_PATH+'result/lstm'+'_10m.csv',index = 0)