In [1]:
import numpy as np
import pandas as pd
import lightgbm as lgb
import warnings
warnings.filterwarnings('ignore')

In [2]:
def NSE(y_pred, y_true):
    """Two-segment weighted efficiency score; a perfect prediction scores 100.

    Each segment contributes the ratio of its squared prediction error to the
    squared deviation of the true values from the mean of the *whole* true
    series (the Nash-Sutcliffe form of 1 - SSE / SST). The first 16 positions
    carry weight 0.65 and all remaining positions carry weight 0.35.

    Parameters
    ----------
    y_pred : array-like
        Predicted values, in the same positional order as ``y_true``.
    y_true : array-like
        Observed values.

    Returns
    -------
    float
        ``100 * (1 - 0.65 * a / b - 0.35 * c / d)``.

    Notes
    -----
    Zero denominators are not guarded: if the true values of a segment all
    equal the overall mean, the division yields inf/nan.
    """
    # Work on plain positional arrays. With two pandas Series the subtraction
    # below would align on index labels instead of position, producing NaNs
    # that np.sum silently skips; plain lists would not support `-` at all.
    y_pred = np.asarray(y_pred, dtype=float)
    y_true = np.asarray(y_true, dtype=float)

    y_mean = np.mean(y_true)
    a = np.sum(np.square(y_true[:16] - y_pred[:16]))
    b = np.sum(np.square(y_true[:16] - y_mean))
    c = np.sum(np.square(y_true[16:] - y_pred[16:]))
    d = np.sum(np.square(y_true[16:] - y_mean))
    return 100 * (1 - 0.65 * a / b - 0.35 * c / d)

In [3]:
# Read the inflow workbook and add `dt`, a 'YYYY-MM-DD' string derived from
# each row's TimeStample, used later as a day-level join key.
water = (
    pd.read_excel('../入库流量数据.xlsx')
    .assign(dt=lambda frame: frame['TimeStample'].dt.strftime('%Y-%m-%d'))
)
water.head()

Unnamed: 0,TimeStample,Qi,dt
0,2013-01-01 02:00:00,0.018201,2013-01-01
1,2013-01-01 05:00:00,0.018196,2013-01-01
2,2013-01-01 08:00:00,0.030095,2013-01-01
3,2013-01-01 11:00:00,0.123196,2013-01-01
4,2013-01-01 14:00:00,0.133178,2013-01-01


In [4]:
# Cut three training windows out of `water`. For every window the lower
# timestamp bound is inclusive and the upper bound is exclusive.
train1, train2, train3 = (
    water.loc[(water['TimeStample'] >= start) & (water['TimeStample'] < stop)]
    for start, stop in (
        ('2018-01-02 02:00:00', '2018-02-01 00:00:00'),
        ('2018-07-02 02:00:00', '2018-08-01 00:00:00'),
        ('2018-10-02 02:00:00', '2018-11-01 00:00:00'),
    )
)
print(train1.shape, train2.shape, train3.shape)

(240, 3) (240, 3) (240, 3)


In [5]:
# Stack the three windows row-wise into one training frame; the original row
# labels of each window are kept as-is.
train = pd.concat((train1, train2, train3), axis=0)
print(train.shape)

(720, 3)


In [6]:
# Read the rainfall-forecast workbook, derive the 'YYYY-MM-DD' string key `dt`
# from TimeStample, then drop the raw timestamp column.
rain = (
    pd.read_excel('../降雨预报数据.xlsx')
    .assign(dt=lambda frame: frame['TimeStample'].dt.strftime('%Y-%m-%d'))
    .drop(columns='TimeStample')
)
rain.head()

Unnamed: 0,D1,D2,D3,D4,D5,dt
0,0.0625,0.018182,0.014286,0.1,0.1,2013-03-11
1,0.125,0.0,0.014286,0.2,0.1,2013-03-12
2,0.025,0.090909,0.142857,0.1,0.0,2013-03-13
3,0.0375,0.181818,0.071429,0.0,0.1,2013-03-14
4,0.1,0.036364,0.0,0.16,0.1,2013-03-15


In [7]:
# Keep only D1 from the forecast columns and add `D1_shift`: the D1 value of
# the preceding row (NaN for the first row).
# NOTE(review): this is a "previous day" feature only if rows are sorted by
# date with no gaps - TODO confirm.
rain = (
    rain
    .drop(columns=['D2', 'D3', 'D4', 'D5'])
    .assign(D1_shift=lambda frame: frame['D1'].shift(1))
)

In [8]:
# Read the environment workbook and expose its timestamp column as the join
# key `dt`.
environment = (
    pd.read_excel('../环境表.xlsx')
    .rename(columns={'TimeStample': 'dt'})
)
# The other frames build `dt` as a '%Y-%m-%d' string. The dtype pandas infers
# for this column is not visible here (Excel date cells usually load as
# datetime64), so normalise it to the same string format to guarantee the key
# matches when merging on `dt`. Values already in 'YYYY-MM-DD' form are
# unchanged by this round trip.
environment['dt'] = pd.to_datetime(environment['dt']).dt.strftime('%Y-%m-%d')
environment.head()

Unnamed: 0,dt,T,w,wd
0,2013-01-01,0.168571,0.173913,999012
1,2013-01-02,0.157143,0.478261,999004
2,2013-01-03,0.128571,0.717391,999004
3,2013-01-04,0.037143,0.304348,999003
4,2013-01-05,0.071429,0.23913,999003


In [9]:
# Add a one-row lag of each environment variable as `<name>_shift`
# (first row becomes NaN).
# NOTE(review): assumes rows are ordered by date with no gaps - TODO confirm.
environment = environment.assign(**{
    name + '_shift': environment[name].shift(1)
    for name in ('T', 'w', 'wd')
})

In [13]:
# Read the telemetry-station rainfall workbook, using TimeStample as the row
# index so that the remaining columns are the per-station readings.
station_rain = pd.read_excel(
    '../遥测站降雨数据.xlsx',
    index_col='TimeStample',
)

In [14]:
# Row-wise summary statistics across the station columns.
#
# All statistics are computed from one snapshot of the station readings.
# Computing them on `station_rain` itself while appending columns would fold
# every previously added statistic into the next one (the median would include
# rain_mean, the max would include rain_mean and rain_median, and so on).
# Excluding already-present statistic columns also keeps the result unchanged
# when this cell is re-run.
station_readings = station_rain.drop(
    columns=['rain_mean', 'rain_median', 'rain_max',
             'rain_min', 'rain_std', 'rain_skew'],
    errors='ignore',
)
station_rain['rain_mean'] = station_readings.mean(axis=1)
station_rain['rain_median'] = station_readings.median(axis=1)
station_rain['rain_max'] = station_readings.max(axis=1)
station_rain['rain_min'] = station_readings.min(axis=1)
station_rain['rain_std'] = station_readings.std(axis=1)
station_rain['rain_skew'] = station_readings.skew(axis=1)

Unnamed: 0_level_0,R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,...,R31,R32,R33,R34,R35,R36,R37,R38,R39,rain_mean
TimeStample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013-01-01 01:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013-01-01 02:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013-01-01 03:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013-01-01 04:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Discard the per-station columns R1..R39, leaving only the remaining
# (aggregate) columns, and move TimeStample from the index back into a regular
# column so it can serve as a merge key.
station_rain = (
    station_rain
    .drop(columns=['R' + str(i) for i in range(1, 40)])
    .reset_index()
)

In [None]:
# Attach features to the training rows with left joins, so every training row
# is kept: forecast and environment features join on the day key `dt`,
# station statistics join on the exact TimeStample.
# Note: the result is assigned back to `train`, so running this cell a second
# time merges onto the already-merged frame.
train = (
    train
    .merge(rain, on='dt', how='left')
    .merge(environment, on='dt', how='left')
    .merge(station_rain, on='TimeStample', how='left')
)
train.head()