### 라이브러리

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from windpowerlib.wind_speed import logarithmic_profile

### 데이터 불러오기

In [8]:
gj_train = pd.read_parquet('train_ldaps_gyeongju.parquet')
# meta = pd.read_excel('windfarm_metadata.xlsx', sheet_name = 1, header = 1, index_col = None)
# meta.rename(columns = {'발전기 번호': 'turbine_id','정격 출력 [kW]': '정격출력', '소재지표고(지표) [m]': '소재지표고', '허브 높이(지표) [m]': '허브높이', '로터 반경 [m]': '로터반경'}, inplace = True)
# meta = meta.iloc[:, 1:11]

### 데이터 확인

In [6]:
display(gj_train.head())
# display(meta.head())


Unnamed: 0,dt,elevation,land_cover,surf_rough,frictional_vmax_50m,frictional_vmin_50m,pressure,relative_humid,specific_humid,temp_air,storm_u_5m,storm_v_5m,wind_u_10m,wind_v_10m,turbine_id
0,2020-01-02 00:00:00+09:00,387.640625,1.0,0.286911,10.428498,10.05958,97974.59375,91.796478,0.002686,269.46756,0.021201,-0.375756,7.353266,-2.640615,WTG01
1,2020-01-02 01:00:00+09:00,387.640625,1.0,0.286911,10.472921,10.044404,97970.132812,91.729774,0.002686,269.430847,0.020706,-0.371947,7.578446,-2.414076,WTG01
2,2020-01-02 02:00:00+09:00,387.640625,1.0,0.286911,10.682985,10.478634,97951.546875,92.788666,0.002686,269.37439,0.020556,-0.354397,7.654554,-2.259913,WTG01
3,2020-01-02 03:00:00+09:00,387.640625,1.0,0.286911,10.676681,10.090029,97908.96875,92.494576,0.002686,269.352112,0.021635,-0.328609,7.275844,-2.280371,WTG01
4,2020-01-02 04:00:00+09:00,387.640625,1.0,0.286911,10.079557,9.67262,97858.398438,88.986443,0.002686,269.413269,0.024231,-0.265124,6.911751,-1.920128,WTG01


NameError: name 'meta' is not defined

In [9]:
print(gj_train.shape)

(235818, 15)


In [4]:
# gj_train = pd.merge(gj_train, meta[['turbine_id', '소재지표고']], on = 'turbine_id')
# gj_train.set_index('dt', inplace = True)
# gj_train

#### u, v벡터로 풍속, 풍향 구하기

In [10]:
## u, v벡터로 풍속, 풍향 구하기

def uv_to_wsd(u_wind_speed, v_wind_speed):
    """ 
        Convert u, v vector to wind speed and direction.
    """
    u_ws = u_wind_speed.to_numpy()
    v_ws = v_wind_speed.to_numpy()

    # NOTE: http://colaweb.gmu.edu/dev/clim301/lectures/wind/wind-uv
    wind_speed = np.nansum([u_ws**2, v_ws**2], axis=0)**(1/2.)

    # math degree
    wind_direction = np.rad2deg(np.arctan2(v_ws, u_ws+1e-6))
    wind_direction[wind_direction < 0] += 360

    # meteorological degree
    wind_direction = 270 - wind_direction
    wind_direction[wind_direction < 0] += 360

    return wind_speed, wind_direction


gj_train["wind_speed"], gj_train["wind_direction"] = uv_to_wsd(
    gj_train["wind_u_10m"], gj_train["wind_v_10m"]
)

gj_train.head()

Unnamed: 0,dt,elevation,land_cover,surf_rough,frictional_vmax_50m,frictional_vmin_50m,pressure,relative_humid,specific_humid,temp_air,storm_u_5m,storm_v_5m,wind_u_10m,wind_v_10m,turbine_id,wind_speed,wind_direction
0,2020-01-02 00:00:00+09:00,387.640625,1.0,0.286911,10.428498,10.05958,97974.59375,91.796478,0.002686,269.46756,0.021201,-0.375756,7.353266,-2.640615,WTG01,7.813025,289.753601
1,2020-01-02 01:00:00+09:00,387.640625,1.0,0.286911,10.472921,10.044404,97970.132812,91.729774,0.002686,269.430847,0.020706,-0.371947,7.578446,-2.414076,WTG01,7.953654,287.669006
2,2020-01-02 02:00:00+09:00,387.640625,1.0,0.286911,10.682985,10.478634,97951.546875,92.788666,0.002686,269.37439,0.020556,-0.354397,7.654554,-2.259913,WTG01,7.981191,286.448578
3,2020-01-02 03:00:00+09:00,387.640625,1.0,0.286911,10.676681,10.090029,97908.96875,92.494576,0.002686,269.352112,0.021635,-0.328609,7.275844,-2.280371,WTG01,7.624827,287.401855
4,2020-01-02 04:00:00+09:00,387.640625,1.0,0.286911,10.079557,9.67262,97858.398438,88.986443,0.002686,269.413269,0.024231,-0.265124,6.911751,-1.920128,WTG01,7.173506,285.525635


In [11]:
# dt가 인덱스로 설정되어 있어야 아래 logarithmic으로 변경가능
gj_train.set_index('dt', inplace = True)

#### logarithmic_profile 함수로 터빈 높이에 따라 풍속 보정하기

In [12]:
gj_x = gj_train[['temp_air', 'wind_speed', 'wind_direction', 'surf_rough', 'turbine_id']]
# 터빈 허브 높이의 풍속을 log-wall wind profile을 적용해 산출 (경주풍력의 터빈 지표고도 = 100m, 메타데이터 참고)
x_windspeed_100m = gj_x.groupby("turbine_id").apply(
    lambda x:logarithmic_profile(x.wind_speed, 10, 100, x.surf_rough)
).T.reset_index().melt(
    value_vars = gj_x.turbine_id.unique().tolist(), id_vars="dt", value_name="wind_speed_100m"
)

gj_x = pd.merge(gj_x.reset_index(), x_windspeed_100m, on=["dt", "turbine_id"])
gj_x.set_index("dt", inplace=True)
gj_x

  x_windspeed_100m = gj_x.groupby("turbine_id").apply(


Unnamed: 0_level_0,temp_air,wind_speed,wind_direction,surf_rough,turbine_id,wind_speed_100m
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02 00:00:00+09:00,269.467560,7.813025,289.753601,0.286911,WTG01,12.879008
2020-01-02 01:00:00+09:00,269.430847,7.953654,287.669006,0.286911,WTG01,13.110820
2020-01-02 02:00:00+09:00,269.374390,7.981191,286.448578,0.286911,WTG01,13.156212
2020-01-02 03:00:00+09:00,269.352112,7.624827,287.401855,0.286911,WTG01,12.568782
2020-01-02 04:00:00+09:00,269.413269,7.173506,285.525635,0.286911,WTG01,11.824823
...,...,...,...,...,...,...
2022-12-31 19:00:00+09:00,271.486450,6.411052,298.820953,0.297867,WTG09,10.612328
2022-12-31 20:00:00+09:00,271.668732,6.884773,304.600739,0.297867,WTG09,11.396485
2022-12-31 21:00:00+09:00,270.929230,6.105609,307.490143,0.297867,WTG09,10.106722
2022-12-31 22:00:00+09:00,270.055725,4.332157,310.975128,0.297867,WTG09,7.171095


#### turbine_id별로 풍속, 풍향, 기온 변수 따로 만들기(세로로 길게 된 데이터를 가로로 배치)

In [13]:
# index == dt
gj_feature = gj_x.copy()

In [14]:
gj_feature.index.unique()

DatetimeIndex(['2020-01-02 00:00:00+09:00', '2020-01-02 01:00:00+09:00',
               '2020-01-02 02:00:00+09:00', '2020-01-02 03:00:00+09:00',
               '2020-01-02 04:00:00+09:00', '2020-01-02 05:00:00+09:00',
               '2020-01-02 06:00:00+09:00', '2020-01-02 07:00:00+09:00',
               '2020-01-02 08:00:00+09:00', '2020-01-02 09:00:00+09:00',
               ...
               '2022-12-31 14:00:00+09:00', '2022-12-31 15:00:00+09:00',
               '2022-12-31 16:00:00+09:00', '2022-12-31 17:00:00+09:00',
               '2022-12-31 18:00:00+09:00', '2022-12-31 19:00:00+09:00',
               '2022-12-31 20:00:00+09:00', '2022-12-31 21:00:00+09:00',
               '2022-12-31 22:00:00+09:00', '2022-12-31 23:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', name='dt', length=26202, freq=None)

In [15]:
gj_feature.reset_index(drop = False, inplace = True)

# dt2에 dt.unique값 저장해놓기!
dt2 = gj_feature['dt'].unique()
gj_feature

Unnamed: 0,dt,temp_air,wind_speed,wind_direction,surf_rough,turbine_id,wind_speed_100m
0,2020-01-02 00:00:00+09:00,269.467560,7.813025,289.753601,0.286911,WTG01,12.879008
1,2020-01-02 01:00:00+09:00,269.430847,7.953654,287.669006,0.286911,WTG01,13.110820
2,2020-01-02 02:00:00+09:00,269.374390,7.981191,286.448578,0.286911,WTG01,13.156212
3,2020-01-02 03:00:00+09:00,269.352112,7.624827,287.401855,0.286911,WTG01,12.568782
4,2020-01-02 04:00:00+09:00,269.413269,7.173506,285.525635,0.286911,WTG01,11.824823
...,...,...,...,...,...,...,...
235813,2022-12-31 19:00:00+09:00,271.486450,6.411052,298.820953,0.297867,WTG09,10.612328
235814,2022-12-31 20:00:00+09:00,271.668732,6.884773,304.600739,0.297867,WTG09,11.396485
235815,2022-12-31 21:00:00+09:00,270.929230,6.105609,307.490143,0.297867,WTG09,10.106722
235816,2022-12-31 22:00:00+09:00,270.055725,4.332157,310.975128,0.297867,WTG09,7.171095


In [16]:
turbine_ids = [f'WTG0{i}' for i in range(1, 10)]
turbine_ids

for turbine in turbine_ids:
    gj_feature[f'wind_speed_{turbine}'] = gj_feature.loc[gj_feature['turbine_id'] == turbine, 'wind_speed_100m'].reset_index(drop = True) 
    gj_feature[f'wind_direcion_{turbine}'] = gj_feature.loc[gj_feature['turbine_id'] == turbine, 'wind_direction'].reset_index(drop = True)
    gj_feature[f'temp_air_{turbine}'] = gj_feature.loc[gj_feature['turbine_id'] == turbine, 'temp_air'].reset_index(drop = True)
gj_feature
# print(gj_train.columns)

Unnamed: 0,dt,temp_air,wind_speed,wind_direction,surf_rough,turbine_id,wind_speed_100m,wind_speed_WTG01,wind_direcion_WTG01,temp_air_WTG01,...,temp_air_WTG06,wind_speed_WTG07,wind_direcion_WTG07,temp_air_WTG07,wind_speed_WTG08,wind_direcion_WTG08,temp_air_WTG08,wind_speed_WTG09,wind_direcion_WTG09,temp_air_WTG09
0,2020-01-02 00:00:00+09:00,269.467560,7.813025,289.753601,0.286911,WTG01,12.879008,12.879008,289.753601,269.467560,...,269.935577,7.903188,320.240692,269.935577,7.668751,327.180023,269.870636,7.668751,327.180023,269.870636
1,2020-01-02 01:00:00+09:00,269.430847,7.953654,287.669006,0.286911,WTG01,13.110820,13.110820,287.669006,269.430847,...,269.905701,7.574843,317.785431,269.905701,7.227797,325.506805,269.864441,7.227797,325.506805,269.864441
2,2020-01-02 02:00:00+09:00,269.374390,7.981191,286.448578,0.286911,WTG01,13.156212,13.156212,286.448578,269.374390,...,269.889282,7.480773,314.699219,269.889282,7.072643,322.184937,269.874146,7.072643,322.184937,269.874146
3,2020-01-02 03:00:00+09:00,269.352112,7.624827,287.401855,0.286911,WTG01,12.568782,12.568782,287.401855,269.352112,...,269.808411,6.933155,311.205841,269.808411,6.440182,316.832153,269.763245,6.440182,316.832153,269.763245
4,2020-01-02 04:00:00+09:00,269.413269,7.173506,285.525635,0.286911,WTG01,11.824823,11.824823,285.525635,269.413269,...,269.755310,5.667011,311.076782,269.755310,5.369712,316.859711,269.712097,5.369712,316.859711,269.712097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235813,2022-12-31 19:00:00+09:00,271.486450,6.411052,298.820953,0.297867,WTG09,10.612328,,,,...,,,,,,,,,,
235814,2022-12-31 20:00:00+09:00,271.668732,6.884773,304.600739,0.297867,WTG09,11.396485,,,,...,,,,,,,,,,
235815,2022-12-31 21:00:00+09:00,270.929230,6.105609,307.490143,0.297867,WTG09,10.106722,,,,...,,,,,,,,,,
235816,2022-12-31 22:00:00+09:00,270.055725,4.332157,310.975128,0.297867,WTG09,7.171095,,,,...,,,,,,,,,,


In [17]:
gj_feature.dropna(axis = 0, inplace = True)
gj_feature['dt'] = dt2

In [18]:
gj_feature

Unnamed: 0,dt,temp_air,wind_speed,wind_direction,surf_rough,turbine_id,wind_speed_100m,wind_speed_WTG01,wind_direcion_WTG01,temp_air_WTG01,...,temp_air_WTG06,wind_speed_WTG07,wind_direcion_WTG07,temp_air_WTG07,wind_speed_WTG08,wind_direcion_WTG08,temp_air_WTG08,wind_speed_WTG09,wind_direcion_WTG09,temp_air_WTG09
0,2020-01-02 00:00:00+09:00,269.467560,7.813025,289.753601,0.286911,WTG01,12.879008,12.879008,289.753601,269.467560,...,269.935577,7.903188,320.240692,269.935577,7.668751,327.180023,269.870636,7.668751,327.180023,269.870636
1,2020-01-02 01:00:00+09:00,269.430847,7.953654,287.669006,0.286911,WTG01,13.110820,13.110820,287.669006,269.430847,...,269.905701,7.574843,317.785431,269.905701,7.227797,325.506805,269.864441,7.227797,325.506805,269.864441
2,2020-01-02 02:00:00+09:00,269.374390,7.981191,286.448578,0.286911,WTG01,13.156212,13.156212,286.448578,269.374390,...,269.889282,7.480773,314.699219,269.889282,7.072643,322.184937,269.874146,7.072643,322.184937,269.874146
3,2020-01-02 03:00:00+09:00,269.352112,7.624827,287.401855,0.286911,WTG01,12.568782,12.568782,287.401855,269.352112,...,269.808411,6.933155,311.205841,269.808411,6.440182,316.832153,269.763245,6.440182,316.832153,269.763245
4,2020-01-02 04:00:00+09:00,269.413269,7.173506,285.525635,0.286911,WTG01,11.824823,11.824823,285.525635,269.413269,...,269.755310,5.667011,311.076782,269.755310,5.369712,316.859711,269.712097,5.369712,316.859711,269.712097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26197,2022-12-31 19:00:00+09:00,301.399658,3.397750,244.971313,0.307016,WTG03,5.643694,14.597685,288.561768,271.441528,...,271.768188,11.129124,295.321136,271.768188,10.612328,298.820953,271.486450,10.612328,298.820953,271.486450
26198,2022-12-31 20:00:00+09:00,299.816162,4.987082,215.429626,0.307016,WTG03,8.283588,15.118364,291.168121,271.384308,...,271.739532,12.008143,300.869934,271.739532,11.396485,304.600739,271.668732,11.396485,304.600739,271.668732
26199,2022-12-31 21:00:00+09:00,298.253174,6.073834,206.931152,0.307016,WTG03,10.088695,15.047703,288.055420,271.120636,...,271.320099,11.097052,301.189056,271.320099,10.106722,307.490143,270.929230,10.106722,307.490143,270.929230
26200,2022-12-31 22:00:00+09:00,294.867188,5.416467,202.261139,0.307016,WTG03,8.996800,13.454048,284.415344,270.192200,...,270.213684,8.153318,302.675110,270.213684,7.171095,310.975128,270.055725,7.171095,310.975128,270.055725


In [19]:
gj_xx = gj_feature.drop(['temp_air', 'wind_speed', 'wind_direction', 'surf_rough', 'turbine_id'], axis = 1)
gj_xx

Unnamed: 0,dt,wind_speed_100m,wind_speed_WTG01,wind_direcion_WTG01,temp_air_WTG01,wind_speed_WTG02,wind_direcion_WTG02,temp_air_WTG02,wind_speed_WTG03,wind_direcion_WTG03,...,temp_air_WTG06,wind_speed_WTG07,wind_direcion_WTG07,temp_air_WTG07,wind_speed_WTG08,wind_direcion_WTG08,temp_air_WTG08,wind_speed_WTG09,wind_direcion_WTG09,temp_air_WTG09
0,2020-01-02 00:00:00+09:00,12.879008,12.879008,289.753601,269.467560,7.922320,307.658264,269.897491,7.903188,320.240692,...,269.935577,7.903188,320.240692,269.935577,7.668751,327.180023,269.870636,7.668751,327.180023,269.870636
1,2020-01-02 01:00:00+09:00,13.110820,13.110820,287.669006,269.430847,7.825646,304.472595,269.871521,7.574843,317.785431,...,269.905701,7.574843,317.785431,269.905701,7.227797,325.506805,269.864441,7.227797,325.506805,269.864441
2,2020-01-02 02:00:00+09:00,13.156212,13.156212,286.448578,269.374390,7.808031,302.931213,269.824097,7.480773,314.699219,...,269.889282,7.480773,314.699219,269.889282,7.072643,322.184937,269.874146,7.072643,322.184937,269.874146
3,2020-01-02 03:00:00+09:00,12.568782,12.568782,287.401855,269.352112,7.453205,301.906036,269.807190,6.933155,311.205841,...,269.808411,6.933155,311.205841,269.808411,6.440182,316.832153,269.763245,6.440182,316.832153,269.763245
4,2020-01-02 04:00:00+09:00,11.824823,11.824823,285.525635,269.413269,6.181468,300.397858,269.730896,5.667011,311.076782,...,269.755310,5.667011,311.076782,269.755310,5.369712,316.859711,269.712097,5.369712,316.859711,269.712097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26197,2022-12-31 19:00:00+09:00,5.643694,14.597685,288.561768,271.441528,11.412551,292.386078,271.999634,11.129124,295.321136,...,271.768188,11.129124,295.321136,271.768188,10.612328,298.820953,271.486450,10.612328,298.820953,271.486450
26198,2022-12-31 20:00:00+09:00,8.283588,15.118364,291.168121,271.384308,12.181539,296.365845,271.774933,12.008143,300.869934,...,271.739532,12.008143,300.869934,271.739532,11.396485,304.600739,271.668732,11.396485,304.600739,271.668732
26199,2022-12-31 21:00:00+09:00,10.088695,15.047703,288.055420,271.120636,11.648945,295.193634,271.638702,11.097052,301.189056,...,271.320099,11.097052,301.189056,271.320099,10.106722,307.490143,270.929230,10.106722,307.490143,270.929230
26200,2022-12-31 22:00:00+09:00,8.996800,13.454048,284.415344,270.192200,9.071298,294.490845,270.375549,8.153318,302.675110,...,270.213684,8.153318,302.675110,270.213684,7.171095,310.975128,270.055725,7.171095,310.975128,270.055725


#### train_y 불러오기

In [20]:
gj_y = pd.read_csv('train_y.csv')
gj_y

Unnamed: 0,plant_name,end_datetime,period_hours,energy_kwh
0,경주풍력,2020-01-01T01:00:00+09:00,1,9767.578125
1,경주풍력,2020-01-01T02:00:00+09:00,1,5381.835938
2,경주풍력,2020-01-01T03:00:00+09:00,1,3021.484375
3,경주풍력,2020-01-01T04:00:00+09:00,1,4400.390625
4,경주풍력,2020-01-01T05:00:00+09:00,1,4501.953125
...,...,...,...,...
52603,영광풍력,2022-12-31T20:00:00+09:00,1,1708.529000
52604,영광풍력,2022-12-31T21:00:00+09:00,1,67.645000
52605,영광풍력,2022-12-31T22:00:00+09:00,1,0.000000
52606,영광풍력,2022-12-31T23:00:00+09:00,1,0.000000


In [21]:
# 데이터 타입 변경
gj_y['end_datetime'] = pd.to_datetime(gj_y['end_datetime']).dt.tz_convert('Asia/Seoul')
gj_y = gj_y.loc[gj_y['plant_name'] == '경주풍력', ['end_datetime', 'energy_kwh']]
gj_y.rename(columns = {'end_datetime': 'dt'}, inplace = True)
gj_y

Unnamed: 0,dt,energy_kwh
0,2020-01-01 01:00:00+09:00,9767.578125
1,2020-01-01 02:00:00+09:00,5381.835938
2,2020-01-01 03:00:00+09:00,3021.484375
3,2020-01-01 04:00:00+09:00,4400.390625
4,2020-01-01 05:00:00+09:00,4501.953125
...,...,...
26299,2022-12-31 20:00:00+09:00,18394.531250
26300,2022-12-31 21:00:00+09:00,18443.359375
26301,2022-12-31 22:00:00+09:00,18525.390625
26302,2022-12-31 23:00:00+09:00,18529.296875


In [22]:
# merge

data = pd.merge(gj_xx, gj_y, how = 'inner', on = 'dt')
data

Unnamed: 0,dt,wind_speed_100m,wind_speed_WTG01,wind_direcion_WTG01,temp_air_WTG01,wind_speed_WTG02,wind_direcion_WTG02,temp_air_WTG02,wind_speed_WTG03,wind_direcion_WTG03,...,wind_speed_WTG07,wind_direcion_WTG07,temp_air_WTG07,wind_speed_WTG08,wind_direcion_WTG08,temp_air_WTG08,wind_speed_WTG09,wind_direcion_WTG09,temp_air_WTG09,energy_kwh
0,2020-01-02 00:00:00+09:00,12.879008,12.879008,289.753601,269.467560,7.922320,307.658264,269.897491,7.903188,320.240692,...,7.903188,320.240692,269.935577,7.668751,327.180023,269.870636,7.668751,327.180023,269.870636,17827.148438
1,2020-01-02 01:00:00+09:00,13.110820,13.110820,287.669006,269.430847,7.825646,304.472595,269.871521,7.574843,317.785431,...,7.574843,317.785431,269.905701,7.227797,325.506805,269.864441,7.227797,325.506805,269.864441,17616.210938
2,2020-01-02 02:00:00+09:00,13.156212,13.156212,286.448578,269.374390,7.808031,302.931213,269.824097,7.480773,314.699219,...,7.480773,314.699219,269.889282,7.072643,322.184937,269.874146,7.072643,322.184937,269.874146,17904.296875
3,2020-01-02 03:00:00+09:00,12.568782,12.568782,287.401855,269.352112,7.453205,301.906036,269.807190,6.933155,311.205841,...,6.933155,311.205841,269.808411,6.440182,316.832153,269.763245,6.440182,316.832153,269.763245,17676.757812
4,2020-01-02 04:00:00+09:00,11.824823,11.824823,285.525635,269.413269,6.181468,300.397858,269.730896,5.667011,311.076782,...,5.667011,311.076782,269.755310,5.369712,316.859711,269.712097,5.369712,316.859711,269.712097,18519.531250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26197,2022-12-31 19:00:00+09:00,5.643694,14.597685,288.561768,271.441528,11.412551,292.386078,271.999634,11.129124,295.321136,...,11.129124,295.321136,271.768188,10.612328,298.820953,271.486450,10.612328,298.820953,271.486450,18488.281250
26198,2022-12-31 20:00:00+09:00,8.283588,15.118364,291.168121,271.384308,12.181539,296.365845,271.774933,12.008143,300.869934,...,12.008143,300.869934,271.739532,11.396485,304.600739,271.668732,11.396485,304.600739,271.668732,18394.531250
26199,2022-12-31 21:00:00+09:00,10.088695,15.047703,288.055420,271.120636,11.648945,295.193634,271.638702,11.097052,301.189056,...,11.097052,301.189056,271.320099,10.106722,307.490143,270.929230,10.106722,307.490143,270.929230,18443.359375
26200,2022-12-31 22:00:00+09:00,8.996800,13.454048,284.415344,270.192200,9.071298,294.490845,270.375549,8.153318,302.675110,...,8.153318,302.675110,270.213684,7.171095,310.975128,270.055725,7.171095,310.975128,270.055725,18525.390625


### train, test split

In [24]:
train_x = data.loc[data['dt'].between('2020-01-02', '2022-06-30', inclusive = 'both')].drop(['dt', 'energy_kwh'], axis = 1)
test_x = data.loc[data['dt'].between('2022-07-01', '2022-12-31', inclusive = 'both')].drop(['dt', 'energy_kwh'], axis = 1)
train_y = data.loc[data['dt'].between('2020-01-02', '2022-06-30', inclusive = 'both'), 'energy_kwh']
test_y = data.loc[data['dt'].between('2022-07-01', '2022-12-31', inclusive = 'both'), 'energy_kwh']

print(train_x.shape)
print(test_x.shape)
print(train_y.shape)
print(test_y.shape)

(21840, 28)
(4316, 28)
(21840,)
(4316,)


### RandomForestRegressor

In [25]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error

In [26]:
rf = RandomForestRegressor(n_jobs = -1)
rf.fit(train_x, train_y)
pred_y = rf.predict(test_x)


In [28]:
print(mean_absolute_error(test_y, pred_y))
print(mean_absolute_percentage_error(test_y, pred_y))

2898.638634314904
1.9601865551289132e+18


In [29]:
nmae = abs(pred_y-test_y)/20700*100
nmae = round(nmae.mean(), 2)
print(nmae)

14.0


In [31]:
from xgboost import XGBRegressor

xgb_params = {
    'tree_method': 'auto',
    # 'n_estimators': 1000,
    # 'max_depth': 8,
    'n_jobs': -1,
    'random_state': 1,
    # 'device': 'cuda'
}

xgb = XGBRegressor(**xgb_params)
xgb.fit(train_x, train_y)
pred_y_xgb = xgb.predict(test_x)

In [32]:
print(mean_absolute_error(test_y, pred_y_xgb))
print(mean_absolute_percentage_error(test_y, pred_y_xgb))

2936.601346891605
1.9695521593405391e+18


In [33]:
nmae = abs(pred_y_xgb-test_y)/20700*100
nmae = round(nmae.mean(), 2)
print(nmae)

14.19
