## Import

In [3]:
import random
import pandas as pd
import numpy as np
import os

from sklearn.ensemble import RandomForestRegressor

import warnings
warnings.filterwarnings(action='ignore') 

In [21]:
import lightgbm as lgb

## Fixed Random-Seed

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Seed 고정

## Load Data

In [93]:
train_df = pd.read_csv('./data/train.csv')
test_df = pd.read_csv('./data/test.csv')

In [94]:
building = pd.read_csv('./data/building_info.csv')

## Train Data Pre-Processing

In [95]:
def train_test_split(df, th):
    train = df[df['일시'].str[:8].astype(int) < th].reset_index(drop=True)
    test = df[df['일시'].str[:8].astype(int) >= th].reset_index(drop=True)
    return train, test

In [96]:
def preprocess_x(df):
    df = df.fillna(0)
    #시계열 특성을 학습에 반영하기 위해 일시를 월, 일, 시간으로 나눕니다
    df['month'] = df['일시'].apply(lambda x : int(x[4:6]))
    df['day'] = df['일시'].apply(lambda x : int(x[6:8]))
    df['time'] = df['일시'].apply(lambda x : int(x[9:11]))
    df = df.merge(building.iloc[:, :4])
    df['건물유형'] = df['건물유형'].astype('category').cat.codes
#     df = df.drop(columns=['num_date_time', '일시', '일조(hr)', '일사(MJ/m2)', '전력소비량(kWh)'])
    df = df.drop(columns=['num_date_time', '일시', '일조(hr)', '일사(MJ/m2)', '전력소비량(kWh)'])
    return df

In [97]:
train_df, test_df = train_test_split(train_df, 20220820)

In [98]:
train_x = preprocess_x(train_df)
train_y = train_df['전력소비량(kWh)']

In [99]:
test_x = preprocess_x(test_df)
test_y = test_df['전력소비량(kWh)']

## Regression Model Fit

In [13]:
model_rf = RandomForestRegressor()
model_rf.fit(train_x, train_y)

In [129]:
model_lgb = lgb.LGBMRegressor(objective='regression', n_estimators=300)

In [130]:
# model_lgb = lgb.LGBMRegressor(objective='regression')
model_lgb.fit(train_x, train_y)

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1000
[LightGBM] [Info] Number of data points in the train set: 192000, number of used features: 11
[LightGBM] [Info] Start training from score 2448.539357


## Inference

In [102]:
def SMAPE(y, pred):
    smape = abs((y - pred))/((abs(y) + abs(pred)) / 2) * 100
    smape = np.mean(smape)
    return smape

def mae(y, pred):
    return np.mean(abs(y-pred))

In [131]:
pred = model_lgb.predict(test_x)

In [132]:
SMAPE(test_y, pred)

15.798359023528862

In [133]:
mae(test_y, pred)

352.1086433302414

In [71]:
eda = test_x.copy()

In [73]:
eda['pred'], eda['y'] = pred, test_y

In [75]:
eda['gap'] = eda['y'] - eda['pred']

In [77]:
eda.sort_values('gap')

Unnamed: 0,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,time,건물유형,연면적(m2),냉방면적(m2),pred,y,gap
3155,29.6,0.0,2.7,61.0,8,21,11,2,578484.113,501381.53,23785.846815,15517.60,-8268.246815
3154,29.1,0.0,3.2,61.0,8,21,10,2,578484.113,501381.53,23437.946398,15260.00,-8177.946398
3156,30.5,0.0,2.7,59.0,8,21,12,2,578484.113,501381.53,23513.283952,15996.40,-7516.883952
3157,31.5,0.0,1.7,59.0,8,21,13,2,578484.113,501381.53,23848.714994,16587.20,-7261.514994
3158,31.6,0.0,1.6,54.0,8,21,14,2,578484.113,501381.53,23853.408656,16864.40,-6989.008656
...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,31.7,0.0,2.3,64.0,8,20,14,0,205884.000,150000.00,2948.549114,7550.64,4602.090886
518,31.3,0.0,1.5,63.0,8,21,14,0,205884.000,150000.00,3029.387746,7642.08,4612.692254
519,31.2,0.0,2.3,61.0,8,21,15,0,205884.000,150000.00,2992.269004,7608.96,4616.690996
517,30.4,0.0,1.3,66.0,8,21,13,0,205884.000,150000.00,3051.354047,7683.84,4632.485953


### 건물별 모델

In [134]:
models = {}

In [135]:
from tqdm import tqdm

In [151]:
for i in tqdm(range(1, 101)):
    _x = train_x[train_x['건물번호'] == i]
    _x = _x.drop(columns=['건물번호', '건물유형', '연면적(m2)', '냉방면적(m2)'])
    _y = train_y[_x.index]
    model_lgb = lgb.LGBMRegressor(objective='regression', n_estimators=300)
    model_lgb.fit(_x, _y)
    models[i] = model_lgb

  2%|█▉                                                                                               | 2/100 [00:00<00:08, 11.96it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2643.372748
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 402
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1676.138344
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1448.643469


  4%|███▉                                                                                             | 4/100 [00:00<00:08, 11.94it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 993.510891
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2725.884560
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1966.527750


  6%|█████▊                                                                                           | 6/100 [00:00<00:08, 11.49it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 779.504000
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 400
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1327.204626


 10%|█████████▌                                                                                      | 10/100 [00:00<00:08, 10.94it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 401
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2334.578250
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 402
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 4068.605124
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 400
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1930.192500


 12%|███████████▌                                                                                    | 12/100 [00:01<00:08, 10.58it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1438.711125
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 402
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2529.404875
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 397
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1918.012499


 16%|███████████████▎                                                                                | 16/100 [00:01<00:07, 10.52it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 401
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1782.539671
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 370
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 3401.638503
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 977.146922
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1447.680547


 20%|███████████████████▏                                                                            | 20/100 [00:01<00:07, 10.36it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1418.846194
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 388
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1723.790501
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 392
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1988.380053


 22%|█████████████████████                                                                           | 22/100 [00:02<00:07, 10.47it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 391
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1236.073126
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 372
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1263.963438
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 10031.880003


 26%|████████████████████████▉                                                                       | 26/100 [00:02<00:06, 10.67it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 7261.687308
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 416
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 3530.394249


 28%|██████████████████████████▉                                                                     | 28/100 [00:02<00:06, 10.49it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 402
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 17454.782905
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 381
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1740.951750


 30%|████████████████████████████▊                                                                   | 30/100 [00:02<00:06, 10.61it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 386
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1452.880391
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 397
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 841.668985


 32%|██████████████████████████████▋                                                                 | 32/100 [00:02<00:06, 10.53it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 388
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1361.915155
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 402
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 9863.946927


 34%|████████████████████████████████▋                                                               | 34/100 [00:03<00:06, 10.52it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 8695.852120
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 3350.778749


 36%|██████████████████████████████████▌                                                             | 36/100 [00:03<00:06, 10.55it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2171.601563
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2892.480062


 38%|████████████████████████████████████▍                                                           | 38/100 [00:03<00:05, 10.68it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 402
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 3344.602967
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1807.888687


 40%|██████████████████████████████████████▍                                                         | 40/100 [00:03<00:05, 10.52it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 401
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2166.561751
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 377
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1880.259376


 42%|████████████████████████████████████████▎                                                       | 42/100 [00:03<00:05, 10.67it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 3259.913282
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 380
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1604.980874
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 391
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1885.114691


 44%|██████████████████████████████████████████▏                                                     | 44/100 [00:04<00:05, 10.82it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 391
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2122.490000
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 402
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2947.071998
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 383
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2248.111000


 46%|████████████████████████████████████████████▏                                                   | 46/100 [00:04<00:05, 10.75it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 5373.330374
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 377
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1880.541938


 50%|████████████████████████████████████████████████                                                | 50/100 [00:04<00:04, 10.69it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 377
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 3043.575189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 380
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 3028.017029


 52%|█████████████████████████████████████████████████▉                                              | 52/100 [00:04<00:04, 10.82it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 401
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2642.153155
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 405
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2103.186249
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2011.187875


 54%|███████████████████████████████████████████████████▊                                            | 54/100 [00:05<00:04, 10.94it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 402
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1818.888282
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 377
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1008.893126
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 402
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 4170.889499


 58%|███████████████████████████████████████████████████████▋                                        | 58/100 [00:05<00:03, 10.86it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1904.013092
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2881.606748
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1663.413281


 60%|█████████████████████████████████████████████████████████▌                                      | 60/100 [00:05<00:03, 10.91it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 402
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2939.370000
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 3104.980498
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1143.751125


 64%|█████████████████████████████████████████████████████████████▍                                  | 64/100 [00:05<00:03, 10.88it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 401
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 751.909969
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 397
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1180.625531
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 377
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 568.408500


 66%|███████████████████████████████████████████████████████████████▎                                | 66/100 [00:06<00:03, 10.75it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 372
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 396.447984
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 405
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1281.764562
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2019.661562


 68%|█████████████████████████████████████████████████████████████████▎                              | 68/100 [00:06<00:02, 10.86it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 4493.378905
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 401
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 4286.103826


 72%|█████████████████████████████████████████████████████████████████████                           | 72/100 [00:06<00:02, 10.83it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 401
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2136.472753
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 377
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1271.575970
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 3624.625248


 74%|███████████████████████████████████████████████████████████████████████                         | 74/100 [00:06<00:02, 10.57it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 4024.317380
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 366
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1406.244249
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 374
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1197.294751


 78%|██████████████████████████████████████████████████████████████████████████▉                     | 78/100 [00:07<00:02, 10.80it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1485.046314
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1648.270124
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 409
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2890.036872


 80%|████████████████████████████████████████████████████████████████████████████▊                   | 80/100 [00:07<00:01, 10.61it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2553.714186
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1441.524374


 82%|██████████████████████████████████████████████████████████████████████████████▋                 | 82/100 [00:07<00:01, 10.42it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 395
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2286.404249
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1444.984532
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 415
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1721.301751


 86%|██████████████████████████████████████████████████████████████████████████████████▌             | 86/100 [00:08<00:01, 10.60it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2249.537187
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1144.428843
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 400
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1277.897673


 88%|████████████████████████████████████████████████████████████████████████████████████▍           | 88/100 [00:08<00:01, 10.40it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 395
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1191.543892
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 397
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1429.196124
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 401
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1379.525001


 92%|████████████████████████████████████████████████████████████████████████████████████████▎       | 92/100 [00:08<00:00, 10.75it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 377
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1322.122249
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 414
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1285.402313
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1102.378875


 94%|██████████████████████████████████████████████████████████████████████████████████████████▏     | 94/100 [00:08<00:00, 10.94it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 407
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2447.443998
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 398
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1112.845218


 96%|████████████████████████████████████████████████████████████████████████████████████████████▏   | 96/100 [00:08<00:00, 10.74it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 405
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 2801.350094
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 405
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1227.598031
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 381
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1134.933562


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:09<00:00, 10.71it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 406
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 1160.090812
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 397
[LightGBM] [Info] Number of data points in the train set: 1920, number of used features: 7
[LightGBM] [Info] Start training from score 821.566251


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:09<00:00, 10.71it/s]


In [154]:
preds = []

In [155]:
for i in tqdm(range(1, 101)):
    _x = test_x[test_x['건물번호'] == i]
    _x = _x.drop(columns=['건물번호', '건물유형', '연면적(m2)', '냉방면적(m2)'])
    pred = models[i].predict(_x).tolist()
    preds.extend(pred)

100%|██████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 671.63it/s]


In [156]:
preds = np.array(preds)

In [157]:
SMAPE(test_y, preds)

14.462616029933997

In [158]:
mae(test_y, preds)

351.1786298329664

In [159]:
train_x

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,time,건물유형,연면적(m2),냉방면적(m2)
0,1,18.6,0.0,0.9,42.0,6,1,0,0,110634.00,39570.00
1,1,18.0,0.0,1.1,45.0,6,1,1,0,110634.00,39570.00
2,1,17.7,0.0,1.5,45.0,6,1,2,0,110634.00,39570.00
3,1,16.7,0.0,1.4,48.0,6,1,3,0,110634.00,39570.00
4,1,18.4,0.0,2.8,43.0,6,1,4,0,110634.00,39570.00
...,...,...,...,...,...,...,...,...,...,...,...
191995,100,28.1,0.0,1.9,85.0,8,19,19,11,57497.84,40035.23
191996,100,27.7,0.0,1.8,87.0,8,19,20,11,57497.84,40035.23
191997,100,27.7,0.0,1.4,87.0,8,19,21,11,57497.84,40035.23
191998,100,27.7,0.0,2.4,86.0,8,19,22,11,57497.84,40035.23


## Validate

In [30]:
preds

array([2076.1152, 2014.896 , 1882.224 , ...,  692.712 ,  673.4592,
        566.616 ])

In [31]:
test_y.values

array([1921.44, 1851.36, 1731.84, ...,  825.12,  640.08,  540.24])

## Submission

In [11]:
submission = pd.read_csv('./sample_submission.csv')
submission

Unnamed: 0,num_date_time,answer
0,1_20220825 00,0
1,1_20220825 01,0
2,1_20220825 02,0
3,1_20220825 03,0
4,1_20220825 04,0
...,...,...
16795,100_20220831 19,0
16796,100_20220831 20,0
16797,100_20220831 21,0
16798,100_20220831 22,0


In [12]:
submission['answer'] = preds
submission

Unnamed: 0,num_date_time,answer
0,1_20220825 00,2127.2688
1,1_20220825 01,2090.5008
2,1_20220825 02,2009.9712
3,1_20220825 03,1981.9440
4,1_20220825 04,1946.7744
...,...,...
16795,100_20220831 19,893.0712
16796,100_20220831 20,784.7448
16797,100_20220831 21,748.2216
16798,100_20220831 22,654.1728


In [13]:
submission.to_csv('./baseline_submission.csv', index=False)