# 시작

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install sktime

Collecting sktime
  Downloading sktime-0.22.0-py3-none-any.whl (17.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.5/17.5 MB[0m [31m31.9 MB/s[0m eta [36m0:00:00[0m
Collecting scikit-base<0.6.0 (from sktime)
  Downloading scikit_base-0.5.1-py3-none-any.whl (118 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.7/118.7 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scikit-base, sktime
Successfully installed scikit-base-0.5.1 sktime-0.22.0


In [3]:
from lightgbm import LGBMRegressor, early_stopping
from xgboost import XGBRegressor, callback

from sklearn.model_selection import KFold
from sklearn.preprocessing import OneHotEncoder

from sktime.forecasting.model_selection import temporal_train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import make_scorer

In [4]:
import pandas as pd
import numpy as np
import torch
from tqdm.auto import tqdm
import random
import os

def reset_seeds(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

DATA_PATH = '/content/drive/MyDrive/데이콘 캐글 컴페티션/DACON_23.07_PowerConsumption/'
SEED = 42

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [5]:
train = pd.read_csv(f"{DATA_PATH}train.csv")
test = pd.read_csv(f"{DATA_PATH}test.csv")
building = pd.read_csv(f"{DATA_PATH}building_info.csv")
submission = pd.read_csv(f"{DATA_PATH}sample_submission.csv")

df = pd.read_csv(f"{DATA_PATH}train_featured_3.csv")
train_ft = pd.read_csv(f"{DATA_PATH}train_selected_3.csv")

test_df = pd.read_csv(f"{DATA_PATH}test_featured_3.csv")
test_ft = pd.read_csv(f"{DATA_PATH}test_selected_3.csv")

In [7]:
def _smape(true, pred):
    true = np.array(true)
    pred = np.array(pred)
    output = np.mean((np.abs(true-pred))/(np.abs(true) + np.abs(pred))) * 100
    return output
smape = make_scorer(_smape, greater_is_better=False)

In [8]:
train_ft.columns

Index(['day_hour_mean', 'day_hour_median', 'discomfort_CDH', 'week',
       'day_hour_mode', 'discomfort', 'day_hour_std', 'CDH', 'hour_mean',
       'sin_time', 'cos_week', 'day_of_year'],
      dtype='object')

In [9]:
df.columns

Index(['num_date_time', 'building_num', 'date_time', 'temp', 'precip',
       'wind_ms', 'humidity', 'sunshine', 'solar_rad', 'energy_consum',
       'building_type', 'floor_area', 'cooling_area', 'solar_capa', 'ess_capa',
       'pcs_capa', 'hour', 'day', 'month', 'week', 'day_hour_mean',
       'hour_mean', 'hour_std', 'hour_type_mean', 'holiday', 'sin_time',
       'cos_time', 'THI', 'CDH', 'discomfort', 'discomfort_CDH', 'solar_gen',
       'CDH_solar', 'cooling_ratio', 'day_of_year', 'cos_day_of_year',
       'sin_day_of_year', 'cos_weekday', 'sin_weekday', 'cos_week', 'sin_week',
       'cos_month', 'sin_month', 'day_hour_median', 'day_hour_std',
       'day_hour_mode'],
      dtype='object')

In [11]:
test_df.columns

Index(['num_date_time', 'building_num', 'date_time', 'temp', 'precip',
       'wind_ms', 'humidity', 'building_type', 'floor_area', 'cooling_area',
       'solar_capa', 'ess_capa', 'pcs_capa', 'hour', 'day', 'month', 'week',
       'day_hour_mean', 'discomfort', 'discomfort_CDH', 'day_hour_median',
       'day_hour_mode', 'day_hour_std', 'sin_time', 'hour_mean', 'CDH',
       'day_of_year', 'cos_week'],
      dtype='object')

In [17]:
cols = ['building_num','day_of_year','energy_consum','day_hour_mean', 'hour_mean', 'day_hour_median', 'day_hour_mode', 'day_hour_std',
        'discomfort_CDH', 'CDH','discomfort', 'week','cos_week', 'sin_time']

train_ft = df[cols]

In [19]:
cols = ['building_num','day_of_year','day_hour_mean', 'hour_mean', 'day_hour_median', 'day_hour_mode', 'day_hour_std',
        'discomfort_CDH', 'CDH','discomfort', 'week','cos_week', 'sin_time']

test_ft = test_df[cols]

In [20]:
#train_ft : selected feature

# 1부터 36까지의 숫자에 따라 train_ft 데이터프레임을 생성하여 개별 변수로 저장
for i in range(1, 101):
    globals()[f'train_ft_{i}'] = train_ft[train_ft['building_num'] == i]

In [21]:
#df : full feature

# 1부터 36까지의 숫자에 따라 train_ft 데이터프레임을 생성하여 개별 변수로 저장
for i in range(1, 101):
    globals()[f'train_df_{i}'] = df[df['building_num'] == i]

In [365]:
#test_ft

# 1부터 36까지의 숫자에 따라 test_ft 데이터프레임을 생성하여 개별 변수로 저장
for i in range(1, 101):
    globals()[f'test_df_{i}'] = test_ft[test_ft['building_num'] == i]

In [461]:
#test_df

# 1부터 36까지의 숫자에 따라 test_df 데이터프레임을 생성하여 개별 변수로 저장
for i in range(1, 101):
    globals()[f'test_df_full_{i}'] = test_df[test_df['building_num'] == i]

## 1

In [493]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [494]:
train_x = train_ft_1.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_1['energy_consum']

test_x = test_df_1.drop(columns=test_drop_cols,axis=1)

In [495]:
test_x

Unnamed: 0,day_of_year,day_hour_mean,hour_mean,day_hour_median,day_hour_mode,day_hour_std,discomfort_CDH,CDH,discomfort,week,cos_week,sin_time
0,243,1627.800000,1706.318118,1847.28,1034.40,446.984913,45.78980,-2.5,71.78980,34,-0.464723,0.000000
1,243,1550.080000,1622.620235,1772.40,933.60,449.091398,90.81820,-5.5,71.02840,34,-0.464723,0.258819
2,243,1431.120000,1506.971294,1624.80,871.68,415.453568,135.63495,-8.8,70.81675,34,-0.464723,0.500000
3,243,1372.200000,1437.365647,1557.84,850.08,378.117772,179.74757,-12.7,70.11262,34,-0.464723,0.707107
4,243,1381.720000,1447.321412,1561.68,888.00,360.348483,223.31271,-16.9,69.56514,34,-0.464723,0.866025
...,...,...,...,...,...,...,...,...,...,...,...,...
163,242,2571.470769,2703.871059,2652.96,1871.52,382.865381,7061.48220,-36.5,72.05875,35,-0.568065,-0.965926
164,242,2391.027692,2491.979294,2471.04,1753.92,404.038290,7106.22700,-34.0,70.74480,35,-0.568065,-0.866025
165,242,1887.950769,1972.305882,1893.12,1454.40,241.919060,7149.92223,-32.6,69.69523,35,-0.568065,-0.707107
166,242,1412.861538,1473.049412,1420.32,1420.32,104.881852,7192.78210,-33.0,68.85987,35,-0.568065,-0.500000


In [496]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [497]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1540
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2634.583844
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[50]	training's l2: 10935.4	valid_1's l2: 24290.6


In [498]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [499]:
_smape(y_valid, x_pred)

2.231451607862049

In [500]:
model.fit(train_x, train_y)
y_pred = model.predict(test_x)
y_pred.shape

[LightGBM] [Info] Total Bins 1541
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2653.021410


(168,)

## 2

In [501]:
train_ft_2.columns

Index(['building_num', 'day_of_year', 'energy_consum', 'day_hour_mean',
       'hour_mean', 'day_hour_median', 'day_hour_mode', 'day_hour_std',
       'discomfort_CDH', 'CDH', 'discomfort', 'week', 'cos_week', 'sin_time'],
      dtype='object')

In [502]:
# train_drop_cols = ['energy_consum','building_num'] # day_hour mean 만 남김
train_drop_cols = ['building_num', 'day_of_year', 'energy_consum',
       'hour_mean', 'day_hour_median', 'day_hour_mode', 'day_hour_std',
       'discomfort_CDH', 'CDH', 'discomfort', 'week', 'cos_week', 'sin_time']

test_drop_cols = ['building_num', 'day_of_year',
       'hour_mean', 'day_hour_median', 'day_hour_mode', 'day_hour_std',
       'discomfort_CDH', 'CDH', 'discomfort', 'week', 'cos_week', 'sin_time']

In [503]:
train_x = train_ft_2.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_2['energy_consum']

test_x = test_df_2.drop(columns=test_drop_cols,axis=1)

In [504]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 1), (168, 1), (1872,), (168,))

In [505]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 1
[LightGBM] [Info] Start training from score 1673.842404
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[22]	training's l2: 95549.2	valid_1's l2: 46213.2


In [506]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [507]:
_smape(y_valid, x_pred)

4.505566614361061

In [508]:
model.fit(train_x, train_y)
pred = np.concatenate((y_pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 1
[LightGBM] [Info] Start training from score 1677.016677


(336,)

## 3

In [509]:
train_ft_3.columns

Index(['building_num', 'day_of_year', 'energy_consum', 'day_hour_mean',
       'hour_mean', 'day_hour_median', 'day_hour_mode', 'day_hour_std',
       'discomfort_CDH', 'CDH', 'discomfort', 'week', 'cos_week', 'sin_time'],
      dtype='object')

In [510]:
# train_drop_cols = ['energy_consum','building_num'] # day_hour mean 만 남김
train_drop_cols = ['building_num', 'day_of_year', 'energy_consum',
       'hour_mean', 'day_hour_median', 'day_hour_mode', 'day_hour_std',
       'discomfort_CDH', 'CDH', 'discomfort', 'week', 'cos_week', 'sin_time']

test_drop_cols = ['building_num', 'day_of_year',
       'hour_mean', 'day_hour_median', 'day_hour_mode', 'day_hour_std',
       'discomfort_CDH', 'CDH', 'discomfort', 'week', 'cos_week', 'sin_time']

In [511]:
train_x = train_ft_3.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_3['energy_consum']

test_x = test_df_3.drop(columns=test_drop_cols,axis=1)

In [512]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 1), (168, 1), (1872,), (168,))

In [513]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 1
[LightGBM] [Info] Start training from score 1451.631827
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[18]	training's l2: 112004	valid_1's l2: 74804.6


In [514]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [515]:
_smape(y_valid, x_pred)

6.3977863995370585

In [516]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape



[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 1
[LightGBM] [Info] Start training from score 1444.078501


(504,)

## 4

In [517]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [518]:
train_x = train_ft_4.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_4['energy_consum']

test_x = test_df_4.drop(columns=test_drop_cols,axis=1)

In [519]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [520]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1529
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 992.761106
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[94]	training's l2: 475.407	valid_1's l2: 3435.29


In [521]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [522]:
_smape(y_valid, x_pred)

2.414068309563586

In [523]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 995.425985


(672,)

## 5

In [524]:
train_drop_cols = ['energy_consum','building_num', 'day_of_year']

test_drop_cols = ['building_num', 'day_of_year']

In [525]:
train_x = train_ft_5.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_5['energy_consum']

test_x = test_df_5.drop(columns=test_drop_cols,axis=1)

In [526]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 11), (168, 11), (1872,), (168,))

In [527]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1502
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 11
[LightGBM] [Info] Start training from score 2713.622113
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[33]	training's l2: 47254.4	valid_1's l2: 56347.1


In [528]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [529]:
_smape(y_valid, x_pred)

2.8557702793741018

In [530]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1503
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 11
[LightGBM] [Info] Start training from score 2739.049585


(840,)

## 6

In [531]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [532]:
train_x = train_ft_6.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_6['energy_consum']

test_x = test_df_6.drop(columns=test_drop_cols,axis=1)

In [533]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [534]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1536
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1963.633590
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[37]	training's l2: 4406.16	valid_1's l2: 21298.7


In [535]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [536]:
_smape(y_valid, x_pred)

2.164876055806027

In [537]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1973.004706


(1008,)

## 7

In [538]:
train_ft.columns

Index(['building_num', 'day_of_year', 'energy_consum', 'day_hour_mean',
       'hour_mean', 'day_hour_median', 'day_hour_mode', 'day_hour_std',
       'discomfort_CDH', 'CDH', 'discomfort', 'week', 'cos_week', 'sin_time'],
      dtype='object')

In [539]:
train_drop_cols = ['energy_consum','building_num', 'day_of_year']

test_drop_cols = ['building_num', 'day_of_year']


In [540]:
train_x = train_ft_7.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_7['energy_consum']

test_x = test_df_7.drop(columns=test_drop_cols,axis=1)

In [541]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 11), (168, 11), (1872,), (168,))

In [542]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1506
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 11
[LightGBM] [Info] Start training from score 780.312692
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[60]	training's l2: 1192.52	valid_1's l2: 5974.21


In [543]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [544]:
_smape(y_valid, x_pred)

3.750607418795652

In [545]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1507
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 11
[LightGBM] [Info] Start training from score 773.160000


(1176,)

## 8

In [546]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [547]:
train_x = train_ft_8.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_8['energy_consum']

test_x = test_df_8.drop(columns=test_drop_cols,axis=1)

In [548]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [549]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1324.573975
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[19]	training's l2: 4963.85	valid_1's l2: 8332.82


In [550]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [551]:
_smape(y_valid, x_pred)

2.5318109561457107

In [552]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1535
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1327.463059


(1344,)

## 9

In [553]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [554]:
train_x = train_ft_9.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_9['energy_consum']

test_x = test_df_1.drop(columns=test_drop_cols,axis=1)

In [555]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [556]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1387
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2328.719743
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[24]	training's l2: 13884.3	valid_1's l2: 15837.4


In [557]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [558]:
_smape(y_valid, x_pred)

2.318074555540704

In [559]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1388
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2339.582588


(1512,)

## 10

In [560]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [561]:
train_x = train_ft_10.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_10['energy_consum']

test_x = test_df_10.drop(columns=test_drop_cols,axis=1)

In [562]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [563]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1531
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 4060.388333
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[5]	training's l2: 144088	valid_1's l2: 189606


In [564]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [565]:
_smape(y_valid, x_pred)

4.575918575207777

In [566]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1532
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 4058.186940


(1680,)

## 11

In [567]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [568]:
train_x = train_ft_11.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_11['energy_consum']

test_x = test_df_11.drop(columns=test_drop_cols,axis=1)

In [569]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [570]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1929.538205
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[29]	training's l2: 3903.52	valid_1's l2: 7161.3


In [571]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [572]:
_smape(y_valid, x_pred)

1.5741015250124935

In [573]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1930.556235


(1848,)

## 12

In [574]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [575]:
train_x = train_ft_12.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_12['energy_consum']

test_x = test_df_12.drop(columns=test_drop_cols,axis=1)

In [576]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [577]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1535
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1435.420865
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[36]	training's l2: 2075.75	valid_1's l2: 8563.85


In [578]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [579]:
_smape(y_valid, x_pred)

2.116755028247976

In [580]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1536
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1441.787117


(2016,)

## 13

In [581]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [582]:
train_x = train_ft_13.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_13['energy_consum']

test_x = test_df_13.drop(columns=test_drop_cols,axis=1)

In [583]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [584]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2529.188590
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[24]	training's l2: 16440.3	valid_1's l2: 33023.4


In [585]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [586]:
_smape(y_valid, x_pred)

2.8319164443171183

In [587]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1535
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2534.988353


(2184,)

## 14

In [588]:
# train_drop_cols = ['energy_consum','building_num']
# test_drop_cols = ['building_num']

train_cols = ['day_hour_mean']
test_cols = ['day_hour_mean']

In [589]:
train_x = train_ft_14[train_cols]
train_y = train_ft_14['energy_consum']

test_x = test_df_14[test_cols]

In [590]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 1), (168, 1), (1872,), (168,))

In [591]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 1
[LightGBM] [Info] Start training from score 1914.809486
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[19]	training's l2: 107511	valid_1's l2: 117120


In [592]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [593]:
_smape(y_valid, x_pred)

7.886121079195993

In [594]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 1
[LightGBM] [Info] Start training from score 1909.025647


(2352,)

## 15

In [595]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [596]:
train_x = train_ft_15.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_15['energy_consum']

test_x = test_df_15.drop(columns=test_drop_cols,axis=1)

In [597]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [598]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1540
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1780.052451
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[29]	training's l2: 2664.04	valid_1's l2: 4573.55


In [599]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [600]:
_smape(y_valid, x_pred)

1.4953615635962605

In [601]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1541
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1779.533823


(2520,)

## 16

In [602]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [603]:
train_x = train_ft_16.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_16['energy_consum']

test_x = test_df_16.drop(columns=test_drop_cols,axis=1)

In [604]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [605]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 3396.066157
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[22]	training's l2: 36889.7	valid_1's l2: 44644.9


In [606]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [607]:
_smape(y_valid, x_pred)

3.204330332694696

In [608]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 3405.128003


(2688,)

## 17

In [609]:
# train_drop_cols = ['energy_consum','building_num']
# test_drop_cols = ['building_num']

train_cols = ['day_hour_mean', 'discomfort_CDH', 'week', 'day_hour_median', 'discomfort']
test_cols = ['day_hour_mean', 'discomfort_CDH', 'week', 'day_hour_median', 'discomfort']

In [610]:
train_x = train_ft_17[train_cols]
train_y = train_ft_17['energy_consum']

test_x = test_df_17[test_cols]

In [611]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 5), (168, 5), (1872,), (168,))

In [612]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 856
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 5
[LightGBM] [Info] Start training from score 972.798702
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[29]	training's l2: 5014.89	valid_1's l2: 15956.3


In [613]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [614]:
_smape(y_valid, x_pred)

3.639099651099541

In [615]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 857
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 5
[LightGBM] [Info] Start training from score 977.502397


(2856,)

## 18

In [616]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [617]:
train_x = train_ft_18.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_18['energy_consum']

test_x = test_df_18.drop(columns=test_drop_cols,axis=1)

In [618]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [619]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1442.441747
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[52]	training's l2: 2451.56	valid_1's l2: 34304.3


In [620]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [621]:
_smape(y_valid, x_pred)

3.7048205707781876

In [622]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1453.704191


(3024,)

## 19

In [623]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [624]:
train_x = train_ft_19.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_19['energy_consum']

test_x = test_df_19.drop(columns=test_drop_cols,axis=1)

In [625]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [626]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1529
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1412.958597
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[80]	training's l2: 1152.2	valid_1's l2: 64945.4


In [627]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [628]:
_smape(y_valid, x_pred)

5.304662395810789

In [629]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1423.607418


(3192,)

## 20

In [630]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [631]:
train_x = train_ft_20.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_20['energy_consum']

test_x = test_df_20.drop(columns=test_drop_cols,axis=1)

In [632]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [633]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1522
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1713.578463
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[54]	training's l2: 5874.02	valid_1's l2: 87788


In [634]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [635]:
_smape(y_valid, x_pred)

3.5242480221810415

In [636]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1523
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1722.787530


(3360,)

## 21

In [637]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [638]:
train_x = train_ft_21.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_21['energy_consum']

test_x = test_df_21.drop(columns=test_drop_cols,axis=1)

In [639]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [640]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1542
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1980.851889
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[51]	training's l2: 6874.74	valid_1's l2: 46839.6


In [641]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [642]:
_smape(y_valid, x_pred)

3.7784572351456047

In [643]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1543
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1989.104499


(3528,)

## 22

In [644]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [645]:
train_x = train_ft_22.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_22['energy_consum']

test_x = test_df_22.drop(columns=test_drop_cols,axis=1)

In [646]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [647]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1524
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1233.682693
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[26]	training's l2: 2728.62	valid_1's l2: 7506.85


In [648]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [649]:
_smape(y_valid, x_pred)

2.3633307301617728

In [650]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1525
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1235.643177


(3696,)

## 23

In [651]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [652]:
train_x = train_ft_12.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_12['energy_consum']

test_x = test_df_12.drop(columns=test_drop_cols,axis=1)

In [653]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [654]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1535
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1435.420865
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[36]	training's l2: 2075.75	valid_1's l2: 8563.85


In [655]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [656]:
_smape(y_valid, x_pred)

2.116755028247976

In [657]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1536
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1441.787117


(3864,)

## 24

In [658]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [659]:
train_x = train_ft_24.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_24['energy_consum']

test_x = test_df_24.drop(columns=test_drop_cols,axis=1)

In [660]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [661]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 10008.232696
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[65]	training's l2: 26791.1	valid_1's l2: 407361


In [662]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [663]:
_smape(y_valid, x_pred)

1.78977937275151

In [664]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1539
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 10043.469708


(4032,)

## 25

In [665]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [666]:
train_x = train_ft_25.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_25['energy_consum']

test_x = test_df_25.drop(columns=test_drop_cols,axis=1)

In [667]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [668]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1540
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 7239.035765
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[53]	training's l2: 25568	valid_1's l2: 252636


In [669]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [670]:
_smape(y_valid, x_pred)

2.1129540779151554

In [671]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1541
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 7264.323584


(4200,)

## 26

In [672]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [673]:
train_x = train_ft_26.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_26['energy_consum']

test_x = test_df_26.drop(columns=test_drop_cols,axis=1)

In [674]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [675]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1535
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 3524.546410
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[48]	training's l2: 6421.27	valid_1's l2: 65443.7


In [676]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [677]:
_smape(y_valid, x_pred)

1.7442687132000916

In [678]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1536
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 3529.790117


(4368,)

## 27

In [679]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [680]:
train_x = train_ft_27.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_27['energy_consum']

test_x = test_df_27.drop(columns=test_drop_cols,axis=1)

In [681]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [682]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1532
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 17427.762381
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[56]	training's l2: 53368.3	valid_1's l2: 629630


In [683]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [684]:
_smape(y_valid, x_pred)

1.5241064626461576

In [685]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 17453.642143


(4536,)

## 28

In [686]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [687]:
train_x = train_ft_28.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_28['energy_consum']

test_x = test_df_28.drop(columns=test_drop_cols,axis=1)

In [688]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [689]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1531
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1738.069039
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[38]	training's l2: 6779.99	valid_1's l2: 10733.3


In [690]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [691]:
_smape(y_valid, x_pred)

2.4688528211161915

In [692]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1532
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1730.841176


(4704,)

## 29

In [693]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [694]:
train_x = train_ft_29.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_29['energy_consum']

test_x = test_df_29.drop(columns=test_drop_cols,axis=1)

In [695]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [696]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1536
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1450.710978
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[52]	training's l2: 1680.28	valid_1's l2: 13197.1


In [697]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [698]:
_smape(y_valid, x_pred)

2.3240832809865646

In [699]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1450.082427


(4872,)

## 30

In [700]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [701]:
train_x = train_ft_30.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_30['energy_consum']

test_x = test_df_30.drop(columns=test_drop_cols,axis=1)

In [702]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [703]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1535
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 839.850401
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[48]	training's l2: 1965.09	valid_1's l2: 12333.2


In [704]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [705]:
_smape(y_valid, x_pred)

4.3617305364450925

In [706]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1536
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 841.570368


(5040,)

## 31

In [707]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [708]:
train_x = train_ft_31.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_31['energy_consum']

test_x = test_df_31.drop(columns=test_drop_cols,axis=1)

In [709]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [710]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1536
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1360.217210
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[34]	training's l2: 3297.24	valid_1's l2: 8062.36


In [711]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [712]:
_smape(y_valid, x_pred)

2.5903480871750144

In [713]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1355.368499


(5208,)

## 32

In [714]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [715]:
train_x = train_ft_32.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_32['energy_consum']

test_x = test_df_32.drop(columns=test_drop_cols,axis=1)

In [716]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [717]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1505
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 9863.655566
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[92]	training's l2: 279.846	valid_1's l2: 5176.9


In [718]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [719]:
_smape(y_valid, x_pred)

0.2869756567290568

In [720]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1506
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 9865.783576


(5376,)

## 33

In [721]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [722]:
train_x = train_ft_33.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_33['energy_consum']

test_x = test_df_33.drop(columns=test_drop_cols,axis=1)

In [723]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [724]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1499
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 8694.078069
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[44]	training's l2: 375.186	valid_1's l2: 1229.01


In [725]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [726]:
_smape(y_valid, x_pred)

0.1566905020719347

In [727]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1500
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 8700.811057


(5544,)

## 34

In [728]:
train_drop_cols = ['energy_consum','building_num', 'day_of_year']

test_drop_cols = ['building_num', 'day_of_year']

In [729]:
train_x = train_ft_34.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_34['energy_consum']

test_x = test_df_34.drop(columns=test_drop_cols,axis=1)

In [730]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 11), (168, 11), (1872,), (168,))

In [731]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1482
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 11
[LightGBM] [Info] Start training from score 3350.502691
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[23]	training's l2: 775.669	valid_1's l2: 1094.53


In [732]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [733]:
_smape(y_valid, x_pred)

0.37480012612475017

In [734]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1483
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 11
[LightGBM] [Info] Start training from score 3352.706646


(5712,)

## 35

In [735]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [736]:
train_x = train_ft_35.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_35['energy_consum']

test_x = test_df_35.drop(columns=test_drop_cols,axis=1)

In [737]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [738]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1481
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2171.357052
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[75]	training's l2: 27.5985	valid_1's l2: 171.098


In [739]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [740]:
_smape(y_valid, x_pred)

0.2488262199935984

In [741]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1482
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2172.365295


(5880,)

## 36

In [742]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [743]:
train_x = train_ft_36.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_36['energy_consum']

test_x = test_df_36.drop(columns=test_drop_cols,axis=1)

In [744]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [745]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1501
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2892.460448
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[44]	training's l2: 83.8226	valid_1's l2: 280.46


In [746]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [747]:
_smape(y_valid, x_pred)

0.22040637744005104

In [748]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1502
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2892.821823


(6048,)

## 37

In [749]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [750]:
train_x = train_ft_37.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_37['energy_consum']

test_x = test_df_37.drop(columns=test_drop_cols,axis=1)

In [751]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [752]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1532
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 3341.783492
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[51]	training's l2: 17570.3	valid_1's l2: 105103


In [753]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [754]:
_smape(y_valid, x_pred)

2.364905795970578

In [755]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 3352.271910


(6216,)

## 38

In [756]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [757]:
train_x = train_ft_38.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_38['energy_consum']

test_x = test_df_38.drop(columns=test_drop_cols,axis=1)

In [758]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [759]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1805.747500
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[98]	training's l2: 3231.99	valid_1's l2: 7175.32


In [760]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [761]:
_smape(y_valid, x_pred)

1.9524974772677588

In [762]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1812.133588


(6384,)

## 39

In [763]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [764]:
train_x = train_ft_39.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_39['energy_consum']

test_x = test_df_39.drop(columns=test_drop_cols,axis=1)

In [765]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [766]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2164.378719
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[99]	training's l2: 1920.54	valid_1's l2: 49968.8


In [767]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [768]:
_smape(y_valid, x_pred)

3.0628340475684097

In [769]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1531
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2170.466354


(6552,)

## 40

In [770]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [771]:
train_x = train_ft_40.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_40['energy_consum']

test_x = test_df_40.drop(columns=test_drop_cols,axis=1)

In [772]:

# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [773]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1878.003046
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[23]	training's l2: 51835.7	valid_1's l2: 38412


In [774]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [775]:
_smape(y_valid, x_pred)

5.242813534753072

In [776]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1888.743677


(6720,)

## 41

In [777]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [778]:
train_x = train_ft_41.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_41['energy_consum']

test_x = test_df_41.drop(columns=test_drop_cols,axis=1)

In [779]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [780]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 3254.726122
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[78]	training's l2: 8954.16	valid_1's l2: 26470.3


In [781]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [782]:
_smape(y_valid, x_pred)

2.018454865052121

In [783]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1539
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 3272.223971


(6888,)

## 42

In [784]:
# train_drop_cols = ['energy_consum','building_num']

# test_drop_cols = ['building_num']

train_cols = ['day_hour_median']
test_cols = ['day_hour_median']

In [785]:
train_x = train_ft_42[train_cols]
train_y = train_ft_42['energy_consum']

test_x = test_df_42[test_cols]

In [786]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 1), (168, 1), (1872,), (168,))

In [787]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 165
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 1
[LightGBM] [Info] Start training from score 1602.286537
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[100]	training's l2: 69817.4	valid_1's l2: 208335


In [788]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [789]:
_smape(y_valid, x_pred)

5.435041677843239

In [790]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 165
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 1
[LightGBM] [Info] Start training from score 1595.578235


(7056,)

## 43

In [791]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [792]:
train_x = train_ft_43.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_43['energy_consum']

test_x = test_df_43.drop(columns=test_drop_cols,axis=1)

In [793]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [794]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1527
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1885.231895
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[90]	training's l2: 1316.57	valid_1's l2: 5804.94


In [795]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [796]:
_smape(y_valid, x_pred)

2.2500049857101105

In [797]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1528
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1893.269857


(7224,)

## 44

In [798]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [799]:
train_x = train_ft_44.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_44['energy_consum']

test_x = test_df_44.drop(columns=test_drop_cols,axis=1)

In [800]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [801]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1520
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2122.372820
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[31]	training's l2: 6630.62	valid_1's l2: 10404.5


In [802]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [803]:
_smape(y_valid, x_pred)

3.308501718394664

In [804]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1521
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2126.286824


(7392,)

## 45

In [805]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [806]:
train_x = train_ft_45.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_45['energy_consum']

test_x = test_df_45.drop(columns=test_drop_cols,axis=1)

In [807]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [808]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2942.071024
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[33]	training's l2: 4544.18	valid_1's l2: 8600.76


In [809]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [810]:
_smape(y_valid, x_pred)

1.1134363326324

In [811]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2953.833645


(7560,)

## 46

In [812]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [813]:
train_x = train_ft_46.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_46['energy_consum']

test_x = test_df_46.drop(columns=test_drop_cols,axis=1)

In [814]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [815]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1541
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2242.025000
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[34]	training's l2: 6379.2	valid_1's l2: 32803.7


In [816]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [817]:
_smape(y_valid, x_pred)

2.7436669210097713

In [818]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1542
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2251.192588


(7728,)

## 47

In [819]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [820]:
train_x = train_ft_47.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_47['energy_consum']

test_x = test_df_47.drop(columns=test_drop_cols,axis=1)

In [821]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [822]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 5370.090128
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[48]	training's l2: 11196.4	valid_1's l2: 106057


In [823]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [824]:
_smape(y_valid, x_pred)

2.3348761403894818

In [825]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1539
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 5375.089999


(7896,)

## 48

In [826]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [827]:
train_x = train_ft_48.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_48['energy_consum']

test_x = test_df_48.drop(columns=test_drop_cols,axis=1)

In [828]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [829]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1532
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1876.627309
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[45]	training's l2: 1186.23	valid_1's l2: 3929.55


In [830]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [831]:
_smape(y_valid, x_pred)

1.1301322843134722

In [832]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1884.144177


(8064,)

## 49

In [833]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [834]:
train_x = train_ft_49.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_49['energy_consum']

test_x = test_df_49.drop(columns=test_drop_cols,axis=1)

In [835]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [836]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1535
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 3037.270386
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[44]	training's l2: 2257.14	valid_1's l2: 9586.48


In [837]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [838]:
_smape(y_valid, x_pred)

1.2444804039093795

In [839]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1536
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 3055.167001


(8232,)

## 50

In [840]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [841]:
train_x = train_ft_50.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_50['energy_consum']

test_x = test_df_50.drop(columns=test_drop_cols,axis=1)

In [842]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [843]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 3022.480447
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[30]	training's l2: 3972.84	valid_1's l2: 9346.82


In [844]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [845]:
_smape(y_valid, x_pred)

1.337830206022018

In [846]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1535
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 3030.019999


(8400,)

## 51

In [847]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [848]:
train_x = train_ft_51.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_51['energy_consum']

test_x = test_df_51.drop(columns=test_drop_cols,axis=1)

In [849]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [850]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2637.836057
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[79]	training's l2: 2066.17	valid_1's l2: 23937.7


In [851]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [852]:
_smape(y_valid, x_pred)

2.1733077004938943

In [853]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2643.130147


(8568,)

## 52

In [854]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [855]:
train_x = train_ft_52.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_52['energy_consum']

test_x = test_df_52.drop(columns=test_drop_cols,axis=1)

In [856]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [857]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2095.868108
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[63]	training's l2: 2810.04	valid_1's l2: 15711.7


In [858]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [859]:
_smape(y_valid, x_pred)

1.8990752613971766

In [860]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2109.743088


(8736,)

## 53

In [861]:
train_cols = ['day_hour_mean']
test_cols = ['day_hour_mean']

In [862]:
train_x = train_ft_53[train_cols]
train_y = train_ft_53['energy_consum']

test_x = test_df_53[test_cols]

In [863]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 1), (168, 1), (1872,), (168,))

In [864]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 1
[LightGBM] [Info] Start training from score 1996.396025
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[60]	training's l2: 228187	valid_1's l2: 51020.3


In [865]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [866]:
_smape(y_valid, x_pred)

4.911926260373919

In [867]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 1
[LightGBM] [Info] Start training from score 2003.826705


(8904,)

## 54

In [868]:
train_cols = ['day_hour_mean']
test_cols = ['day_hour_mean']

In [869]:
train_x = train_ft_54[train_cols]
train_y = train_ft_54['energy_consum']

test_x = test_df_54[test_cols]

In [870]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 1), (168, 1), (1872,), (168,))

In [871]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 1
[LightGBM] [Info] Start training from score 1807.779328
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[34]	training's l2: 124042	valid_1's l2: 96753.2


In [872]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [873]:
_smape(y_valid, x_pred)

8.385730000848415

In [874]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 1
[LightGBM] [Info] Start training from score 1824.230383


(9072,)

## 55

In [875]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [876]:
train_x = train_ft_55.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_55['energy_consum']

test_x = test_df_55.drop(columns=test_drop_cols,axis=1)

In [877]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [878]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1517
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1008.273398
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[46]	training's l2: 94.1202	valid_1's l2: 677.527


In [879]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [880]:
_smape(y_valid, x_pred)

0.9107744707146657

In [881]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1518
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1009.086589


(9240,)

## 56

In [882]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [883]:
train_x = train_ft_56.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_56['energy_consum']

test_x = test_df_56.drop(columns=test_drop_cols,axis=1)

In [884]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [885]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1518
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 4169.519999
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[41]	training's l2: 1266.81	valid_1's l2: 2271.91


In [886]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [887]:
_smape(y_valid, x_pred)

0.47331469894109374

In [888]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1519
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 4169.326059


(9408,)

## 57

In [889]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [890]:
train_x = train_ft_57.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_57['energy_consum']

test_x = test_df_57.drop(columns=test_drop_cols,axis=1)

In [891]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [892]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1899.888556
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[34]	training's l2: 7813.9	valid_1's l2: 17481.7


In [893]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [894]:
_smape(y_valid, x_pred)

2.706628282617147

In [895]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1906.983440


(9576,)

## 58

In [896]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [897]:
train_x = train_ft_58.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_58['energy_consum']

test_x = test_df_58.drop(columns=test_drop_cols,axis=1)

In [898]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [899]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1474
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2880.324870
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[62]	training's l2: 98.2376	valid_1's l2: 415.512


In [900]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [901]:
_smape(y_valid, x_pred)

0.2465109916279197

In [902]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1475
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2882.592939


(9744,)

## 59

In [903]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [904]:
train_x = train_ft_59.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_59['energy_consum']

test_x = test_df_59.drop(columns=test_drop_cols,axis=1)

In [905]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [906]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1523
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1654.332404
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[33]	training's l2: 16252.5	valid_1's l2: 76746.8


In [907]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [908]:
_smape(y_valid, x_pred)

3.369302100439365

In [909]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1524
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1659.545029


(9912,)

## 60

In [910]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [911]:
train_x = train_ft_60.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_60['energy_consum']

test_x = test_df_60.drop(columns=test_drop_cols,axis=1)

In [912]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [913]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1536
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2930.641282
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[34]	training's l2: 15400.3	valid_1's l2: 33594.2


In [914]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [915]:
_smape(y_valid, x_pred)

2.2451911889492115

In [916]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2939.409882


(10080,)

## 61

In [917]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [918]:
train_x = train_ft_61.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_61['energy_consum']

test_x = test_df_61.drop(columns=test_drop_cols,axis=1)

In [919]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [920]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1539
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 3106.595767
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[32]	training's l2: 13590.4	valid_1's l2: 38936


In [921]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [922]:
_smape(y_valid, x_pred)

2.226324994834431

In [923]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1540
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 3112.799292


(10248,)

## 62

In [924]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [925]:
train_x = train_ft_62.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_62['energy_consum']

test_x = test_df_62.drop(columns=test_drop_cols,axis=1)

In [926]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [927]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1144.295578
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[51]	training's l2: 830.353	valid_1's l2: 6508.15


In [928]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [929]:
_smape(y_valid, x_pred)

2.3529655731355197

In [930]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1539
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1147.330854


(10416,)

## 63

In [931]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [932]:
train_x = train_ft_63.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_63['energy_consum']

test_x = test_df_63.drop(columns=test_drop_cols,axis=1)

In [933]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [934]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 752.697741
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[97]	training's l2: 210.461	valid_1's l2: 2539.64


In [935]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [936]:
_smape(y_valid, x_pred)

2.7093836631448136

In [937]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1535
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 753.029956


(10584,)

## 64

In [938]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [939]:
train_x = train_ft_64.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_64['energy_consum']

test_x = test_df_64.drop(columns=test_drop_cols,axis=1)

In [940]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [941]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1180.797115
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[79]	training's l2: 417.486	valid_1's l2: 5402.03


In [942]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [943]:
_smape(y_valid, x_pred)

2.3516206806703934

In [944]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1183.805471


(10752,)

## 65

In [945]:
train_cols = ['day_hour_mean',
 'discomfort_CDH',
 'discomfort',
 'CDH',
 'day_hour_mode',
 'hour_mean',
 'day_hour_std',
 'day_hour_median']

test_cols = ['day_hour_mean',
 'discomfort_CDH',
 'discomfort',
 'CDH',
 'day_hour_mode',
 'hour_mean',
 'day_hour_std',
 'day_hour_median']

In [946]:
train_x = train_ft_65[train_cols]
train_y = train_ft_65['energy_consum']

test_x = test_df_65[test_cols]

In [947]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 8), (168, 8), (1872,), (168,))

In [948]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1456
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 8
[LightGBM] [Info] Start training from score 567.170097
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[19]	training's l2: 3669.67	valid_1's l2: 7010.49


In [949]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [950]:
_smape(y_valid, x_pred)

4.959066619448647

In [951]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1456
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 8
[LightGBM] [Info] Start training from score 574.458530


(10920,)

## 66

In [952]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [953]:
train_x = train_ft_66.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_66['energy_consum']

test_x = test_df_66.drop(columns=test_drop_cols,axis=1)

In [954]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [955]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1522
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 396.547884
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[100]	training's l2: 31.4345	valid_1's l2: 360.068


In [956]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [957]:
_smape(y_valid, x_pred)

1.7049675498011823

In [958]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1523
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 397.623882


(11088,)

## 67 - THI추가필요

In [959]:
train_cols = ['day_hour_mean',
 'discomfort',
 'discomfort_CDH',
 'CDH',
#  'THI',
 'day_hour_median',
 'day_hour_mode']

test_cols = ['day_hour_mean',
 'discomfort',
 'discomfort_CDH',
 'CDH',
#  'THI',
 'day_hour_median',
 'day_hour_mode']


In [960]:
train_x = train_ft_67[train_cols]
train_y = train_ft_67['energy_consum']

test_x = test_df_67[test_cols]

In [961]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 6), (168, 6), (1872,), (168,))

In [962]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1263
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 6
[LightGBM] [Info] Start training from score 1282.730320
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[10]	training's l2: 21053.6	valid_1's l2: 21195.5


In [963]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [964]:
_smape(y_valid, x_pred)

5.437220961959613

In [965]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1263
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 6
[LightGBM] [Info] Start training from score 1279.817470


(11256,)

## 68

In [966]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [967]:
train_x = train_ft_68.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_68['energy_consum']

test_x = test_df_68.drop(columns=test_drop_cols,axis=1)

In [968]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [969]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1539
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2021.266506
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[48]	training's l2: 2453.24	valid_1's l2: 20250.3


In [970]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [971]:
_smape(y_valid, x_pred)

2.411900911097016

In [972]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1540
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2023.139558


(11424,)

## 69

In [973]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [974]:
train_x = train_ft_69.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_69['energy_consum']

test_x = test_df_69.drop(columns=test_drop_cols,axis=1)

In [975]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [976]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 4482.963461
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[45]	training's l2: 13940.1	valid_1's l2: 87445


In [977]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [978]:
_smape(y_valid, x_pred)

1.942005988308884

In [979]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 4491.791028


(11592,)

## 70

In [980]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [981]:
train_x = train_ft_70.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_70['energy_consum']

test_x = test_df_70.drop(columns=test_drop_cols,axis=1)

In [982]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [983]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1541
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 4269.895393
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[43]	training's l2: 15531.9	valid_1's l2: 152640


In [984]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [985]:
_smape(y_valid, x_pred)

2.099338369699532

In [986]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1542
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 4289.986327


(11760,)

## 71

In [987]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [988]:
train_x = train_ft_71.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_71['energy_consum']

test_x = test_df_71.drop(columns=test_drop_cols,axis=1)

In [989]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [990]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2129.925259
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[64]	training's l2: 4311.39	valid_1's l2: 26461.4


In [991]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [992]:
_smape(y_valid, x_pred)

2.3601528167914285

In [993]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2141.101649


(11928,)

## 72

In [994]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [995]:
train_x = train_ft_72.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_72['energy_consum']

test_x = test_df_72.drop(columns=test_drop_cols,axis=1)

In [996]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [997]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1520
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1268.978463
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[35]	training's l2: 687.986	valid_1's l2: 7469.34


In [998]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [999]:
_smape(y_valid, x_pred)

2.159448101634035

In [1000]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1521
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1278.976766


(12096,)

## 73

In [1001]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1002]:
train_x = train_ft_73.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_73['energy_consum']

test_x = test_df_73.drop(columns=test_drop_cols,axis=1)

In [1003]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1004]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1532
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 3617.683331
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[25]	training's l2: 9331.77	valid_1's l2: 28565.1


In [1005]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1006]:
_smape(y_valid, x_pred)

1.8038365758012584

In [1007]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 3631.879998


(12264,)

## 74

In [1008]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1009]:
train_x = train_ft_74.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_74['energy_consum']

test_x = test_df_74.drop(columns=test_drop_cols,axis=1)

In [1010]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1011]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1532
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 4002.030391
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[59]	training's l2: 23669.2	valid_1's l2: 344135


In [1012]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1013]:
_smape(y_valid, x_pred)

3.990469341193559

In [1014]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 4027.799300


(12432,)

## 75

In [1015]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1016]:
train_x = train_ft_75.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_75['energy_consum']

test_x = test_df_75.drop(columns=test_drop_cols,axis=1)

In [1017]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1018]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1404.004871
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[27]	training's l2: 6533.17	valid_1's l2: 7768.49


In [1019]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1020]:
_smape(y_valid, x_pred)

1.9801131129552414

In [1021]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1408.176470


(12600,)

## 76

In [1022]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1023]:
train_x = train_ft_76.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_76['energy_consum']

test_x = test_df_76.drop(columns=test_drop_cols,axis=1)

In [1024]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1025]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1194.931347
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[15]	training's l2: 5358.7	valid_1's l2: 5724.42


In [1026]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1027]:
_smape(y_valid, x_pred)

2.202308241556297

In [1028]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1539
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1200.375795


(12768,)

## 77

In [1029]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1030]:
train_x = train_ft_77.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_77['energy_consum']

test_x = test_df_77.drop(columns=test_drop_cols,axis=1)

In [1031]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1032]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1480.807501
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[33]	training's l2: 2871.69	valid_1's l2: 15079.7


In [1033]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1034]:
_smape(y_valid, x_pred)

2.1473506247361884

In [1035]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1531
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1485.077648


(12936,)

## 78

In [1036]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1037]:
train_x = train_ft_78.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_78['energy_consum']

test_x = test_df_78.drop(columns=test_drop_cols,axis=1)

In [1038]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1039]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1525
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1639.676794
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[48]	training's l2: 2782.24	valid_1's l2: 21177.5


In [1040]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1041]:
_smape(y_valid, x_pred)

2.3334290754333002

In [1042]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1526
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1648.894469


(13104,)

## 79



In [1043]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1044]:
train_x = train_ft_79.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_79['energy_consum']

test_x = test_df_79.drop(columns=test_drop_cols,axis=1)

In [1045]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1046]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1539
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2882.323458
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[76]	training's l2: 2197.62	valid_1's l2: 39494.8


In [1047]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1048]:
_smape(y_valid, x_pred)

1.5832086098710378

In [1049]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1540
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2892.282939


(13272,)

## 80

In [1050]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1051]:
train_x = train_ft_80.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_80['energy_consum']

test_x = test_df_80.drop(columns=test_drop_cols,axis=1)

In [1052]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1053]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1529
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2536.472883
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[51]	training's l2: 9811.94	valid_1's l2: 102206


In [1054]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1055]:
_smape(y_valid, x_pred)

2.905868087015731

In [1056]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2556.397939


(13440,)

## 81

In [1057]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [1058]:
train_x = train_ft_81.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_81['energy_consum']

test_x = test_df_81.drop(columns=test_drop_cols,axis=1)

In [1059]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1060]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1440.172499
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[87]	training's l2: 1418.63	valid_1's l2: 5976.18


In [1061]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1062]:
_smape(y_valid, x_pred)

1.779869038578093

In [1063]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1446.166058


(13608,)

## 82

In [1064]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [1065]:
train_x = train_ft_82.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_82['energy_consum']

test_x = test_df_82.drop(columns=test_drop_cols,axis=1)

In [1066]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1067]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1526
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2273.020383
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[49]	training's l2: 6818.57	valid_1's l2: 59512.3


In [1068]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1069]:
_smape(y_valid, x_pred)

3.714889374976349

In [1070]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1527
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2288.431411


(13776,)

## 83

In [1071]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1072]:
train_x = train_ft_83.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_83['energy_consum']

test_x = test_df_83.drop(columns=test_drop_cols,axis=1)

In [1073]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1074]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1525
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1437.338622
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[44]	training's l2: 4346.56	valid_1's l2: 48075.9


In [1075]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1076]:
_smape(y_valid, x_pred)

3.3243368845820855

In [1077]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1526
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1445.103677


(13944,)

## 84

In [1078]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1079]:
train_x = train_ft_84.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_84['energy_consum']

test_x = test_df_84.drop(columns=test_drop_cols,axis=1)

In [1080]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1081]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1528
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1714.789232
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[53]	training's l2: 2405.02	valid_1's l2: 41165.1


In [1082]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1083]:
_smape(y_valid, x_pred)

2.534457441072522

In [1084]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1529
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1721.985412


(14112,)

## 85

In [1085]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1086]:
train_x = train_ft_85.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_85['energy_consum']

test_x = test_df_85.drop(columns=test_drop_cols,axis=1)

In [1087]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1088]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1539
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2246.245191
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[91]	training's l2: 1111.94	valid_1's l2: 9637.25


In [1089]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1090]:
_smape(y_valid, x_pred)

1.6727484251035047

In [1091]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1540
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2258.645116


(14280,)

## 86

In [1092]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1093]:
train_x = train_ft_86.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_86['energy_consum']

test_x = test_df_86.drop(columns=test_drop_cols,axis=1)

In [1094]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1095]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1143.796826
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[99]	training's l2: 955.295	valid_1's l2: 12354.9


In [1096]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1097]:
_smape(y_valid, x_pred)

3.808028759782374

In [1098]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1149.326646


(14448,)

## 87

In [1099]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1100]:
train_x = train_ft_87.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_87['energy_consum']

test_x = test_df_87.drop(columns=test_drop_cols,axis=1)

In [1101]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1102]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1274.642068
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[22]	training's l2: 15884.8	valid_1's l2: 23108


In [1103]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1104]:
_smape(y_valid, x_pred)

4.6958632689907915

In [1105]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1539
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1289.623369


(14616,)

## 88

In [1106]:
train_cols = ['day_hour_mean']

test_cols = ['day_hour_mean']

In [1107]:
train_x = train_ft_88[train_cols]
train_y = train_ft_88['energy_consum']

test_x = test_df_88[test_cols]

In [1108]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 1), (168, 1), (1872,), (168,))

In [1109]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 1
[LightGBM] [Info] Start training from score 1190.265915
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[60]	training's l2: 40718.7	valid_1's l2: 34779.6


In [1110]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1111]:
_smape(y_valid, x_pred)

3.5338355546784537

In [1112]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 1
[LightGBM] [Info] Start training from score 1195.243016


(14784,)

## 89

In [1113]:
train_cols = ['day_hour_mean']

test_cols = ['day_hour_mean']

In [1114]:
train_x = train_ft_89[train_cols]
train_y = train_ft_89['energy_consum']

test_x = test_df_89[test_cols]

In [1115]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 1), (168, 1), (1872,), (168,))

In [1116]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 1
[LightGBM] [Info] Start training from score 1426.763717
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[61]	training's l2: 80690.7	valid_1's l2: 65846.5


In [1117]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1118]:
_smape(y_valid, x_pred)

3.832730721446017

In [1119]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 1
[LightGBM] [Info] Start training from score 1435.096470


(14952,)

## 90

In [1120]:
train_cols = ['day_hour_mean']

test_cols = ['day_hour_mean']


In [1121]:
train_x = train_ft_90.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_90['energy_consum']

test_x = test_df_90.drop(columns=test_drop_cols,axis=1)

In [1122]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1123]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1375.696412
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[30]	training's l2: 19828.3	valid_1's l2: 65215.2


In [1124]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1125]:
_smape(y_valid, x_pred)

4.535332153727575

In [1126]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1535
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1386.890825


(15120,)

## 91 - THI추가 필요

In [1127]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [1128]:
train_x = train_ft_91.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_91['energy_consum']

test_x = test_df_91.drop(columns=test_drop_cols,axis=1)

In [1129]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1130]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1320.696409
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[28]	training's l2: 9291.18	valid_1's l2: 44652


In [1131]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1132]:
_smape(y_valid, x_pred)

6.705576717515309

In [1133]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1326.801763


(15288,)

## 92

In [1134]:
train_cols = ['day_hour_mean', 'discomfort_CDH', 'hour', 'week', 'CDH']


test_cols = ['day_hour_mean', 'discomfort_CDH', 'hour', 'week', 'CDH']

In [1135]:
train_x = train_df_92[train_cols]
train_y = train_df_92['energy_consum']

test_x = test_df_full_92[test_cols]

In [1136]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 5), (168, 5), (1872,), (168,))

In [1137]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 716
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 5
[LightGBM] [Info] Start training from score 1286.057501
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[100]	training's l2: 2218.73	valid_1's l2: 37654.5


In [1138]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1139]:
_smape(y_valid, x_pred)

3.77981441146232

In [1140]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 717
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 5
[LightGBM] [Info] Start training from score 1285.452795


(15456,)

## 93

In [1141]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']


In [1142]:
train_x = train_ft_93.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_93['energy_consum']

test_x = test_df_93.drop(columns=test_drop_cols,axis=1)

In [1143]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1144]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1534
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1099.958654
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[49]	training's l2: 781.564	valid_1's l2: 7659.4


In [1145]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1146]:
_smape(y_valid, x_pred)

3.1008900341213526

In [1147]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1535
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1105.749706


(15624,)

## 94

In [1148]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1149]:
train_x = train_ft_94.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_94['energy_consum']

test_x = test_df_94.drop(columns=test_drop_cols,axis=1)

In [1150]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1151]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2442.008075
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[23]	training's l2: 16777.6	valid_1's l2: 53238.6


In [1152]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1153]:
_smape(y_valid, x_pred)

3.6452202661818665

In [1154]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2457.945881


(15792,)

## 95

In [1155]:
train_cols = ['day_hour_mean']

test_cols = ['day_hour_mean']


In [1156]:
train_x = train_ft_95[train_cols]
train_y = train_ft_95['energy_consum']

test_x = test_df_95[test_cols]

In [1157]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 1), (168, 1), (1872,), (168,))

In [1158]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 1
[LightGBM] [Info] Start training from score 1111.251442
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[43]	training's l2: 48600.9	valid_1's l2: 33848.7


In [1159]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1160]:
_smape(y_valid, x_pred)

6.843005535263373

In [1161]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 169
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 1
[LightGBM] [Info] Start training from score 1112.192735


(15960,)

## 96

In [1162]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1163]:
train_x = train_ft_96.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_96['energy_consum']

test_x = test_df_96.drop(columns=test_drop_cols,axis=1)

In [1164]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1165]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1539
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 2797.106571
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[34]	training's l2: 5759.19	valid_1's l2: 19643.1


In [1166]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1167]:
_smape(y_valid, x_pred)

1.9100433899380462

In [1168]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape


[LightGBM] [Info] Total Bins 1540
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 2808.038089


(16128,)

## 97

In [1169]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1170]:
train_x = train_ft_97.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_97['energy_consum']

test_x = test_df_97.drop(columns=test_drop_cols,axis=1)

In [1171]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1172]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1539
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1223.227500
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[26]	training's l2: 3179.13	valid_1's l2: 8135.91


In [1173]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1174]:
_smape(y_valid, x_pred)

2.4184091338002043

In [1175]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1540
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1235.922176


(16296,)

## 98 - THI 추가 필요

In [1176]:
train_cols = ['day_hour_mean',
 'discomfort_CDH',
 'day_hour_median',
 'temp',
#  'THI',
 'discomfort',
 'day_hour_mode']

test_cols = ['day_hour_mean',
 'discomfort_CDH',
 'day_hour_median',
 'temp',
#  'THI',
 'discomfort',
 'day_hour_mode']


In [1177]:
test_df_full_98.columns

Index(['num_date_time', 'building_num', 'date_time', 'temp', 'precip',
       'wind_ms', 'humidity', 'building_type', 'floor_area', 'cooling_area',
       'solar_capa', 'ess_capa', 'pcs_capa', 'hour', 'day', 'month', 'week',
       'day_hour_mean', 'discomfort', 'discomfort_CDH', 'day_hour_median',
       'day_hour_mode', 'day_hour_std', 'sin_time', 'hour_mean', 'CDH',
       'day_of_year', 'cos_week'],
      dtype='object')

In [1178]:
train_x = train_df_98[train_cols]
train_y = train_df_98['energy_consum']

test_x = test_df_full_98[test_cols]

In [1179]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 6), (168, 6), (1872,), (168,))

In [1180]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1190
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 6
[LightGBM] [Info] Start training from score 1133.716153
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[72]	training's l2: 3093.61	valid_1's l2: 27508.1


In [1181]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1182]:
_smape(y_valid, x_pred)

6.44668877872727

In [1183]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1191
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 6
[LightGBM] [Info] Start training from score 1136.114912


(16464,)

## 99

In [1184]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1185]:
train_x = train_ft_99.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_99['energy_consum']

test_x = test_df_99.drop(columns=test_drop_cols,axis=1)

In [1186]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1187]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1537
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 1159.202211
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[26]	training's l2: 1741.01	valid_1's l2: 3239.88


In [1188]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1189]:
_smape(y_valid, x_pred)

2.195777476085757

In [1190]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1538
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 1159.884794


(16632,)

## 100

In [1191]:
train_drop_cols = ['energy_consum','building_num']

test_drop_cols = ['building_num']

In [1192]:
train_x = train_ft_100.drop(columns=train_drop_cols,axis=1)
train_y = train_ft_100['energy_consum']

test_x = test_df_100.drop(columns=test_drop_cols,axis=1)

In [1193]:
# 마지막 7일을 검증셋으로 설정
y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = 168)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

((1872, 12), (168, 12), (1872,), (168,))

In [1194]:
model = LGBMRegressor(random_state=SEED, objective='regression', force_row_wise=True)
model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(300)])

[LightGBM] [Info] Total Bins 1343
[LightGBM] [Info] Number of data points in the train set: 1872, number of used features: 12
[LightGBM] [Info] Start training from score 820.772565
Training until validation scores don't improve for 300 rounds
Did not meet early stopping. Best iteration is:
[88]	training's l2: 747.521	valid_1's l2: 3282.08


In [1195]:
x_pred = model.predict(x_valid)
x_pred = pd.Series(x_pred)

In [1196]:
_smape(y_valid, x_pred)

2.631401075268324

In [1197]:
model.fit(train_x, train_y)
pred = np.concatenate((pred, model.predict(test_x)))
pred.shape

[LightGBM] [Info] Total Bins 1344
[LightGBM] [Info] Number of data points in the train set: 2040, number of used features: 12
[LightGBM] [Info] Start training from score 824.705177


(16800,)

In [1198]:
test_ft.shape

(16800, 13)

In [1199]:
len(pred)

16800

In [1200]:
submission['answer'] = pred

In [1208]:
submission1 = submission['answer']

In [1213]:
submission.to_csv('submission_ys.csv', index=False)