In [1]:
!pip install sktime



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import pandas as pd
import numpy as np
import torch
from tqdm.auto import tqdm
import random
import os

def reset_seeds(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

DATA_PATH = '/content/drive/MyDrive/데이콘 캐글 컴페티션/DACON_23.07_PowerConsumption/'
SEED = 42

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [4]:
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
import sktime
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.utils.plotting import plot_series
import matplotlib.pyplot as pl24
from sklearn.metrics import make_scorer
from lightgbm import early_stopping
import xgboost as xgb
import seaborn as sns

In [5]:
def _smape(true, pred):
    true = np.array(true)
    pred = np.array(pred)
    v = 2 * np.abs(pred - true) / (np.abs(pred) + np.abs(true))
    output = np.mean(v) * 100
    return output
smape = make_scorer(_smape, greater_is_better=False)


def lightgbm_smape(y_true, y_pred):
    smape_val = _smape(y_true, y_pred)
    return 'SMAPE', smape_val, False

In [6]:
train = pd.read_csv(f"{DATA_PATH}train.csv")
test = pd.read_csv(f"{DATA_PATH}test.csv")
# building = pd.read_csv(f"{DATA_PATH}building_info.csv")
submission = pd.read_csv(f"{DATA_PATH}sample_submission.csv")

In [7]:

train_ft = pd.read_csv(f'{DATA_PATH}train_ver3.csv') #ver 4 미완성본

test_ft = pd.read_csv(f'{DATA_PATH}test_ver3.csv')



# train_ft = pd.read_csv(f"{DATA_PATH}train_feature9.csv")
# test_ft = pd.read_csv(f"{DATA_PATH}test_feature9.csv")


# train_ft = pd.read_csv(f"{DATA_PATH}train_selected_3.csv")
# test_ft = pd.read_csv(f"{DATA_PATH}test_selected_3.csv")

# df = pd.read_csv(f"{DATA_PATH}train_featured_3.csv")


In [8]:
# cols = ['day_hour_mean', 'day_hour_median', 'discomfort_CDH', 'week',
#        'day_hour_mode', 'discomfort', 'day_hour_std', 'CDH', 'hour_mean',
#        'sin_time', 'cos_week','building_num','energy_consum']

# train_ft = df[cols]

- 피처추가

In [9]:
train_ft.shape, test_ft.shape

((201600, 31), (16800, 28))

In [10]:
train_ft.columns

cols = ['energy_consum', 'building_num', 'date_time', 'temp', 'precip', 'wind', 'hum',
       'sunshine', 'solar_rad', 'building_type', 'time', 'day', 'month', 'week',
       'day_of_year', 'sin_time', 'cos_time', 'temp_RM', 'wind_RM', 'hum_RM',
       'temp_EMA', 'wind_EMA', 'hum_EMA', 'THI', 'feels_temp', 'CDH',
       'hour_mean', 'day_hour_mean', 'hour_std', 'heat_index', 'heat_wave']


train_ft.columns = cols

In [11]:
test_ft.columns

cols = ['building_num', 'date_time', 'temp', 'precip', 'wind', 'hum', 'building_type',
       'time', 'day', 'month', 'week', 'day_of_year', 'sin_time', 'cos_time',
       'temp_RM', 'wind_RM', 'hum_RM', 'temp_EMA', 'wind_EMA', 'hum_EMA',
       'THI', 'feels_temp', 'CDH', 'hour_mean', 'day_hour_mean', 'hour_std',
       'heat_index', 'heat_wave']

test_ft.columns = cols

In [12]:
train_ft.isna().sum().sum(), test_ft.isna().sum().sum()

(0, 0)

In [13]:
train_ft.shape, test_ft.shape

((201600, 31), (16800, 28))

In [14]:
cols = ['sunshine', 'solar_rad']
train_ft = train_ft.drop(columns=cols)

요일별 6시간 전 대비 전력 변화율

In [15]:
train_ft.day

0         3
1         3
2         3
3         3
4         3
         ..
201595    2
201596    2
201597    2
201598    2
201599    2
Name: day, Length: 201600, dtype: int64

In [16]:
# train_ft['power_change_rate'] = (train_ft['energy_consum'] - train_ft.groupby(['building_num', train_ft.day])['energy_consum'].shift(6)) / train_ft.groupby(['building_num', train_ft.day])['energy_consum'].shift(6)


In [17]:
# train_ft['day_within_cycle'] = train_ft.index % (7*24)

# periodic_avg_power_change_rate = train_ft.groupby(['building_num', 'day_within_cycle'])['power_change_rate'].mean().reset_index()
# periodic_avg_power_change_rate.rename(columns={'power_change_rate': 'periodic_avg_power_change_rate'}, inplace=True)

# train_ft = train_ft.merge(periodic_avg_power_change_rate, on=['building_num', 'day_within_cycle'], how='left')

In [18]:
# cols = ['day_within_cycle', 'power_change_rate']
# train_ft = train_ft.drop(columns=cols)

# train_ft['periodic_avg_power_change_rate'].nunique()

# last_rows = train_ft.groupby('building_num').tail(28)['periodic_avg_power_change_rate'].reset_index().drop('index', axis=1)
# test_ft = pd.concat([test_ft, last_rows], axis=1)

In [19]:
# train_ft['power_change_rate'] = (train_ft['energy_consum'] - train_ft.groupby(['building_num', train_ft.day])['energy_consum'].shift(6)) / train_ft.groupby(['building_num', train_ft.day])['energy_consum'].shift(6)

# train_ft['day_within_cycle'] = train_ft.index % (7*24)

# periodic_avg_power_change_rate = train_ft.groupby(['building_num', 'day_within_cycle'])['power_change_rate'].mean().reset_index()
# periodic_avg_power_change_rate.rename(columns={'power_change_rate': 'periodic_avg_power_change_rate'}, inplace=True)

# train_ft = train_ft.merge(periodic_avg_power_change_rate, on=['building_num', 'day_within_cycle'], how='left')

# cols = ['day_within_cycle', 'power_change_rate']
# train_ft = train_ft.drop(columns=cols)

# train_ft['periodic_avg_power_change_rate'].nunique()

# last_rows = train_ft.groupby('building_num').tail(28)['periodic_avg_power_change_rate'].reset_index().drop('index', axis=1)
# test_ft = pd.concat([test_ft, last_rows], axis=1)


In [20]:
train_ft.columns

Index(['energy_consum', 'building_num', 'date_time', 'temp', 'precip', 'wind',
       'hum', 'building_type', 'time', 'day', 'month', 'week', 'day_of_year',
       'sin_time', 'cos_time', 'temp_RM', 'wind_RM', 'hum_RM', 'temp_EMA',
       'wind_EMA', 'hum_EMA', 'THI', 'feels_temp', 'CDH', 'hour_mean',
       'day_hour_mean', 'hour_std', 'heat_index', 'heat_wave'],
      dtype='object')

#

In [21]:
# train_ft['day_hour_mean_log'] = train_ft['day_hour_mean'].apply(np.log)
# test_ft['day_hour_mean_log'] = test_ft['day_hour_mean'].apply(np.log)

In [22]:
# time_intervals = pd.cut(train_ft['time'], bins=[0, 6, 12, 18, 24], labels=['00-06', '06-12', '12-18', '18-24'])

# # Group data by building_num, day, and time intervals, and calculate the cumulative mean day hour mean
# train_ft['building_day_time_cummean'] = train_ft.groupby(['building_num', 'day', time_intervals])['day_hour_mean'].cumsum()

In [23]:
# train_ft['building_day_time_cummean']

In [24]:
# time_intervals = pd.cut(test_ft['time'], bins=[0, 6, 12, 18, 24], labels=['00-06', '06-12', '12-18', '18-24'])

# # Group data by building_num, day, and time intervals, and calculate the cumulative mean day hour mean
# test_ft['building_day_time_cummean'] = test_ft.groupby(['building_num', 'day', time_intervals])['day_hour_mean'].cumsum()

In [25]:
train_ft['temp_EMA']

0         22.027950
1         21.346727
2         20.708769
3         20.168959
4         19.712196
            ...    
201595    24.273233
201596    23.985044
201597    23.571960
201598    23.176274
201599    22.795309
Name: temp_EMA, Length: 201600, dtype: float64


7일 주기, 7일전 대비 전력 변화율

In [26]:
train_ft['day_hour_mean'].nunique() # 168개 고유값. 건물별로.

16425

In [27]:
train_ft['power_change_rate'] = (train_ft['energy_consum'] - train_ft.groupby('building_num')['energy_consum'].shift(7*24)) / train_ft.groupby('building_num')['energy_consum'].shift(7*24)

train_ft['day_within_cycle'] = train_ft.index % (7*24)

periodic_avg_power_change_rate = train_ft.groupby(['building_num','day_within_cycle' ])['power_change_rate'].mean().reset_index()
periodic_avg_power_change_rate.rename(columns={'power_change_rate': 'periodic_avg_power_change_rate'}, inplace=True)

train_ft = train_ft.merge(periodic_avg_power_change_rate, on=['building_num','day_within_cycle'], how='left')
cols = ['day_within_cycle','power_change_rate']
train_ft = train_ft.drop(columns = cols)

train_ft['periodic_avg_power_change_rate'].nunique()

last_rows = train_ft.groupby('building_num').tail(168)['periodic_avg_power_change_rate'].reset_index().drop('index',axis=1)
test_ft = pd.concat([test_ft, last_rows], axis=1)

In [28]:
train_ft['power_change_rate_6'] = (train_ft['energy_consum'] - train_ft.groupby('building_num')['energy_consum'].shift(24)) / train_ft.groupby('building_num')['energy_consum'].shift(24)

train_ft['day_within_cycle_6'] = train_ft.index % (24)

periodic_avg_power_change_rate = train_ft.groupby(['building_num','day_within_cycle_6' ])['power_change_rate_6'].mean().reset_index()
periodic_avg_power_change_rate.rename(columns={'power_change_rate_6': 'periodic_avg_power_change_rate_6'}, inplace=True)

train_ft = train_ft.merge(periodic_avg_power_change_rate, on=['building_num','day_within_cycle_6'], how='left')
cols = ['day_within_cycle_6','power_change_rate_6']
train_ft = train_ft.drop(columns = cols)

train_ft['periodic_avg_power_change_rate_6'].nunique()

last_rows = train_ft.groupby('building_num').tail(168)['periodic_avg_power_change_rate_6'].reset_index().drop('index',axis=1)
test_ft = pd.concat([test_ft, last_rows], axis=1)

In [29]:
pd.DataFrame(train_ft['periodic_avg_power_change_rate_6'])

Unnamed: 0,periodic_avg_power_change_rate_6
0,0.021710
1,0.023154
2,0.025346
3,0.030905
4,0.033128
...,...
201595,0.004240
201596,0.004927
201597,0.006987
201598,0.008615


- 건물별 요일별 시간별 표준편차 (기온, 풍속의 곱)
--> 기온이 평균과 멀어질수록. 풍속이 평균과 멀어질수록.?

In [30]:
# # 요일별 시간별 전력량 평균 추가
# wind_day_hour_std = pd.pivot_table(train_ft, values = 'wind_RM', index = ['building_num', 'time', 'day'], aggfunc = np.std).reset_index()

# train_ft['wind_day_hour_std'] = train_ft.apply(lambda x : wind_day_hour_std.loc[(wind_day_hour_std.building_num == x['building_num']) & (wind_day_hour_std.time == x['time']) & (wind_day_hour_std.day == x['day']) ,'wind_RM'].values[0], axis = 1)
# test_ft['wind_day_hour_std'] = test_ft.apply(lambda x : wind_day_hour_std.loc[(wind_day_hour_std.building_num == x['building_num']) & (wind_day_hour_std.time == x['time']) & (wind_day_hour_std.day == x['day']) ,'wind_RM'].values[0], axis = 1)

In [31]:
# train_ft['power_change_rate'] = (train_ft['energy_consum'] - train_ft.groupby('building_num')['energy_consum'].shift(6)) / train_ft.groupby('building_num')['energy_consum'].shift(6)

# train_ft['day_within_cycle'] = train_ft.index % (6)

# periodic_avg_power_change_rate = train_ft.groupby(['building_num','day_within_cycle' ])['power_change_rate'].mean().reset_index()
# periodic_avg_power_change_rate.rename(columns={'power_change_rate': 'periodic_avg_power_change_rate_6h'}, inplace=True)

# train_ft = train_ft.merge(periodic_avg_power_change_rate, on=['building_num','day_within_cycle'], how='left')
# cols = ['day_within_cycle','periodic_avg_power_change_rate_6h']
# train_ft = train_ft.drop(columns = cols)

# # train_ft['periodic_avg_power_change_rate_6h'].nunique()

# last_rows = train_ft.groupby('building_num').tail(168)['periodic_avg_power_change_rate_6h'].reset_index().drop('index',axis=1)
# test_ft = pd.concat([test_ft, last_rows], axis=1)

In [32]:
train_ft

Unnamed: 0,energy_consum,building_num,date_time,temp,precip,wind,hum,building_type,time,day,...,THI,feels_temp,CDH,hour_mean,day_hour_mean,hour_std,heat_index,heat_wave,periodic_avg_power_change_rate,periodic_avg_power_change_rate_6
0,1034.40,1,20220602 00,18.1,0.0,0.4,67.0,건물기타,0,3,...,69.672872,28.818026,-1.850000,1713.711429,1627.80,444.306331,0.0,0,0.069171,0.021710
1,973.92,1,20220602 01,17.6,0.0,0.8,69.0,건물기타,1,3,...,68.942223,27.987268,-4.591667,1629.468571,1550.08,437.718795,0.0,0,0.072765,0.023154
2,909.12,1,20220602 02,17.2,0.0,0.7,71.0,건물기타,2,3,...,68.121589,27.080175,-8.266667,1513.305714,1431.12,410.362488,0.0,0,0.071155,0.025346
3,893.28,1,20220602 03,17.2,0.0,2.2,75.0,건물기타,3,3,...,67.254888,26.152269,-12.866667,1443.122857,1372.20,389.916262,0.0,0,0.067707,0.030905
4,924.00,1,20220602 04,17.2,0.0,2.0,81.0,건물기타,4,3,...,66.367960,25.248724,-18.333333,1452.808571,1381.72,379.995696,0.0,0,0.063264,0.033128
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201595,881.04,100,20220824 19,23.1,0.0,0.9,86.0,호텔및리조트,19,2,...,73.846285,30.510262,-35.641667,1012.602857,976.06,161.150392,0.0,0,0.039282,0.004240
201596,798.96,100,20220824 20,22.4,0.0,1.3,86.0,호텔및리조트,20,2,...,73.733854,30.441848,-33.391667,930.205714,892.92,137.040302,0.0,0,0.040570,0.004927
201597,825.12,100,20220824 21,21.3,0.0,1.0,92.0,호텔및리조트,21,2,...,73.494949,30.233938,-31.266667,831.714286,786.62,128.125250,0.0,0,0.053521,0.006987
201598,640.08,100,20220824 22,21.0,0.0,0.3,94.0,호텔및리조트,22,2,...,73.192381,29.960151,-29.483333,724.480000,667.94,112.413470,0.0,0,0.036985,0.008615


일주일 전시점의, 7일전 대비 전력 변화율

In [33]:
# train_ft['power_change_rate'] = (train_ft.groupby('building_num')['energy_consum'].shift(7*24) - train_ft.groupby('building_num')['energy_consum'].shift(7*2*24)) / train_ft.groupby('building_num')['energy_consum'].shift(7*2*24) #변화율 변경 기준을 7로도 바꿔볼 것
# train_ft['day_within_cycle'] = train_ft.index % (7*24)

# periodic_avg_power_change_rate = train_ft.groupby(['building_num','day_within_cycle' ])['power_change_rate'].mean().reset_index()
# periodic_avg_power_change_rate.rename(columns={'power_change_rate': 'periodic_avg_power_change_rate'}, inplace=True)

# train_ft = train_ft.merge(periodic_avg_power_change_rate, on=['building_num','day_within_cycle'], how='left')
# train_ft['power_change_rate'].fillna(train_ft['periodic_avg_power_change_rate'], inplace=True)

# cols = ['day_within_cycle','periodic_avg_power_change_rate']
# train_ft = train_ft.drop(columns = cols)


# last_rows = train_ft.groupby('building_num').tail(168)['power_change_rate'].reset_index().drop('index',axis=1) # train 데이터의 마지막 168행을 그대로 test셋에 적용
# test_ft = pd.concat([test_ft, last_rows], axis=1)

In [34]:
# #test
# # test_ft['power_change_rate'] = periodic_avg_power_change_rate['periodic_avg_power_change_rate']
# last_rows = train_ft.groupby('building_num').tail(7*2*24)['power_change_rate'].reset_index().drop('index',axis=1) # train 데이터의 마지막 168행을 그대로 test셋에 적용
# test_ft = pd.concat([test_ft, last_rows], axis=1)

In [35]:
# # import matplotlib.pyplot as plt
# cols = ['index']
# result_df = train_ft['power_change_rate'].reset_index().drop(columns=cols)
# result_df

In [36]:
# import matplotlib.pyplot as plt

# result_df *= 1000


# # 그래프 그리기
# plt.figure(figsize=(12, 6))
# plt.plot(result_df.values)
# plt.xlabel('Index')
# plt.ylabel('Scaled Periodic Avg Power Change Rate')
# # plt.title(f'Scaled Periodic Avg Power Change Rate for Building {building_num}')
# plt.legend(result_df.columns, title='Day within Cycle')
# plt.grid()
# plt.show()


- 6시간 전 대비 전력 벼노하율

In [37]:
# #train
# #test에는

# train_ft['power_change_rate_6h'] = (train_ft['energy_consum'] - train_ft.groupby('building_num')['energy_consum'].shift(6)) / train_ft.groupby('building_num')['energy_consum'].shift(6) #변화율 변경 기준을 7로도 바꿔볼 것
# train_ft['day_within_cycle'] = train_ft.index % (6)

# periodic_avg_power_change_rate = train_ft.groupby(['building_num','day_within_cycle' ])['power_change_rate_6h'].mean().reset_index()
# periodic_avg_power_change_rate.rename(columns={'power_change_rate_6h': 'periodic_avg_power_change_rate'}, inplace=True)

# train_ft = train_ft.merge(periodic_avg_power_change_rate, on=['building_num','day_within_cycle'], how='left')
# train_ft['power_change_rate_6h'].fillna(train_ft['periodic_avg_power_change_rate'], inplace=True)

# cols = ['day_within_cycle','periodic_avg_power_change_rate']
# train_ft = train_ft.drop(columns = cols)

# #test
# # test_ft['power_change_rate'] = periodic_avg_power_change_rate['periodic_avg_power_change_rate']
# last_rows = train_ft.groupby('building_num').tail(168)['power_change_rate_6h'].reset_index().drop('index',axis=1) # train 데이터의 마지막 168행을 그대로 test셋에 적용
# test_ft = pd.concat([test_ft, last_rows], axis=1)

In [38]:

train_ft = train_ft.interpolate()
test_ft = test_ft.interpolate()

train_ft.isnull().sum(), test_ft.isnull().sum()


(energy_consum                       0
 building_num                        0
 date_time                           0
 temp                                0
 precip                              0
 wind                                0
 hum                                 0
 building_type                       0
 time                                0
 day                                 0
 month                               0
 week                                0
 day_of_year                         0
 sin_time                            0
 cos_time                            0
 temp_RM                             0
 wind_RM                             0
 hum_RM                              0
 temp_EMA                            0
 wind_EMA                            0
 hum_EMA                             0
 THI                                 0
 feels_temp                          0
 CDH                                 0
 hour_mean                           0
 day_hour_mean           

## 피처추가

In [39]:
train_ft.columns

Index(['energy_consum', 'building_num', 'date_time', 'temp', 'precip', 'wind',
       'hum', 'building_type', 'time', 'day', 'month', 'week', 'day_of_year',
       'sin_time', 'cos_time', 'temp_RM', 'wind_RM', 'hum_RM', 'temp_EMA',
       'wind_EMA', 'hum_EMA', 'THI', 'feels_temp', 'CDH', 'hour_mean',
       'day_hour_mean', 'hour_std', 'heat_index', 'heat_wave',
       'periodic_avg_power_change_rate', 'periodic_avg_power_change_rate_6'],
      dtype='object')

In [40]:
# train_ft['temp_product'] = train_ft['wind_RM'] *train_ft['temp_RM'] *train_ft['hum_RM'].std()
# test_ft['temp_product'] = test_ft['wind_RM'] *test_ft['temp_RM'] *test_ft['hum_RM'].std()


In [41]:
# train_df_1.columns

- 건물별 ft 생성

In [42]:
for i in range(1, 101):
    globals()[f'train_ft_{i}'] = train_ft[train_ft['building_num'] == i]

for i in range(1, 101):
    globals()[f'test_ft_{i}'] = test_ft[test_ft['building_num'] == i]

## 공휴일 반영

In [43]:
# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_1['weekend'] = train_ft_1['day'].isin([6]) + 0
test_ft_1['weekend'] = test_ft_1['day'].isin([6]) + 0

mask = train_ft_1['day_of_year'] == 157
train_ft_1.loc[mask, 'weekend'] = 1

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_2['weekend'] = train_ft_2['day'].isin([0]) + 0
test_ft_2['weekend'] = test_ft_2['day'].isin([0]) + 0

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_3['weekend'] = train_ft_3['day'].isin([0]) + 0
test_ft_3['weekend'] = test_ft_3['day'].isin([0]) + 0

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_5['weekend'] = train_ft_5['day'].isin([0, 1, 2, 3]) + 0
test_ft_5['weekend'] = test_ft_5['day'].isin([0, 1, 2, 3]) + 0

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_6['weekend'] = 0
test_ft_6['weekend'] = 0

mask_train = (train_ft_6['day_of_year'] == 163) | (train_ft_6['day_of_year'] == 177)| (train_ft_6['day_of_year'] == 191)| (train_ft_6['day_of_year'] == 205)| (train_ft_6['day_of_year'] == 226)
train_ft_6.loc[mask_train, 'weekend'] = 1

mask_test = test_ft_6['day_of_year'] == 240
test_ft_6.loc[mask_test, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_7['weekend'] = train_ft_7['day'].isin([5, 6]) + 0
test_ft_7['weekend'] = test_ft_7['day'].isin([5, 6]) + 0

# 주말 지정(0: 주중, 1: 주말)
train_ft_8['weekend'] = train_ft_8['day'].isin([6]) + 0
test_ft_8['weekend'] = test_ft_8['day'].isin([6]) + 0

# 주말 지정(0: 주중, 1: 주말)
train_ft_9['weekend'] = 0
test_ft_9['weekend'] = 0

mask_train = (train_ft_9['day_of_year'] == 163) | (train_ft_9['day_of_year'] == 177)| (train_ft_9['day_of_year'] == 191)| (train_ft_9['day_of_year'] == 205)| (train_ft_9['day_of_year'] == 226)
train_ft_9.loc[mask_train, 'weekend'] = 1

mask_test = test_ft_9['day_of_year'] == 240
test_ft_9.loc[mask_test, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_12['weekend'] = train_ft_12['day'].isin([6]) + 0
test_ft_12['weekend'] = test_ft_12['day'].isin([6]) + 0

# 주말 지정(0: 주중, 1: 주말)
train_ft_14['weekend'] = train_ft_14['day'].isin([5,6]) + 0
test_ft_14['weekend'] = test_ft_14['day'].isin([5,6]) + 0

# 주말 지정(0: 주중, 1: 주말)
train_ft_15['weekend'] = train_ft_15['day'].isin([5,6]) + 0
test_ft_15['weekend'] = test_ft_15['day'].isin([5,6]) + 0

# 주말 지정(0: 주중, 1: 주말)
train_ft_16['weekend'] = train_ft_16['day'].isin([5, 6]) + 0
test_ft_16['weekend'] = test_ft_16['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_16['day_of_year'] == 157) | (train_ft_16['day_of_year'] == 227)
train_ft_16.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_17['weekend'] = train_ft_17['day'].isin([5, 6]) + 0
test_ft_17['weekend'] = test_ft_17['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_17['day_of_year'] == 157) | (train_ft_17['day_of_year'] == 227)
train_ft_17.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_18['weekend'] = train_ft_18['day'].isin([5, 6]) + 0
test_ft_18['weekend'] = test_ft_18['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_18['day_of_year'] == 157) | (train_ft_18['day_of_year'] == 227)
train_ft_18.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_19['weekend'] = train_ft_19['day'].isin([5, 6]) + 0
test_ft_19['weekend'] = test_ft_19['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_19['day_of_year'] == 157) | (train_ft_19['day_of_year'] == 227)
train_ft_19.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_20['weekend'] = train_ft_20['day'].isin([5, 6]) + 0

test_ft_20['weekend'] = test_ft_20['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_20['day_of_year'] == 157) | (train_ft_20['day_of_year'] == 227)
train_ft_20.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_21['weekend'] = train_ft_21['day'].isin([5, 6]) + 0
test_ft_21['weekend'] = test_ft_21['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_21['day_of_year'] == 157) | (train_ft_21['day_of_year'] == 227)
train_ft_21.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_22['weekend'] = train_ft_22['day'].isin([5, 6]) + 0
test_ft_22['weekend'] = test_ft_22['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_22['day_of_year'] == 157) | (train_ft_22['day_of_year'] == 227)
train_ft_22.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_23['weekend'] = train_ft_23['day'].isin([5, 6]) + 0
test_ft_23['weekend'] = test_ft_23['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_23['day_of_year'] == 157) | (train_ft_23['day_of_year'] == 227)
train_ft_23.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_24['weekend'] = train_ft_24['day'].isin([5, 6]) + 0
test_ft_24['weekend'] = test_ft_24['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_24['day_of_year'] == 157) | (train_ft_24['day_of_year'] == 227)
train_ft_24.loc[mask, 'weekend'] = 1

# 휴무일 지정(0: 영업일, 1: 휴무일)
train_ft_25['weekend'] = (train_ft_25['day'] == 0) + 0
test_ft_25['weekend'] = (test_ft_25['day'] == 0) + 0

# 8월 16일, 8월 17일 휴무일로 예상.
mask = (train_ft_25['day_of_year'] == 228) | (train_ft_25['day_of_year'] == 229)
train_ft_25.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_26['weekend'] = train_ft_26['day'].isin([5, 6]) + 0
test_ft_26['weekend'] = test_ft_26['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_26['day_of_year'] == 157) | (train_ft_26['day_of_year'] == 227)
train_ft_26.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_27['weekend'] = train_ft_27['day'].isin([5, 6]) + 0
test_ft_27['weekend'] = test_ft_27['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_27['day_of_year'] == 157) | (train_ft_27['day_of_year'] == 227)
train_ft_27.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_28['weekend'] = train_ft_28['day'].isin([5, 6]) + 0
test_ft_28['weekend'] = test_ft_28['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_28['day_of_year'] == 157) | (train_ft_28['day_of_year'] == 227)
train_ft_28.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_29['weekend'] = train_ft_29['day'].isin([5, 6]) + 0
test_ft_29['weekend'] = test_ft_29['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_29['day_of_year'] == 157) | (train_ft_29['day_of_year'] == 227)
train_ft_29.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_30['weekend'] = train_ft_30['day'].isin([5, 6]) + 0
test_ft_30['weekend'] = test_ft_30['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_30['day_of_year'] == 157) | (train_ft_30['day_of_year'] == 227)
train_ft_30.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_31['weekend'] = train_ft_31['day'].isin([5, 6]) + 0
test_ft_31['weekend'] = test_ft_31['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_31['day_of_year'] == 157) | (train_ft_31['day_of_year'] == 227)
train_ft_31.loc[mask, 'weekend'] = 1



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_ft_1['weekend'] = train_ft_1['day'].isin([6]) + 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_ft_1['weekend'] = test_ft_1['day'].isin([6]) + 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_ft_2['weekend'] = train_ft_2['day'].isin([0]) + 0
A value is trying to be set on a copy o

In [44]:
# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_37['closed'] = 0
mask = (train_ft_37['day_of_year'] == 171) | (train_ft_37['day_of_year'] == 192) | (train_ft_37['day_of_year'] == 220)
train_ft_37.loc[mask, 'closed'] = 1

test_ft_37['closed'] = 0

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_38['closed'] = 0
mask = (train_ft_38['day_of_year'] == 164) | (train_ft_38['day_of_year'] == 206) | (train_ft_38['day_of_year'] == 213)
train_ft_38.loc[mask, 'closed'] = 1

test_ft_38['closed'] = 0

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_39['closed'] = 0
mask = (train_ft_39['day_of_year'] == 199) | (train_ft_39['day_of_year'] == 220)
train_ft_39.loc[mask, 'closed'] = 1

test_ft_39['closed'] = 0

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_40['closed'] = 0
mask = (train_ft_40['day_of_year'] == 171) | (train_ft_40['day_of_year'] == 199) | (train_ft_40['day_of_year'] == 220)
train_ft_40.loc[mask, 'closed'] = 1

test_ft_40['closed'] = 0

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_41['closed'] = 0
mask = (train_ft_41['day_of_year'] == 178) | (train_ft_41['day_of_year'] == 206) | (train_ft_41['day_of_year'] == 220)
train_ft_41.loc[mask, 'closed'] = 1

test_ft_41['closed'] = 0

# 주말 지정(0: 주중, 1: 주말)
train_ft_41['weekend'] = train_ft_41['day'].isin([5, 6]) + 0

test_ft_41['weekend'] = test_ft_41['day'].isin([5, 6]) + 0

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_42['closed'] = 0
mask = (train_ft_42['day_of_year'] == 164) | (train_ft_42['day_of_year'] == 192) | (train_ft_42['day_of_year'] == 234)
train_ft_42.loc[mask, 'closed'] = 1

test_ft_42['closed'] = 0

# 주말 지정(0: 주중, 1: 주말)
train_ft_43['weekend'] = train_ft_43['day'].isin([5, 6]) + 0

test_ft_43['weekend'] = test_ft_43['day'].isin([5, 6]) + 0

# 주말 지정(0: 주중, 1: 주말)
train_ft_44['weekend'] = train_ft_44['day'].isin([5, 6]) + 0

test_ft_44['weekend'] = test_ft_44['day'].isin([5, 6]) + 0

# 주말 지정(0: 주중, 1: 주말)
train_ft_45['weekend'] = train_ft_45['day'].isin([5, 6]) + 0

test_ft_45['weekend'] = test_ft_45['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_45['day_of_year'] == 157) | (train_ft_45['day_of_year'] == 227)
train_ft_45.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_46['weekend'] = train_ft_46['day'].isin([5, 6]) + 0

test_ft_46['weekend'] = test_ft_46['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_46['day_of_year'] == 157) | (train_ft_46['day_of_year'] == 227)
train_ft_46.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_47['weekend'] = train_ft_47['day'].isin([5, 6]) + 0

test_ft_47['weekend'] = test_ft_47['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_47['day_of_year'] == 157) | (train_ft_47['day_of_year'] == 227)
train_ft_47.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_48['weekend'] = train_ft_48['day'].isin([5, 6]) + 0

test_ft_48['weekend'] = test_ft_48['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_48['day_of_year'] == 157) | (train_ft_48['day_of_year'] == 227)
train_ft_48.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_49['weekend'] = train_ft_49['day'].isin([5, 6]) + 0

test_ft_49['weekend'] = test_ft_49['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_49['day_of_year'] == 157) | (train_ft_49['day_of_year'] == 227)
train_ft_49.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_50['weekend'] = train_ft_50['day'].isin([5, 6]) + 0

test_ft_50['weekend'] = test_ft_50['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_50['day_of_year'] == 157) | (train_ft_50['day_of_year'] == 227)
train_ft_50.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_51['weekend'] = train_ft_51['day'].isin([5, 6]) + 0

test_ft_51['weekend'] = test_ft_51['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_51['day_of_year'] == 157) | (train_ft_51['day_of_year'] == 227)
train_ft_51.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_52['weekend'] = train_ft_52['day'].isin([5, 6]) + 0

test_ft_52['weekend'] = test_ft_52['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_52['day_of_year'] == 157) | (train_ft_52['day_of_year'] == 227)
train_ft_52.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_53['weekend'] = train_ft_53['day'].isin([5, 6]) + 0

test_ft_53['weekend'] = test_ft_53['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_53['day_of_year'] == 157) | (train_ft_53['day_of_year'] == 227)
train_ft_53.loc[mask, 'weekend'] = 1

# 휴무일 지정(0: 영업일, 1: 휴무일)
train_ft_54['weekend'] = (train_ft_54['day'] == 0) + 0

test_ft_54['weekend'] = (test_ft_54['day'] == 0) + 0

# 8월 16일, 8월 17일 휴무일로 예상.
mask = (train_ft_54['day_of_year'] == 228) | (train_ft_54['day_of_year'] == 229)
train_ft_54.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_55['weekend'] = train_ft_55['day'].isin([5, 6]) + 0

test_ft_55['weekend'] = test_ft_55['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_55['day_of_year'] == 157) | (train_ft_55['day_of_year'] == 227)
train_ft_55.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_56['weekend'] = train_ft_56['day'].isin([5, 6]) + 0

test_ft_56['weekend'] = test_ft_56['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_56['day_of_year'] == 157) | (train_ft_56['day_of_year'] == 227)
train_ft_56.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_57['weekend'] = train_ft_57['day'].isin([5, 6]) + 0

test_ft_57['weekend'] = test_ft_57['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_57['day_of_year'] == 157) | (train_ft_57['day_of_year'] == 227)
train_ft_57.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_58['weekend'] = train_ft_58['day'].isin([5, 6]) + 0

test_ft_58['weekend'] = test_ft_58['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_58['day_of_year'] == 157) | (train_ft_58['day_of_year'] == 227)
train_ft_58.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_59['weekend'] = train_ft_59['day'].isin([5, 6]) + 0

test_ft_59['weekend'] = test_ft_59['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_59['day_of_year'] == 157) | (train_ft_59['day_of_year'] == 227)
train_ft_59.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_60['weekend'] = train_ft_60['day'].isin([5, 6]) + 0

test_ft_60['weekend'] = test_ft_60['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_60['day_of_year'] == 157) | (train_ft_60['day_of_year'] == 227)
train_ft_60.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_61['weekend'] = train_ft_61['day'].isin([5, 6]) + 0

test_ft_61['weekend'] = test_ft_61['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_61['day_of_year'] == 157) | (train_ft_61['day_of_year'] == 227)
train_ft_61.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_62['weekend'] = train_ft_62['day'].isin([5, 6]) + 0

test_ft_62['weekend'] = test_ft_62['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_62['day_of_year'] == 157) | (train_ft_62['day_of_year'] == 227)
train_ft_62.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_63['weekend'] = train_ft_63['day'].isin([5, 6]) + 0

test_ft_63['weekend'] = test_ft_63['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_63['day_of_year'] == 157) | (train_ft_63['day_of_year'] == 227)
train_ft_63.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_64['weekend'] = train_ft_64['day'].isin([5, 6]) + 0

test_ft_64['weekend'] = test_ft_64['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_64['day_of_year'] == 157) | (train_ft_64['day_of_year'] == 227)
train_ft_64.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_65['weekend'] = train_ft_65['day'].isin([5, 6]) + 0

test_ft_65['weekend'] = test_ft_65['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_65['day_of_year'] == 157) | (train_ft_65['day_of_year'] == 227)
train_ft_65.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_66['weekend'] = train_ft_66['day'].isin([5, 6]) + 0

test_ft_66['weekend'] = test_ft_66['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_66['day_of_year'] == 157) | (train_ft_66['day_of_year'] == 227)
train_ft_66.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_67['weekend'] = train_ft_67['day'].isin([5, 6]) + 0

test_ft_67['weekend'] = test_ft_67['day'].isin([5, 6]) + 0

# 주말 지정(0: 주중, 1: 주말)
train_ft_67['weekend'] = train_ft_67['day'].isin([5, 6]) + 0

test_ft_67['weekend'] = test_ft_67['day'].isin([5, 6]) + 0

# 주말 지정(0: 주중, 1: 주말)
train_ft_68['weekend'] = train_ft_68['day'].isin([5, 6]) + 0

test_ft_68['weekend'] = test_ft_68['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_68['day_of_year'] == 157) | (train_ft_68['day_of_year'] == 227)
train_ft_68.loc[mask, 'weekend'] = 1




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_ft_37['closed'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_ft_37['closed'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_ft_38['closed'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See 

In [45]:
# 주말 지정(0: 주중, 1: 주말)
train_ft_69['weekend'] = train_ft_69['day'].isin([5, 6]) + 0
test_ft_69['weekend'] = test_ft_69['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_69['day_of_year'] == 157) | (train_ft_69['day_of_year'] == 227)
train_ft_69.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_70['weekend'] = train_ft_70['day'].isin([5, 6]) + 0
test_ft_70['weekend'] = test_ft_70['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_70['day_of_year'] == 157) | (train_ft_70['day_of_year'] == 227)
train_ft_70.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_71['weekend'] = train_ft_71['day'].isin([5, 6]) + 0
test_ft_71['weekend'] = test_ft_71['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_71['day_of_year'] == 157) | (train_ft_71['day_of_year'] == 227)
train_ft_71.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_72['weekend'] = train_ft_72['day'].isin([5, 6]) + 0
test_ft_72['weekend'] = test_ft_72['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_72['day_of_year'] == 157) | (train_ft_72['day_of_year'] == 227)
train_ft_72.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_73['weekend'] = train_ft_73['day'].isin([5, 6]) + 0
test_ft_73['weekend'] = test_ft_73['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_73['day_of_year'] == 157) | (train_ft_73['day_of_year'] == 227)
train_ft_73.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_74['weekend'] = train_ft_74['day'].isin([5, 6]) + 0
test_ft_74['weekend'] = test_ft_74['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_74['day_of_year'] == 157) | (train_ft_74['day_of_year'] == 227)
train_ft_74.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_75['weekend'] = train_ft_75['day'].isin([5, 6, 0]) + 0
test_ft_75['weekend'] = test_ft_75['day'].isin([5, 6, 0]) + 0

# 주말 지정(0: 주중, 1: 주말)
train_ft_76['weekend'] = train_ft_76['day'].isin([5, 6]) + 0
test_ft_76['weekend'] = test_ft_76['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_76['day_of_year'] == 157) | (train_ft_76['day_of_year'] == 227)
train_ft_76.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_77['weekend'] = train_ft_77['day'].isin([5, 6]) + 0
test_ft_77['weekend'] = test_ft_77['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_77['day_of_year'] == 157) | (train_ft_77['day_of_year'] == 227)
train_ft_77.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_78['weekend'] = train_ft_78['day'].isin([5, 6]) + 0

test_ft_78['weekend'] = test_ft_78['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_78['day_of_year'] == 157) | (train_ft_78['day_of_year'] == 227)
train_ft_78.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_79['weekend'] = train_ft_79['day'].isin([5, 6]) + 0

test_ft_79['weekend'] = test_ft_79['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_79['day_of_year'] == 157) | (train_ft_79['day_of_year'] == 227)
train_ft_79.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_80['weekend'] = train_ft_80['day'].isin([5, 6]) + 0

test_ft_80['weekend'] = test_ft_80['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_80['day_of_year'] == 157) | (train_ft_80['day_of_year'] == 227)
train_ft_80.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_82['weekend'] = train_ft_82['day'].isin([5, 6]) + 0
test_ft_82['weekend'] = test_ft_82['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_82['day_of_year'] == 157) | (train_ft_82['day_of_year'] == 227)
train_ft_82.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_83['weekend'] = train_ft_83['day'].isin([5, 6]) + 0

test_ft_83['weekend'] = test_ft_83['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_83['day_of_year'] == 157) | (train_ft_83['day_of_year'] == 227)
train_ft_83.loc[mask, 'weekend'] = 1

# 주말 지정(0: 주중, 1: 주말)
train_ft_84['weekend'] = train_ft_84['day'].isin([5, 6]) + 0
test_ft_84['weekend'] = test_ft_84['day'].isin([5, 6]) + 0

# 공휴일 주말로 지정(공휴일이 2일 밖에 존재하지 않으며, 주말과 차이가 없다고 판단)
mask = (train_ft_84['day_of_year'] == 157) | (train_ft_84['day_of_year'] == 227)
train_ft_84.loc[mask, 'weekend'] = 1

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_86['weekend'] = 0
test_ft_86['weekend'] = 0

mask_train = (train_ft_86['day_of_year'] == 161) | (train_ft_86['day_of_year'] == 177)| (train_ft_86['day_of_year'] == 191)| (train_ft_86['day_of_year'] == 205)| (train_ft_86['day_of_year'] == 222)
train_ft_86.loc[mask_train, 'weekend'] = 1

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_87['weekend'] = 0
test_ft_87['weekend'] = 0

mask_train = (train_ft_87['day_of_year'] == 163) | (train_ft_87['day_of_year'] == 177)| (train_ft_87['day_of_year'] == 191)| (train_ft_87['day_of_year'] == 205)| (train_ft_87['day_of_year'] == 226)
train_ft_87.loc[mask_train, 'weekend'] = 1

mask_test = test_ft_87['day_of_year'] == 240
test_ft_87.loc[mask_test, 'weekend'] = 1

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_88['weekend'] = 0
test_ft_88['weekend'] = 0

mask_train = (train_ft_88['day_of_year'] == 163) | (train_ft_88['day_of_year'] == 177)| (train_ft_88['day_of_year'] == 191)| (train_ft_88['day_of_year'] == 205)| (train_ft_88['day_of_year'] == 226)
train_ft_88.loc[mask_train, 'weekend'] = 1

mask_test = test_ft_88['day_of_year'] == 240
test_ft_88.loc[mask_test, 'weekend'] = 1

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_89['weekend'] = 0
test_ft_89['weekend'] = 0

mask_train = (train_ft_89['day_of_year'] == 163) | (train_ft_89['day_of_year'] == 177)| (train_ft_89['day_of_year'] == 191)| (train_ft_89['day_of_year'] == 205)| (train_ft_89['day_of_year'] == 226)
train_ft_89.loc[mask_train, 'weekend'] = 1

mask_test = test_ft_89['day_of_year'] == 240
test_ft_89.loc[mask_test, 'weekend'] = 1

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_90['weekend'] = 0
test_ft_90['weekend'] = 0

mask_train = (train_ft_90['day_of_year'] == 163) | (train_ft_90['day_of_year'] == 177)| (train_ft_90['day_of_year'] == 191)| (train_ft_90['day_of_year'] == 205)| (train_ft_90['day_of_year'] == 226)
train_ft_90.loc[mask_train, 'weekend'] = 1

mask_test = test_ft_90['day_of_year'] == 240
test_ft_90.loc[mask_test, 'weekend'] = 1

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_91['weekend'] = 0
test_ft_91['weekend'] = 0

mask_train = (train_ft_91['day_of_year'] == 163) | (train_ft_91['day_of_year'] == 177)| (train_ft_91['day_of_year'] == 191)| (train_ft_91['day_of_year'] == 205)| (train_ft_91['day_of_year'] == 226)
train_ft_91.loc[mask_train, 'weekend'] = 1

mask_test = test_ft_91['day_of_year'] == 240
test_ft_91.loc[mask_test, 'weekend'] = 1

# 휴무일 지정(1: 휴무일, 0: 영업일)
train_ft_92['weekend'] = 0
test_ft_92['weekend'] = 0

mask_train = (train_ft_92['day_of_year'] == 163) | (train_ft_92['day_of_year'] == 177)| (train_ft_92['day_of_year'] == 191)| (train_ft_92['day_of_year'] == 205)| (train_ft_92['day_of_year'] == 226)
train_ft_92.loc[mask_train, 'weekend'] = 1

mask_test = test_ft_92['day_of_year'] == 240
test_ft_92.loc[mask_test, 'weekend'] = 1




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_ft_69['weekend'] = train_ft_69['day'].isin([5, 6]) + 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_ft_69['weekend'] = test_ft_69['day'].isin([5, 6]) + 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_ft_70['weekend'] = train_ft_70['day'].isin([5, 6]) + 0
A value is trying to be 

- 공휴일 활용 피처 : weekend가 있는 건물들에 대해서 weekend의 왜도

## 개별 피처 추가 및 제거

- 이상 건물들의 train값 이상치 -> 전체 건물들의 이상치를 처리하는 방법

In [46]:
# for i in range(1,101):
#     second_largest = globals()[f'train_ft_{i}']['energy_consum'][globals()[f'train_ft_{i}']['energy_consum'] < globals()[f'train_ft_{i}']['energy_consum'].max()].nlargest(2).iloc[-1]
#     second_smallest = globals()[f'train_ft_{i}']['energy_consum'][globals()[f'train_ft_{i}']['energy_consum'] > globals()[f'train_ft_{i}']['energy_consum'].min()].nsmallest(2).iloc[-1]

#     # Replace maximum and minimum values with second largest and second smallest values
#     globals()[f'train_ft_{i}']['energy_consum'].replace(globals()[f'train_ft_{i}']['energy_consum'].max(), second_largest, inplace=True)
#     globals()[f'train_ft_{i}']['energy_consum'].replace(globals()[f'train_ft_{i}']['energy_consum'].min(), second_smallest, inplace=True)

In [47]:
# for i in range(1,101):
#     second_largest = globals()[f'test_ft_{i}']['energy_consum'][globals()[f'test_ft_{i}']['energy_consum'] < globals()[f'test_ft_{i}']['energy_consum'].max()].nlargest(2).iloc[-1]
#     second_smallest = globals()[f'test_ft_{i}']['energy_consum'][globals()[f'test_ft_{i}']['energy_consum'] > globals()[f'test_ft_{i}']['energy_consum'].min()].nsmallest(2).iloc[-1]

#     # Replace maximum and minimum values with second largest and second smallest values
#     globals()[f'test_ft_{i}']['energy_consum'].replace(globals()[f'test_ft_{i}']['energy_consum'].max(), second_largest, inplace=True)
#     globals()[f'test_ft_{i}']['energy_consum'].replace(globals()[f'test_ft_{i}']['energy_consum'].min(), second_smallest, inplace=True)

In [48]:
train_ft_1.columns


Index(['energy_consum', 'building_num', 'date_time', 'temp', 'precip', 'wind',
       'hum', 'building_type', 'time', 'day', 'month', 'week', 'day_of_year',
       'sin_time', 'cos_time', 'temp_RM', 'wind_RM', 'hum_RM', 'temp_EMA',
       'wind_EMA', 'hum_EMA', 'THI', 'feels_temp', 'CDH', 'hour_mean',
       'day_hour_mean', 'hour_std', 'heat_index', 'heat_wave',
       'periodic_avg_power_change_rate', 'periodic_avg_power_change_rate_6',
       'weekend'],
      dtype='object')

- day of year 제거

In [49]:

# # Create a list to store train_ft and test_ft DataFrames
# train_ft_list = [globals()[f'train_ft_{i}'] for i in range(1, 101)]
# test_ft_list = [globals()[f'test_ft_{i}'] for i in range(1, 101)]

# # Iterate through each index (1 to 100)
# for i in range(1, 101):
#     train_df = train_ft_list[i-1]
#     test_df = test_ft_list[i-1]

#     # Check if 'weekend' column exists in the dataframe
#     if 'weekend' in train_df.columns:
#         # Filter buildings with weekends
#         buildings_with_weekends = train_df[train_df['weekend'] == 1]['building_num'].unique()

#         # Calculate skewness for each combination of building_num, day, and time on weekends
#         weekend_skewness_df = train_df[train_df['building_num'].isin(buildings_with_weekends)].groupby(['building_num', 'day', 'time'])['energy_consum'].apply(lambda x: np.nan if x.std() == 0 else x.skew()).reset_index()
#         weekend_skewness_df.rename(columns={'energy_consum': 'weekend_skewness'}, inplace=True)

#         # Merge weekend skewness values with train_df and test_df
#         train_df = pd.merge(train_df, weekend_skewness_df, on=['building_num', 'day', 'time'], how='left')
#         test_df = pd.merge(test_df, weekend_skewness_df, on=['building_num', 'day', 'time'], how='left')

#         # Fill missing values with 0 (or any other suitable value)
#         train_df['weekend_skewness'].fillna(0, inplace=True)
#         test_df['weekend_skewness'].fillna(0, inplace=True)

#         # Update the modified DataFrames in the list
#         train_ft_list[i-1] = train_df
#         test_ft_list[i-1] = test_df

# # Update the global DataFrames with the modified ones
# for i in range(1, 101):
#     globals()[f'train_ft_{i}'] = train_ft_list[i-1]
#     globals()[f'test_ft_{i}'] = test_ft_list[i-1]


- 공휴일의 왜도

In [50]:

# for i in range(1,101):
#     if 'weekend' in globals()[f'train_ft_{i}']:

#         buildings_with_weekends = globals()[f'train_ft_{i}'][globals()[f'train_ft_{i}']['weekend'] == 1]

#         # Calculate skewness for each combination of building_num and time on weekends
#         weekend_skewness_df = globals()[f'train_ft_{i}'][globals()[f'train_ft_{i}']['building_num'].isin(buildings_with_weekends)].groupby(['building_num','day','time'])['energy_consum'].apply(lambda x: np.nan if x.std() == 0 else x.skew()).reset_index()
#         weekend_skewness_df.rename(columns={'energy_consum': 'weekend_skewness'}, inplace=True)

#         # Merge weekend skewness values with train_ft
#         globals()[f'train_ft_{i}'] = pd.merge(globals()[f'train_ft_{i}'], weekend_skewness_df, on=['building_num','day','time'], how='left')
#         globals()[f'test_ft_{i}'] = pd.merge(globals()[f'test_ft_{i}'], weekend_skewness_df, on=['building_num','day','time'], how='left')
#         # Fill missing values with 0 (or any other suitable value)
#         globals()[f'train_ft_{i}']['weekend_skewness'].fillna(0, inplace=True)
#         globals()[f'test_ft_{i}']['weekend_skewness'].fillna(0, inplace=True)

In [51]:
# columns_to_drop = ['day_hour_mean']  # 삭제할 칼럼 이름들

# target_indices = [30, 51, 54, 56, 67, 71, 80, 82]  # 특정 i 값들


# for i in range(1, 101):
#     if i in target_indices:  # i 값이 특정 값인 경우에만 칼럼 삭제
#         train_subset = globals()[f'train_ft_{i}'].drop(columns=columns_to_drop)
#         test_subset = globals()[f'test_ft_{i}'].drop(columns=columns_to_drop)
#     else:
#         train_subset = globals()[f'train_ft_{i}']
#         test_subset = globals()[f'test_ft_{i}']

#     # 데이터 프레임 갱신
#     globals()[f'train_ft_{i}'] = train_subset
#     globals()[f'test_ft_{i}'] = test_subset

In [52]:
# columns_to_drop = ['precip']  # 삭제할 칼럼 이름들
# # target_indices = [1, 9, 15, 19, 25, 28, 32, 36, 41, 79, 81, 82, 91, 92, 99, 100]  # 특정 i 값들
# target_indices = [3,9,42, 54,72,82,91,92 ]  # 특정 i 값들


# for i in range(1, 101):
#     if i in target_indices:  # i 값이 특정 값인 경우에만 칼럼 삭제
#         train_subset = globals()[f'train_ft_{i}'].drop(columns=columns_to_drop)
#         test_subset = globals()[f'test_ft_{i}'].drop(columns=columns_to_drop)
#     else:
#         train_subset = globals()[f'train_ft_{i}']
#         test_subset = globals()[f'test_ft_{i}']

#     # 데이터 프레임 갱신
#     globals()[f'train_ft_{i}'] = train_subset
#     globals()[f'test_ft_{i}'] = test_subset

In [53]:
train_ft.columns

Index(['energy_consum', 'building_num', 'date_time', 'temp', 'precip', 'wind',
       'hum', 'building_type', 'time', 'day', 'month', 'week', 'day_of_year',
       'sin_time', 'cos_time', 'temp_RM', 'wind_RM', 'hum_RM', 'temp_EMA',
       'wind_EMA', 'hum_EMA', 'THI', 'feels_temp', 'CDH', 'hour_mean',
       'day_hour_mean', 'hour_std', 'heat_index', 'heat_wave',
       'periodic_avg_power_change_rate', 'periodic_avg_power_change_rate_6'],
      dtype='object')

In [54]:
test_ft.columns

Index(['building_num', 'date_time', 'temp', 'precip', 'wind', 'hum',
       'building_type', 'time', 'day', 'month', 'week', 'day_of_year',
       'sin_time', 'cos_time', 'temp_RM', 'wind_RM', 'hum_RM', 'temp_EMA',
       'wind_EMA', 'hum_EMA', 'THI', 'feels_temp', 'CDH', 'hour_mean',
       'day_hour_mean', 'hour_std', 'heat_index', 'heat_wave',
       'periodic_avg_power_change_rate', 'periodic_avg_power_change_rate_6'],
      dtype='object')

In [55]:
train_ft_1.columns

Index(['energy_consum', 'building_num', 'date_time', 'temp', 'precip', 'wind',
       'hum', 'building_type', 'time', 'day', 'month', 'week', 'day_of_year',
       'sin_time', 'cos_time', 'temp_RM', 'wind_RM', 'hum_RM', 'temp_EMA',
       'wind_EMA', 'hum_EMA', 'THI', 'feels_temp', 'CDH', 'hour_mean',
       'day_hour_mean', 'hour_std', 'heat_index', 'heat_wave',
       'periodic_avg_power_change_rate', 'periodic_avg_power_change_rate_6',
       'weekend'],
      dtype='object')

In [56]:


columns_to_drop = [ 'temp','wind', 'hum', # RM 때문에 제거할 칼럼
                   'heat_wave', 'heat_index',  # 필요 없어서 제거할 칼럼
                   'day_of_year','building_num','date_time','building_type','CDH','hour_std','precip','hour_mean'] #일조의 변화율?
                    # 'precip', 'hour_mean']  # 기본 제거할 칼럼

for i in range(1, 101):
    globals()[f'train_ft_{i}'] = globals()[f'train_ft_{i}'].drop(columns=columns_to_drop)
    globals()[f'test_ft_{i}'] = globals()[f'test_ft_{i}'].drop(columns=columns_to_drop)

- 함수

In [57]:
train_ft.iloc[:, 2:]

Unnamed: 0,date_time,temp,precip,wind,hum,building_type,time,day,month,week,...,THI,feels_temp,CDH,hour_mean,day_hour_mean,hour_std,heat_index,heat_wave,periodic_avg_power_change_rate,periodic_avg_power_change_rate_6
0,20220602 00,18.1,0.0,0.4,67.0,건물기타,0,3,6,22,...,69.672872,28.818026,-1.850000,1713.711429,1627.80,444.306331,0.0,0,0.069171,0.021710
1,20220602 01,17.6,0.0,0.8,69.0,건물기타,1,3,6,22,...,68.942223,27.987268,-4.591667,1629.468571,1550.08,437.718795,0.0,0,0.072765,0.023154
2,20220602 02,17.2,0.0,0.7,71.0,건물기타,2,3,6,22,...,68.121589,27.080175,-8.266667,1513.305714,1431.12,410.362488,0.0,0,0.071155,0.025346
3,20220602 03,17.2,0.0,2.2,75.0,건물기타,3,3,6,22,...,67.254888,26.152269,-12.866667,1443.122857,1372.20,389.916262,0.0,0,0.067707,0.030905
4,20220602 04,17.2,0.0,2.0,81.0,건물기타,4,3,6,22,...,66.367960,25.248724,-18.333333,1452.808571,1381.72,379.995696,0.0,0,0.063264,0.033128
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201595,20220824 19,23.1,0.0,0.9,86.0,호텔및리조트,19,2,8,34,...,73.846285,30.510262,-35.641667,1012.602857,976.06,161.150392,0.0,0,0.039282,0.004240
201596,20220824 20,22.4,0.0,1.3,86.0,호텔및리조트,20,2,8,34,...,73.733854,30.441848,-33.391667,930.205714,892.92,137.040302,0.0,0,0.040570,0.004927
201597,20220824 21,21.3,0.0,1.0,92.0,호텔및리조트,21,2,8,34,...,73.494949,30.233938,-31.266667,831.714286,786.62,128.125250,0.0,0,0.053521,0.006987
201598,20220824 22,21.0,0.0,0.3,94.0,호텔및리조트,22,2,8,34,...,73.192381,29.960151,-29.483333,724.480000,667.94,112.413470,0.0,0,0.036985,0.008615


In [58]:
# 데이터셋 만들기(마지막 7일간의 데이터를 검증데이터셋으로 사용)
def make_dataset(train_df, test_df):
    train_x = train_df.iloc[:, 2:]
    train_y = train_df['energy_consum']

    test_x = test_df.iloc[:, 1:]
    return train_x, train_y, test_x

def train_test_split(train_x, train_y, test_size=168):
    y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = train_y, X = train_x, test_size = test_size)
    return x_train, y_train, x_valid, y_valid

In [59]:
# n_fold로 데이터셋 나누기
def n_fold_train_test_split(train_x, train_y, n, test_size=168, week=11):
    x_valid = train_x[test_size * (week-n-1):test_size * (week-n)]
    y_valid = train_y[test_size * (week-n-1):test_size * (week-n)]
    x_train = train_x.drop(index = list(x_valid.index))
    y_train = train_y.drop(index = list(y_valid.index))
    return x_train, y_train, x_valid, y_valid

In [60]:

# def lightgbm_smape(y_true, y_pred):
#     smape_val = _smape(y_true, y_pred)
#     return 'SMAPE', smape_val, False

# LGBMRegressor 모델
def lgbm_predict(model, x_train, y_train, x_valid, y_valid, stopping=300, eval_metric=lightgbm_smape):
    model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], callbacks = [early_stopping(stopping)], eval_metric=eval_metric)
    pred = model.predict(x_valid)
    pred = pd.Series(pred)
    pred.index = np.arange(y_valid.index[0], y_valid.index[-1]+1)
    plot_series(y_train, y_valid, pd.Series(pred), markers=[',' , ',', ','])
    smape_score = _smape(y_valid, pred)
    print(f"SMAPE_SCORE : {smape_score}")
    return smape_score

# test dataset 예측
def lgbm_test_predict(model, train_x, train_y, test_x):
    model.fit(train_x, train_y)
    lgbm_pred = model.predict(test_x)
    return lgbm_pred

In [61]:
# LGBM 모델 사용

# LGBM 모델 사용
lgbm_preds = np.array([])
fold = 4
smape_score_mean = []

def lgbm_evaluation(train_ft, test_ft) :
    global lgbm_preds

    model = LGBMRegressor(random_state=SEED, objective='regression')
    train_x, train_y, test_x = make_dataset(train_ft, test_ft)
    # 건물별 fold별 smape score
    smape_score = []
    for n in tqdm(range(fold)):
        x_train, y_train, x_valid, y_valid = n_fold_train_test_split(train_x, train_y, n=n)

        model.fit(x_train, y_train, eval_set=[(x_valid, y_valid)], callbacks = [early_stopping(120)], eval_metric=lightgbm_smape)

        pred = model.predict(x_valid)
        score = _smape(y_valid, pred)
        smape_score.append(score)

    smape_score_mean.append(np.mean(smape_score))

    lgbm_pred = lgbm_test_predict(model, train_x, train_y, test_x)
    lgbm_preds = np.concatenate([lgbm_preds, lgbm_pred])

print(smape_score_mean)

[]


## 1

In [62]:
lgbm_evaluation(train_ft_1, test_ft_1)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2643.755583
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[29]	valid_0's l2: 27318.2	valid_0's SMAPE: 4.99163
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2628.276622
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[74]	valid_0's l2: 87211.5	valid_0's SMAPE: 7.31968
You can set `force_col_wise=true` to remove the overhead.
[LightGBM]

In [63]:
train_ft_1.columns

Index(['energy_consum', 'time', 'day', 'month', 'week', 'sin_time', 'cos_time',
       'temp_RM', 'wind_RM', 'hum_RM', 'temp_EMA', 'wind_EMA', 'hum_EMA',
       'THI', 'feels_temp', 'day_hour_mean', 'periodic_avg_power_change_rate',
       'periodic_avg_power_change_rate_6', 'weekend'],
      dtype='object')

## 2

In [64]:
lgbm_evaluation(train_ft_2, test_ft_2)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1664.225552
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's l2: 42072.3	valid_0's SMAPE: 7.55667
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1655.273668
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[52]	valid_0's l2: 34019.3	valid_0's SMAPE: 7.16606
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points

## 3

In [65]:
lgbm_evaluation(train_ft_3, test_ft_3)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1432.380098
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[97]	valid_0's l2: 88969.1	valid_0's SMAPE: 14.3117
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1440.991851
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[49]	valid_0's l2: 37573.1	valid_0's SMAPE: 11.2624
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [

## 4

In [66]:
lgbm_evaluation(train_ft_4, test_ft_4)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 987.647289
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[79]	valid_0's l2: 4562.1	valid_0's SMAPE: 5.71976
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 989.391088
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[39]	valid_0's l2: 3974.14	valid_0's SMAPE: 4.94821
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [I

## 5

In [67]:
lgbm_evaluation(train_ft_5, test_ft_5)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2738.692399
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[53]	valid_0's l2: 111527	valid_0's SMAPE: 7.24101
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2723.344088
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[39]	valid_0's l2: 78309.6	valid_0's SMAPE: 6.60568
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points i

## 6

In [68]:
lgbm_evaluation(train_ft_6, test_ft_6)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1971.066754
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[48]	valid_0's l2: 3744.01	valid_0's SMAPE: 2.37449
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1961.514546
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[73]	valid_0's l2: 4577.31	valid_0's SMAPE: 2.42568
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2463
[LightGBM] [Info] Number of data points 

## 7

In [69]:
lgbm_evaluation(train_ft_7, test_ft_7)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 780.273507
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[59]	valid_0's l2: 7388.9	valid_0's SMAPE: 7.63905
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 776.322598
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[84]	valid_0's l2: 8515.47	valid_0's SMAPE: 8.66341
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2463
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Inf

In [70]:
train_ft_7['energy_consum'].max

<bound method NDFrame._add_numeric_operations.<locals>.max of 12096    552.72
12097    548.64
12098    522.96
12099    518.88
12100    536.16
          ...  
14107    906.24
14108    831.60
14109    612.96
14110    462.96
14111    364.32
Name: energy_consum, Length: 2016, dtype: float64>

## 8

In [71]:
lgbm_evaluation(train_ft_8, test_ft_8)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2460
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1323.810910
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[29]	valid_0's l2: 9237.28	valid_0's SMAPE: 5.31961
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1322.255195
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[39]	valid_0's l2: 8963.13	valid_0's SMAPE: 5.01137
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 9

In [72]:
lgbm_evaluation(train_ft_9, test_ft_9)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2322.722078
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's l2: 7252.57	valid_0's SMAPE: 2.62722
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2467
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2311.025195
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[96]	valid_0's l2: 11667.6	valid_0's SMAPE: 2.93483
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] 

## 10

In [73]:
lgbm_evaluation(train_ft_10, test_ft_10)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2459
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 4041.766622
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[11]	valid_0's l2: 130207	valid_0's SMAPE: 7.11892
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2457
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 4057.459609
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[18]	valid_0's l2: 62839.1	valid_0's SMAPE: 5.15992
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2457
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [I

## 11

In [74]:
lgbm_evaluation(train_ft_11, test_ft_11)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2457
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1933.284155
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[33]	valid_0's l2: 9605.13	valid_0's SMAPE: 3.66872
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2462
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1927.742208
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[41]	valid_0's l2: 23343.8	valid_0's SMAPE: 4.35572
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2457
[LightGBM] [Info] Number of data points 

## 12

In [75]:
lgbm_evaluation(train_ft_12, test_ft_12)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1438.213052
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[26]	valid_0's l2: 5889.39	valid_0's SMAPE: 3.71875
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1433.896364
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[48]	valid_0's l2: 20766.9	valid_0's SMAPE: 6.71222
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points 

## 13

In [76]:
lgbm_evaluation(train_ft_13, test_ft_13)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2459
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 2528.033117
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[23]	valid_0's l2: 41177.3	valid_0's SMAPE: 6.14025
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2457
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 2522.351558
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[11]	valid_0's l2: 36264.2	valid_0's SMAPE: 5.57516
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2457
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [

## 14

In [77]:
lgbm_evaluation(train_ft_14, test_ft_14)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1903.957921
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[20]	valid_0's l2: 156349	valid_0's SMAPE: 17.2321
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1913.234025
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[7]	valid_0's l2: 123767	valid_0's SMAPE: 16.2046
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2461
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Inf

## 15

In [78]:
lgbm_evaluation(train_ft_15, test_ft_15)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1781.858960
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[18]	valid_0's l2: 14169.6	valid_0's SMAPE: 5.22342
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1771.843587
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[29]	valid_0's l2: 10593.8	valid_0's SMAPE: 4.50714
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points 

## 16

In [79]:
lgbm_evaluation(train_ft_16, test_ft_16)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2467
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3381.707536
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[52]	valid_0's l2: 26147.3	valid_0's SMAPE: 3.65048
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3374.096366
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[47]	valid_0's l2: 29996.2	valid_0's SMAPE: 3.49374
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2470
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [

## 17

In [80]:
lgbm_evaluation(train_ft_17, test_ft_17)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 988.226737
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[13]	valid_0's l2: 77045.5	valid_0's SMAPE: 19.5592
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 978.002678
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[46]	valid_0's l2: 5475.3	valid_0's SMAPE: 4.6325
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [In

## 18

In [81]:
lgbm_evaluation(train_ft_18, test_ft_18)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1459.477353
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[17]	valid_0's l2: 60626	valid_0's SMAPE: 11.5803
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1446.881413
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[35]	valid_0's l2: 24170.7	valid_0's SMAPE: 6.04113
You can set `force_row_wise=true` to remove the overhead.
And if memor

## 19

In [82]:
lgbm_evaluation(train_ft_19, test_ft_19)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1435.302409
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[64]	valid_0's l2: 97299.8	valid_0's SMAPE: 9.64496
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1420.714093
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[99]	valid_0's l2: 6489.04	valid_0's SMAPE: 4.09557
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 20

In [83]:
lgbm_evaluation(train_ft_20, test_ft_20)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2458
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1739.030131
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[12]	valid_0's l2: 352404	valid_0's SMAPE: 22.1399
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2460
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1717.878183
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[67]	valid_0's l2: 19639.5	valid_0's SMAPE: 3.94996
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2458
[LightGBM] [Info] Number of data points i

## 21

In [84]:
lgbm_evaluation(train_ft_21, test_ft_21)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1993.179611
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[16]	valid_0's l2: 105527	valid_0's SMAPE: 10.1815
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2467
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1983.044522
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[51]	valid_0's l2: 20587.5	valid_0's SMAPE: 5.04535
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2467
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [I

## 22

In [85]:
lgbm_evaluation(train_ft_22, test_ft_22)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2450
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1237.727923
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[71]	valid_0's l2: 11153.6	valid_0's SMAPE: 5.44726
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2450
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1234.599935
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[29]	valid_0's l2: 3105.87	valid_0's SMAPE: 3.08315
You can set `force_col_wise=true` to remove the overhead.
[LightGBM]

## 23

In [86]:
lgbm_evaluation(train_ft_23, test_ft_23)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1267.707630
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[13]	valid_0's l2: 22991.3	valid_0's SMAPE: 7.18303
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1261.577760
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[68]	valid_0's l2: 1299.22	valid_0's SMAPE: 1.53673
You can set `force_row_wise=true` to remove the overhead.
And if mem

## 24

In [87]:
lgbm_evaluation(train_ft_24, test_ft_24)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 10084.086365
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[70]	valid_0's l2: 1.69983e+06	valid_0's SMAPE: 4.78188
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 10031.982795
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[47]	valid_0's l2: 258680	valid_0's SMAPE: 3.77635
You can set `force_row_wise=true` to remove the overhead.
And i

## 25

In [88]:
lgbm_evaluation(train_ft_25, test_ft_25)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 7298.539734
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[16]	valid_0's l2: 1.36253e+06	valid_0's SMAPE: 9.2059
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 7257.556359
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[17]	valid_0's l2: 585251	valid_0's SMAPE: 5.97986
You can set `force_row_wise=true` to remove the overhead.
And if m

## 26

In [89]:
lgbm_evaluation(train_ft_26, test_ft_26)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2460
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3540.867272
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[15]	valid_0's l2: 233637	valid_0's SMAPE: 7.44371
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2459
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3516.525194
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[90]	valid_0's l2: 14215.6	valid_0's SMAPE: 1.98119
You can set `force_row_wise=true` to remove the overhead.
And if memo

## 27

In [90]:
lgbm_evaluation(train_ft_27, test_ft_27)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 17477.922715
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[11]	valid_0's l2: 2.21419e+06	valid_0's SMAPE: 5.26344
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 17346.574229
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[66]	valid_0's l2: 312508	valid_0's SMAPE: 2.427
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightG

## 28

In [91]:
lgbm_evaluation(train_ft_28, test_ft_28)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1736.544155
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[14]	valid_0's l2: 81664	valid_0's SMAPE: 8.99719
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1718.856233
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[65]	valid_0's l2: 27233	valid_0's SMAPE: 5.76575
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in t

## 29

In [92]:
lgbm_evaluation(train_ft_29, test_ft_29)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1457.388393
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[10]	valid_0's l2: 47629.2	valid_0's SMAPE: 9.7305
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2462
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1438.788068
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[56]	valid_0's l2: 13640.9	valid_0's SMAPE: 4.50142
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2459
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [I

## 30

In [93]:
lgbm_evaluation(train_ft_30, test_ft_30)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 847.052192
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[5]	valid_0's l2: 25146	valid_0's SMAPE: 12.7858
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 837.882549
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[37]	valid_0's l2: 11317.7	valid_0's SMAPE: 6.83152
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Inf

## 31

In [94]:
lgbm_evaluation(train_ft_31, test_ft_31)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2471
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1358.847759
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[13]	valid_0's l2: 40915.4	valid_0's SMAPE: 8.8679
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1344.201525
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[98]	valid_0's l2: 16039.3	valid_0's SMAPE: 6.35271
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2467
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [I

## 32

In [95]:
lgbm_evaluation(train_ft_32, test_ft_32)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 9868.925245
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[29]	valid_0's l2: 2403.26	valid_0's SMAPE: 0.387629
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 9858.762259
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[38]	valid_0's l2: 1773.17	valid_0's SMAPE: 0.317571
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM]

## 33

In [96]:
lgbm_evaluation(train_ft_33, test_ft_33)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 8697.570778
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[28]	valid_0's l2: 2061.49	valid_0's SMAPE: 0.433229
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 8688.367400
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[97]	valid_0's l2: 1869.7	valid_0's SMAPE: 0.407886
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] 

## 34

In [97]:
lgbm_evaluation(train_ft_34, test_ft_34)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2461
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 3352.312791
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[29]	valid_0's l2: 1237.55	valid_0's SMAPE: 0.833209
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2461
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 3345.405000
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[33]	valid_0's l2: 4933.46	valid_0's SMAPE: 0.930291
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2461
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM]

## 35

In [98]:
lgbm_evaluation(train_ft_35, test_ft_35)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2460
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 2172.289286
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[18]	valid_0's l2: 172.31	valid_0's SMAPE: 0.500337
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2460
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 2169.637176
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[94]	valid_0's l2: 324.773	valid_0's SMAPE: 0.604384
You can set `force_row_wise=true` to remove the overhead.
And if me

## 36

In [99]:
lgbm_evaluation(train_ft_36, test_ft_36)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2460
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 2894.787208
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[21]	valid_0's l2: 399.803	valid_0's SMAPE: 0.528136
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2460
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 2897.014091
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[98]	valid_0's l2: 1482.56	valid_0's SMAPE: 1.06427
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2459
[LightGBM] [Info] Number of data points

## 37

In [100]:
lgbm_evaluation(train_ft_37, test_ft_37)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3344.552433
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[45]	valid_0's l2: 22562.7	valid_0's SMAPE: 3.43113
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3352.074186
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[97]	valid_0's l2: 41448.7	valid_0's SMAPE: 5.00896
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 38

In [101]:
lgbm_evaluation(train_ft_38, test_ft_38)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1807.249870
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[93]	valid_0's l2: 7105.58	valid_0's SMAPE: 4.59079
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1797.536104
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[97]	valid_0's l2: 11357.3	valid_0's SMAPE: 4.40263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 39

In [102]:
lgbm_evaluation(train_ft_39, test_ft_39)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2165.652988
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[41]	valid_0's l2: 3405.64	valid_0's SMAPE: 3.31323
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2179.524676
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[91]	valid_0's l2: 255373	valid_0's SMAPE: 15.968
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in

## 40

In [103]:
lgbm_evaluation(train_ft_40, test_ft_40)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2450
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1867.013313
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[71]	valid_0's l2: 44502.5	valid_0's SMAPE: 8.88968
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2450
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1879.347079
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[58]	valid_0's l2: 33560.5	valid_0's SMAPE: 10.4442
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2453
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [

## 41

In [104]:
lgbm_evaluation(train_ft_41, test_ft_41)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 18
[LightGBM] [Info] Start training from score 3249.501786
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's l2: 59669.6	valid_0's SMAPE: 5.34629
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 18
[LightGBM] [Info] Start training from score 3262.745942
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's l2: 82554	valid_0's SMAPE: 6.7751
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points i

## 42

In [105]:
lgbm_evaluation(train_ft_42, test_ft_42)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1588.186168
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[48]	valid_0's l2: 5701.05	valid_0's SMAPE: 2.84188
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1586.134480
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[95]	valid_0's l2: 3465.61	valid_0's SMAPE: 2.66497
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points 

## 43

In [106]:
lgbm_evaluation(train_ft_43, test_ft_43)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2450
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1888.820134
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[40]	valid_0's l2: 11243.3	valid_0's SMAPE: 6.14965
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2450
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1876.801952
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[65]	valid_0's l2: 9899.68	valid_0's SMAPE: 4.5532
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2452
[LightGBM] [Info] Number of data points i

## 44

In [107]:
lgbm_evaluation(train_ft_44, test_ft_44)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2451
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2116.943117
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[60]	valid_0's l2: 29647.9	valid_0's SMAPE: 5.75042
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2451
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2106.487013
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[39]	valid_0's l2: 21477.4	valid_0's SMAPE: 5.86344
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2453
[LightGBM] [Info] Number of data points 

## 45

In [108]:
train_ft_45.head()

Unnamed: 0,energy_consum,time,day,month,week,sin_time,cos_time,temp_RM,wind_RM,hum_RM,temp_EMA,wind_EMA,hum_EMA,THI,feels_temp,day_hour_mean,periodic_avg_power_change_rate,periodic_avg_power_change_rate_6,weekend
88704,1967.52,0,3,6,22,0.0,1.0,24.15,3.183333,39.666667,22.02795,2.485679,47.470422,69.672872,28.818026,2347.08,0.022423,0.004932,0
88705,1945.92,1,3,6,22,0.258819,0.965926,23.258333,2.9,43.583333,21.346727,2.226344,50.782665,68.942223,27.987268,2347.56,0.02616,0.005352,0
88706,1918.08,2,3,6,22,0.5,0.866025,22.325,2.583333,47.916667,20.708769,1.991522,53.893024,68.121589,27.080175,2324.88,0.02563,0.005576,0
88707,1906.08,3,3,6,22,0.707107,0.707107,21.4,2.4,52.583333,20.168959,2.023595,57.140251,67.254888,26.152269,2302.52,0.023099,0.005132,0
88708,1941.6,4,3,6,22,0.866025,0.5,20.533333,2.133333,57.0,19.712196,2.019965,60.810982,66.36796,25.248724,2375.88,0.025552,0.006059,0


In [109]:
lgbm_evaluation(train_ft_45, test_ft_45)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2949.688569
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[69]	valid_0's l2: 23180.9	valid_0's SMAPE: 3.33035
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2467
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2944.540778
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[46]	valid_0's l2: 10582.5	valid_0's SMAPE: 2.61807
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [

## 46

In [110]:
lgbm_evaluation(train_ft_46, test_ft_46)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2250.695714
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[16]	valid_0's l2: 49426.3	valid_0's SMAPE: 6.617
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2233.316753
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[78]	valid_0's l2: 27842.4	valid_0's SMAPE: 5.14283
You can set `force_row_wise=true` to remove the overhead.
And if memor

## 47

In [111]:
lgbm_evaluation(train_ft_47, test_ft_47)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 5394.258571
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[36]	valid_0's l2: 252801	valid_0's SMAPE: 6.55892
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 5347.210517
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[38]	valid_0's l2: 98251.2	valid_0's SMAPE: 4.72251
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2463
[LightGBM] [Info] Number of data points i

## 48

In [112]:
lgbm_evaluation(train_ft_48, test_ft_48)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2448
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1881.031364
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[17]	valid_0's l2: 19738.7	valid_0's SMAPE: 4.4601
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2448
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1870.853377
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's l2: 13095.7	valid_0's SMAPE: 3.44032
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2453
[LightGBM] [Info] Number of data points 

## 49

In [113]:
lgbm_evaluation(train_ft_49, test_ft_49)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2449
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3044.299092
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[18]	valid_0's l2: 27042.1	valid_0's SMAPE: 4.04232
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2449
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3042.842924
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[97]	valid_0's l2: 12787	valid_0's SMAPE: 2.33053
You can set `force_row_wise=true` to remove the overhead.
And if memor

## 50

In [114]:
lgbm_evaluation(train_ft_50, test_ft_50)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3021.826135
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[18]	valid_0's l2: 28862.4	valid_0's SMAPE: 3.51618
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2467
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3010.586038
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[97]	valid_0's l2: 11184.6	valid_0's SMAPE: 2.50552
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 51

In [115]:
lgbm_evaluation(train_ft_51, test_ft_51)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2642.552436
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[15]	valid_0's l2: 45259.5	valid_0's SMAPE: 4.83213
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2622.146396
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[90]	valid_0's l2: 23389.7	valid_0's SMAPE: 4.72518
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 52

In [116]:
lgbm_evaluation(train_ft_52, test_ft_52)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2107.180032
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[41]	valid_0's l2: 50659.4	valid_0's SMAPE: 4.59791
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2103.300162
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[43]	valid_0's l2: 6175.45	valid_0's SMAPE: 2.77513
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 53

In [117]:
lgbm_evaluation(train_ft_53, test_ft_53)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2027.086883
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[15]	valid_0's l2: 445648	valid_0's SMAPE: 26.5761
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2007.915714
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[37]	valid_0's l2: 58651.9	valid_0's SMAPE: 12.0938
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] 

## 54

In [118]:
lgbm_evaluation(train_ft_54, test_ft_54)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2461
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1837.467956
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[13]	valid_0's l2: 286906	valid_0's SMAPE: 29.2834
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2459
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1802.135359
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[63]	valid_0's l2: 70829.1	valid_0's SMAPE: 12.1464
You can set `force_row_wise=true` to remove the overhead.
And if memo

## 55

In [119]:
lgbm_evaluation(train_ft_55, test_ft_55)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2446
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1009.989092
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[14]	valid_0's l2: 1788.12	valid_0's SMAPE: 2.93432
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2446
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1007.768767
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[59]	valid_0's l2: 365.561	valid_0's SMAPE: 1.36078
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 56

In [120]:
lgbm_evaluation(train_ft_56, test_ft_56)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2467
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 4175.565195
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[21]	valid_0's l2: 9507.13	valid_0's SMAPE: 1.46467
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2467
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 4171.416428
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[58]	valid_0's l2: 1232.78	valid_0's SMAPE: 0.650607
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM

## 57

In [121]:
lgbm_evaluation(train_ft_57, test_ft_57)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1908.325616
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[34]	valid_0's l2: 54875.7	valid_0's SMAPE: 8.33689
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1898.926070
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[43]	valid_0's l2: 21450.8	valid_0's SMAPE: 5.51817
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 58

In [122]:
lgbm_evaluation(train_ft_58, test_ft_58)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2460
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2882.060777
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[31]	valid_0's l2: 1213.32	valid_0's SMAPE: 0.686967
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2460
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2879.768829
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[55]	valid_0's l2: 266.47	valid_0's SMAPE: 0.428916
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM

## 59

In [123]:
lgbm_evaluation(train_ft_59, test_ft_59)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1679.564903
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[14]	valid_0's l2: 374710	valid_0's SMAPE: 26.6675
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1656.962046
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[58]	valid_0's l2: 24344.4	valid_0's SMAPE: 5.8697
You can set `force_row_wise=true` to remove the overhead.
And if memor

## 60

In [124]:
lgbm_evaluation(train_ft_60, test_ft_60)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2944.016363
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[96]	valid_0's l2: 174135	valid_0's SMAPE: 8.93373
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2934.906234
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[44]	valid_0's l2: 25402.6	valid_0's SMAPE: 4.18082
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points i

## 61

In [125]:
lgbm_evaluation(train_ft_61, test_ft_61)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3111.931166
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[30]	valid_0's l2: 48419.5	valid_0's SMAPE: 4.62865
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3068.017595
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[61]	valid_0's l2: 38238.6	valid_0's SMAPE: 4.01154
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 62

In [126]:
lgbm_evaluation(train_ft_62, test_ft_62)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1146.101592
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[27]	valid_0's l2: 7002.02	valid_0's SMAPE: 5.05931
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1127.724107
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[99]	valid_0's l2: 6814.32	valid_0's SMAPE: 4.28882
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points 

## 63

In [127]:
lgbm_evaluation(train_ft_63, test_ft_63)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 751.185000
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[22]	valid_0's l2: 3115.92	valid_0's SMAPE: 5.68993
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 735.611201
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[88]	valid_0's l2: 7601.13	valid_0's SMAPE: 7.37746
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in

## 64

In [128]:
lgbm_evaluation(train_ft_64, test_ft_64)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1182.998474
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[41]	valid_0's l2: 3325.2	valid_0's SMAPE: 3.36835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1163.526721
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[78]	valid_0's l2: 7758.78	valid_0's SMAPE: 4.74714
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2461
[LightGBM] [Info] Number of data points i

## 65

In [129]:
lgbm_evaluation(train_ft_65, test_ft_65)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2450
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 561.428669
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[79]	valid_0's l2: 8343.94	valid_0's SMAPE: 8.54419
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2450
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 554.687436
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's l2: 9196.23	valid_0's SMAPE: 8.58392
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2453
[LightGBM] [Info] Number of data points i

## 66

In [130]:
lgbm_evaluation(train_ft_66, test_ft_66)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2470
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 395.685633
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[34]	valid_0's l2: 416.748	valid_0's SMAPE: 3.36644
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2470
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 391.607678
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[96]	valid_0's l2: 806.346	valid_0's SMAPE: 4.80195
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2470
[LightGBM] [Info] Number of data points in

## 67

In [131]:
lgbm_evaluation(train_ft_67, test_ft_67)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2460
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1263.213052
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[99]	valid_0's l2: 14509.4	valid_0's SMAPE: 5.78534
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2457
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1246.471623
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[97]	valid_0's l2: 32934.2	valid_0's SMAPE: 8.18329
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2457
[LightGBM] [Info] Number of data points 

## 68

In [132]:
lgbm_evaluation(train_ft_68, test_ft_68)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2021.100486
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's l2: 14050.8	valid_0's SMAPE: 3.57469
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1990.819318
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[76]	valid_0's l2: 22190	valid_0's SMAPE: 4.33683
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] 

## 69

In [133]:
lgbm_evaluation(train_ft_69, test_ft_69)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 4504.748213
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[14]	valid_0's l2: 406158	valid_0's SMAPE: 8.73394
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 4471.055031
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[60]	valid_0's l2: 59291.6	valid_0's SMAPE: 3.45898
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] 

## 70

In [134]:
lgbm_evaluation(train_ft_70, test_ft_70)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 4306.344575
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[16]	valid_0's l2: 617318	valid_0's SMAPE: 10.091
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 4318.768082
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[12]	valid_0's l2: 400063	valid_0's SMAPE: 13.7239
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [I

## 71

In [135]:
lgbm_evaluation(train_ft_71, test_ft_71)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2145.009223
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[13]	valid_0's l2: 79636	valid_0's SMAPE: 7.69494
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2131.211431
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[52]	valid_0's l2: 35087.6	valid_0's SMAPE: 3.85881
You can set `force_row_wise=true` to remove the overhead.
And if memor

## 72

In [136]:
lgbm_evaluation(train_ft_72, test_ft_72)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2450
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1277.863345
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[10]	valid_0's l2: 20470.1	valid_0's SMAPE: 9.14515
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2450
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1266.322014
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[55]	valid_0's l2: 19801.3	valid_0's SMAPE: 9.54189
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 73

In [137]:
lgbm_evaluation(train_ft_73, test_ft_73)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3622.370908
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[18]	valid_0's l2: 67361.1	valid_0's SMAPE: 4.88945
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 3610.441036
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[26]	valid_0's l2: 33237.6	valid_0's SMAPE: 3.7861
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points i

## 74

In [138]:
lgbm_evaluation(train_ft_74, test_ft_74)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 4054.126759
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[13]	valid_0's l2: 983787	valid_0's SMAPE: 16.6492
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 4017.511564
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[87]	valid_0's l2: 109921	valid_0's SMAPE: 5.44763
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [

## 75

In [139]:
lgbm_evaluation(train_ft_75, test_ft_75)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1404.754285
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[18]	valid_0's l2: 11834	valid_0's SMAPE: 4.54197
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1400.358116
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[42]	valid_0's l2: 5816.43	valid_0's SMAPE: 3.57561
You can set `force_row_wise=true` to remove the overhead.
And if memor

## 76

In [140]:
lgbm_evaluation(train_ft_76, test_ft_76)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1200.872533
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[16]	valid_0's l2: 15950.6	valid_0's SMAPE: 5.78631
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1196.835878
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[60]	valid_0's l2: 4886.31	valid_0's SMAPE: 4.12813
You can set `force_row_wise=true` to remove the overhead.
And if mem

## 77

In [141]:
lgbm_evaluation(train_ft_77, test_ft_77)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1491.751950
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[78]	valid_0's l2: 54970.3	valid_0's SMAPE: 5.81035
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1482.949872
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[32]	valid_0's l2: 3325.67	valid_0's SMAPE: 2.77178
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 78

In [142]:
lgbm_evaluation(train_ft_78, test_ft_78)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1657.676492
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[17]	valid_0's l2: 150435	valid_0's SMAPE: 12.3398
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1643.926103
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[64]	valid_0's l2: 7932.21	valid_0's SMAPE: 3.48752
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2463
[LightGBM] [Info] Number of data points i

## 79

In [143]:
lgbm_evaluation(train_ft_79, test_ft_79)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2909.921687
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[58]	valid_0's l2: 156939	valid_0's SMAPE: 6.0808
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2889.230517
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[89]	valid_0's l2: 10225.2	valid_0's SMAPE: 2.831
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [In

## 80

In [144]:
lgbm_evaluation(train_ft_80, test_ft_80)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2575.399284
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[16]	valid_0's l2: 671253	valid_0's SMAPE: 18.4569
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2542.969284
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[44]	valid_0's l2: 157260	valid_0's SMAPE: 6.53578
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [

## 81

In [145]:
lgbm_evaluation(train_ft_81, test_ft_81)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1440.649675
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[56]	valid_0's l2: 4546.43	valid_0's SMAPE: 3.39425
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1435.189674
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[73]	valid_0's l2: 4603.07	valid_0's SMAPE: 2.91732
You can set `force_row_wise=true` to remove the overhead.
And if mem

## 82

In [146]:
lgbm_evaluation(train_ft_82, test_ft_82)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2299.154026
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[15]	valid_0's l2: 301549	valid_0's SMAPE: 14.5586
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 2269.918051
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[51]	valid_0's l2: 285991	valid_0's SMAPE: 9.7293
You can set `force_row_wise=true` to remove the overhead.
And if memory

## 83

In [147]:
lgbm_evaluation(train_ft_83, test_ft_83)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1456.938799
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[13]	valid_0's l2: 229430	valid_0's SMAPE: 20.2936
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1443.528734
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[65]	valid_0's l2: 3852.79	valid_0's SMAPE: 2.68329
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [I

## 84

In [148]:
lgbm_evaluation(train_ft_84, test_ft_84)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1731.645974
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[16]	valid_0's l2: 154710	valid_0's SMAPE: 13.0312
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1712.984157
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[78]	valid_0's l2: 5838.28	valid_0's SMAPE: 2.95354
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] 

## 85

In [149]:
lgbm_evaluation(train_ft_85, test_ft_85)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 2251.853050
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[35]	valid_0's l2: 6519.68	valid_0's SMAPE: 2.70669
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 2244.753180
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[85]	valid_0's l2: 9320.71	valid_0's SMAPE: 3.22579
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [

## 86

In [150]:
lgbm_evaluation(train_ft_86, test_ft_86)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1143.247401
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[47]	valid_0's l2: 2416.44	valid_0's SMAPE: 4.24177
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1147.249577
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[51]	valid_0's l2: 2135.58	valid_0's SMAPE: 3.48661
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 87

In [151]:
lgbm_evaluation(train_ft_87, test_ft_87)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1280.028459
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[42]	valid_0's l2: 26370.4	valid_0's SMAPE: 9.7732
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2469
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1270.483686
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[46]	valid_0's l2: 22801.5	valid_0's SMAPE: 9.78611
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2463
[LightGBM] [Info] Number of data points i

## 88

In [152]:
lgbm_evaluation(train_ft_88, test_ft_88)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1199.688361
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's l2: 2996.71	valid_0's SMAPE: 3.48714
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1184.698979
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[53]	valid_0's l2: 2240.7	valid_0's SMAPE: 2.57726
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2463
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [

## 89

In [153]:
lgbm_evaluation(train_ft_89, test_ft_89)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1438.431557
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[51]	valid_0's l2: 7415.62	valid_0's SMAPE: 5.1103
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1419.177662
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[79]	valid_0's l2: 7702.76	valid_0's SMAPE: 4.08759
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2461
[LightGBM] [Info] Number of data points i

## 90

In [154]:
lgbm_evaluation(train_ft_90, test_ft_90)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2468
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1385.970911
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[58]	valid_0's l2: 16907.4	valid_0's SMAPE: 6.51052
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2463
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1370.530651
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[61]	valid_0's l2: 7423.39	valid_0's SMAPE: 4.44778
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2463
[LightGBM] [Info] Number of data points 

## 91

In [155]:
lgbm_evaluation(train_ft_91, test_ft_91)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2449
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1324.893375
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[65]	valid_0's l2: 10513.9	valid_0's SMAPE: 5.90566
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2449
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1317.362986
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[53]	valid_0's l2: 5606.3	valid_0's SMAPE: 4.996
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2452
[LightGBM] [Info] Number of data points in 

## 92

In [156]:
lgbm_evaluation(train_ft_92, test_ft_92)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1290.860164
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[36]	valid_0's l2: 5600.53	valid_0's SMAPE: 4.25135
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 17
[LightGBM] [Info] Start training from score 1281.517014
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[34]	valid_0's l2: 5618.71	valid_0's SMAPE: 4.2337
You can set `force_row_wise=true` to remove the overhead.
And if memo

## 93

In [157]:
lgbm_evaluation(train_ft_93, test_ft_93)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2463
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1101.575649
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[21]	valid_0's l2: 1853.88	valid_0's SMAPE: 3.01175
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2463
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1097.744416
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[78]	valid_0's l2: 3238.38	valid_0's SMAPE: 3.77889
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM]

## 94

In [158]:
lgbm_evaluation(train_ft_94, test_ft_94)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 2447.989479
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[26]	valid_0's l2: 58800.3	valid_0's SMAPE: 7.12035
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 2433.491427
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[56]	valid_0's l2: 28765.8	valid_0's SMAPE: 5.36665
You can set `force_row_wise=true` to remove the overhead.
And if mem

## 95

In [159]:
lgbm_evaluation(train_ft_95, test_ft_95)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1100.237435
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[37]	valid_0's l2: 48871.4	valid_0's SMAPE: 14.2512
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2460
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1096.358863
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[27]	valid_0's l2: 33961.2	valid_0's SMAPE: 12.115
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2461
[LightGBM] [Info] Number of data points i

## 96

In [160]:
lgbm_evaluation(train_ft_96, test_ft_96)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2458
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 2790.737240
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[42]	valid_0's l2: 14543.7	valid_0's SMAPE: 2.93326
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2455
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 2781.333604
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[41]	valid_0's l2: 19454	valid_0's SMAPE: 3.20043
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2455
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [In

## 97

In [161]:
lgbm_evaluation(train_ft_97, test_ft_97)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2467
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1221.920357
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[57]	valid_0's l2: 5170.84	valid_0's SMAPE: 3.97603
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2467
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1225.312305
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[92]	valid_0's l2: 7603.25	valid_0's SMAPE: 4.54818
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2466
[LightGBM] [Info] Number of data points 

## 98

In [162]:
lgbm_evaluation(train_ft_98, test_ft_98)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2464
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1127.404383
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[28]	valid_0's l2: 19936.8	valid_0's SMAPE: 10.1922
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2462
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1125.321234
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[21]	valid_0's l2: 10317.1	valid_0's SMAPE: 6.71592
You can set `force_row_wise=true` to remove the overhead.
And if mem

## 99

In [163]:
lgbm_evaluation(train_ft_99, test_ft_99)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2460
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1154.328701
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[67]	valid_0's l2: 2715.01	valid_0's SMAPE: 3.75069
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2463
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 1146.199967
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's l2: 3036.5	valid_0's SMAPE: 3.218
You can set `force_row_wise=true` to remove the overhead.
And if memor

## 100

In [164]:
lgbm_evaluation(train_ft_100, test_ft_100)

  0%|          | 0/4 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2462
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 821.444156
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[53]	valid_0's l2: 2130.29	valid_0's SMAPE: 4.38246
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2461
[LightGBM] [Info] Number of data points in the train set: 1848, number of used features: 16
[LightGBM] [Info] Start training from score 817.313247
Training until validation scores don't improve for 120 rounds
Did not meet early stopping. Best iteration is:
[43]	valid_0's l2: 2202.76	valid_0's SMAPE: 3.75282
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [

# 제출

In [165]:
# submission = pd.read_csv(f"{DATA_PATH}sample_submission.csv")

In [166]:
len(lgbm_preds)

16800

In [167]:
train_ft.columns

Index(['energy_consum', 'building_num', 'date_time', 'temp', 'precip', 'wind',
       'hum', 'building_type', 'time', 'day', 'month', 'week', 'day_of_year',
       'sin_time', 'cos_time', 'temp_RM', 'wind_RM', 'hum_RM', 'temp_EMA',
       'wind_EMA', 'hum_EMA', 'THI', 'feels_temp', 'CDH', 'hour_mean',
       'day_hour_mean', 'hour_std', 'heat_index', 'heat_wave',
       'periodic_avg_power_change_rate', 'periodic_avg_power_change_rate_6'],
      dtype='object')

In [168]:
submission.iloc[:, 1] = lgbm_preds

In [169]:
submission.to_csv(f"{DATA_PATH}submission_lgbm_1.csv", index=False)

In [170]:
submission.to_csv("submission_lgbm_1.csv", index=False)

In [172]:
pd.DataFrame(smape_score_mean)

Unnamed: 0,0
0,6.313052
1,7.625302
2,13.409566
3,5.409537
4,6.438196
...,...
95,3.423667
96,4.570077
97,8.814490
98,3.212273


In [None]:
train_ft_1.columns