## Import

In [1]:
import os
import random
from time import time
from datetime import datetime

In [2]:
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
import numpy as np
from tqdm import tqdm
import joblib
import xgboost as xgb
from xgboost import XGBRegressor

## Fixed Random-Seed

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Seed 고정

## Load Data

In [4]:
train_df = pd.read_csv('data/train.csv') 
test_df = pd.read_csv('data/test.csv')
building_info = pd.read_csv('data/building_info.csv')

In [5]:
train_df.head()

Unnamed: 0,num_date_time,건물번호,일시,기온(C),강수량(mm),풍속(m/s),습도(%),일조(hr),일사(MJ/m2),전력소비량(kWh)
0,1_20220601 00,1,20220601 00,18.6,,0.9,42.0,,,1085.28
1,1_20220601 01,1,20220601 01,18.0,,1.1,45.0,,,1047.36
2,1_20220601 02,1,20220601 02,17.7,,1.5,45.0,,,974.88
3,1_20220601 03,1,20220601 03,16.7,,1.4,48.0,,,953.76
4,1_20220601 04,1,20220601 04,18.4,,2.8,43.0,,,986.4


In [6]:
test_df.head()

Unnamed: 0,num_date_time,건물번호,일시,기온(C),강수량(mm),풍속(m/s),습도(%)
0,1_20220825 00,1,20220825 00,23.5,0.0,2.2,72
1,1_20220825 01,1,20220825 01,23.0,0.0,0.9,72
2,1_20220825 02,1,20220825 02,22.7,0.0,1.5,75
3,1_20220825 03,1,20220825 03,22.1,0.0,1.3,78
4,1_20220825 04,1,20220825 04,21.8,0.0,1.0,77


In [7]:
building_info.head()

Unnamed: 0,건물번호,건물유형,연면적(m2),냉방면적(m2),태양광용량(kW),ESS저장용량(kWh),PCS용량(kW)
0,1,건물기타,110634.0,39570.0,-,-,-
1,2,건물기타,122233.47,99000.0,-,-,-
2,3,건물기타,171243.0,113950.0,40,-,-
3,4,건물기타,74312.98,34419.62,60,-,-
4,5,건물기타,205884.0,150000.0,-,2557,1000


## Train & Test Data Pre-Processing

### (0) 한글 -> 영어로 번역

#### `train_df`

In [8]:
train_df = train_df.rename(columns={
    '건물번호': 'building_number',
    '일시': 'date_time',
    '기온(C)': 'temperature',
    '강수량(mm)': 'rainfall',
    '풍속(m/s)': 'windspeed',
    '습도(%)': 'humidity',
    '일조(hr)': 'sunshine',
    '일사(MJ/m2)': 'solar_radiation',
    '전력소비량(kWh)': 'power_consumption'
})

In [9]:
train_df.head()

Unnamed: 0,num_date_time,building_number,date_time,temperature,rainfall,windspeed,humidity,sunshine,solar_radiation,power_consumption
0,1_20220601 00,1,20220601 00,18.6,,0.9,42.0,,,1085.28
1,1_20220601 01,1,20220601 01,18.0,,1.1,45.0,,,1047.36
2,1_20220601 02,1,20220601 02,17.7,,1.5,45.0,,,974.88
3,1_20220601 03,1,20220601 03,16.7,,1.4,48.0,,,953.76
4,1_20220601 04,1,20220601 04,18.4,,2.8,43.0,,,986.4


#### `test_df`

In [10]:
test_df = test_df.rename(columns={
    '건물번호': 'building_number',
    '일시': 'date_time',
    '기온(C)': 'temperature',
    '강수량(mm)': 'rainfall',
    '풍속(m/s)': 'windspeed',
    '습도(%)': 'humidity',
    '일조(hr)': 'sunshine',
    '일사(MJ/m2)': 'solar_radiation',
    '전력소비량(kWh)': 'power_consumption'
})

In [11]:
test_df.head()

Unnamed: 0,num_date_time,building_number,date_time,temperature,rainfall,windspeed,humidity
0,1_20220825 00,1,20220825 00,23.5,0.0,2.2,72
1,1_20220825 01,1,20220825 01,23.0,0.0,0.9,72
2,1_20220825 02,1,20220825 02,22.7,0.0,1.5,75
3,1_20220825 03,1,20220825 03,22.1,0.0,1.3,78
4,1_20220825 04,1,20220825 04,21.8,0.0,1.0,77


#### `building_info`

In [12]:
building_info = building_info.rename(columns={
    '건물번호': 'building_number',
    '건물유형': 'building_type',
    '연면적(m2)': 'total_area',
    '냉방면적(m2)': 'cooling_area',
    '태양광용량(kW)': 'solar_power_capacity',
    'ESS저장용량(kWh)': 'ess_capacity',
    'PCS용량(kW)': 'pcs_capacity'
})

In [13]:
translation_dict = {
    '건물기타': 'Other Buildings',
    '공공': 'Public',
    '대학교': 'University',
    '데이터센터': 'Data Center',
    '백화점및아울렛': 'Department Store and Outlet',
    '병원': 'Hospital',
    '상용': 'Commercial',
    '아파트': 'Apartment',
    '연구소': 'Research Institute',
    '지식산업센터': 'Knowledge Industry Center',
    '할인마트': 'Discount Mart',
    '호텔및리조트': 'Hotel and Resort'
}

building_info['building_type'] = building_info['building_type'].replace(translation_dict)

In [14]:
building_info

Unnamed: 0,building_number,building_type,total_area,cooling_area,solar_power_capacity,ess_capacity,pcs_capacity
0,1,Other Buildings,110634.00,39570.00,-,-,-
1,2,Other Buildings,122233.47,99000.00,-,-,-
2,3,Other Buildings,171243.00,113950.00,40,-,-
3,4,Other Buildings,74312.98,34419.62,60,-,-
4,5,Other Buildings,205884.00,150000.00,-,2557,1000
...,...,...,...,...,...,...,...
95,96,Hotel and Resort,93314.00,60500.00,-,-,-
96,97,Hotel and Resort,55144.67,25880.00,-,-,-
97,98,Hotel and Resort,53578.62,17373.75,-,-,-
98,99,Hotel and Resort,53499.00,40636.00,-,-,-


### (1) `building_info`를 `train_df`와 `test_df`에 각각 병합 (기준 : `building_number`(건물번호))

In [15]:
train_df = pd.merge(train_df, building_info, on='building_number', how='left')
test_df = pd.merge(test_df, building_info, on='building_number', how='left')

In [16]:
train_df.head()

Unnamed: 0,num_date_time,building_number,date_time,temperature,rainfall,windspeed,humidity,sunshine,solar_radiation,power_consumption,building_type,total_area,cooling_area,solar_power_capacity,ess_capacity,pcs_capacity
0,1_20220601 00,1,20220601 00,18.6,,0.9,42.0,,,1085.28,Other Buildings,110634.0,39570.0,-,-,-
1,1_20220601 01,1,20220601 01,18.0,,1.1,45.0,,,1047.36,Other Buildings,110634.0,39570.0,-,-,-
2,1_20220601 02,1,20220601 02,17.7,,1.5,45.0,,,974.88,Other Buildings,110634.0,39570.0,-,-,-
3,1_20220601 03,1,20220601 03,16.7,,1.4,48.0,,,953.76,Other Buildings,110634.0,39570.0,-,-,-
4,1_20220601 04,1,20220601 04,18.4,,2.8,43.0,,,986.4,Other Buildings,110634.0,39570.0,-,-,-


### (2) 결측치 처리

In [17]:
len(train_df)

204000

In [18]:
train_df.isna().sum()

num_date_time                0
building_number              0
date_time                    0
temperature                  0
rainfall                160069
windspeed                   19
humidity                     9
sunshine                 75182
solar_radiation          87913
power_consumption            0
building_type                0
total_area                   0
cooling_area                 0
solar_power_capacity         0
ess_capacity                 0
pcs_capacity                 0
dtype: int64

In [19]:
print(len(train_df[train_df['solar_power_capacity'] == '-']))
print(len(train_df[train_df['ess_capacity'] == '-']))
print(len(train_df[train_df['pcs_capacity'] == '-']))

130560
193800
193800


In [20]:
test_df.isna().sum()

num_date_time           0
building_number         0
date_time               0
temperature             0
rainfall                0
windspeed               0
humidity                0
building_type           0
total_area              0
cooling_area            0
solar_power_capacity    0
ess_capacity            0
pcs_capacity            0
dtype: int64

- `rainfall`(강수량)을 제외하고, 결측치가 있거나, `"-"`로 표기된 부분은 해당 데이터가 0이었기 때문이라고 추정.
    - `num_date_time` 데이터는 유의미한 데이터가 아니라고 판단하여 제외하여 사용하기 위해 drop 시킴
    - 결측치와 `"-"`값을 0으로 바꿔줌

#### `train_df`

In [21]:
# train_df.drop('rainfall', axis = 1, inplace=True)
train_df.drop('num_date_time', axis = 1, inplace=True)

In [22]:
train_df.fillna(0, inplace=True)
train_df.replace("-", 0, inplace=True)

In [23]:
train_df.head()

Unnamed: 0,building_number,date_time,temperature,rainfall,windspeed,humidity,sunshine,solar_radiation,power_consumption,building_type,total_area,cooling_area,solar_power_capacity,ess_capacity,pcs_capacity
0,1,20220601 00,18.6,0.0,0.9,42.0,0.0,0.0,1085.28,Other Buildings,110634.0,39570.0,0,0,0
1,1,20220601 01,18.0,0.0,1.1,45.0,0.0,0.0,1047.36,Other Buildings,110634.0,39570.0,0,0,0
2,1,20220601 02,17.7,0.0,1.5,45.0,0.0,0.0,974.88,Other Buildings,110634.0,39570.0,0,0,0
3,1,20220601 03,16.7,0.0,1.4,48.0,0.0,0.0,953.76,Other Buildings,110634.0,39570.0,0,0,0
4,1,20220601 04,18.4,0.0,2.8,43.0,0.0,0.0,986.4,Other Buildings,110634.0,39570.0,0,0,0


#### `test_df`

In [24]:
# test_df.drop('rainfall', axis = 1, inplace=True)
test_df.drop('num_date_time', axis = 1, inplace=True)

In [25]:
test_df.fillna(0, inplace=True)
test_df.replace("-", 0, inplace=True)

In [26]:
test_df.head()

Unnamed: 0,building_number,date_time,temperature,rainfall,windspeed,humidity,building_type,total_area,cooling_area,solar_power_capacity,ess_capacity,pcs_capacity
0,1,20220825 00,23.5,0.0,2.2,72,Other Buildings,110634.0,39570.0,0,0,0
1,1,20220825 01,23.0,0.0,0.9,72,Other Buildings,110634.0,39570.0,0,0,0
2,1,20220825 02,22.7,0.0,1.5,75,Other Buildings,110634.0,39570.0,0,0,0
3,1,20220825 03,22.1,0.0,1.3,78,Other Buildings,110634.0,39570.0,0,0,0
4,1,20220825 04,21.8,0.0,1.0,77,Other Buildings,110634.0,39570.0,0,0,0


### (3) `"date_time"` 처리
`"week"`, `"month"`, `"dayofyear"`, `"dayofweek"`, `"hour"` 추출

In [27]:
train_df['date_time'] = pd.to_datetime(train_df['date_time'], format='%Y%m%d %H')

# date time feature 생성
train_df['hour'] = train_df['date_time'].dt.hour
train_df['dayofweek'] = train_df['date_time'].dt.dayofweek
train_df['dayofyear'] = train_df['date_time'].dt.dayofyear
train_df['month'] = train_df['date_time'].dt.month
train_df['week'] = train_df['date_time'].dt.isocalendar().week

In [28]:
test_df['date_time'] = pd.to_datetime(test_df['date_time'], format='%Y%m%d %H')

# date time feature 생성
test_df['hour'] = test_df['date_time'].dt.hour
test_df['dayofweek'] = test_df['date_time'].dt.dayofweek
test_df['dayofyear'] = test_df['date_time'].dt.dayofyear
test_df['month'] = test_df['date_time'].dt.month
test_df['week'] = test_df['date_time'].dt.isocalendar().week

#### (3)-(1) 휴일 및 공휴일 여부 구하기

In [29]:
# holiday_list = ["2022-06-01", "2022-06-06", "2022-08-15"] # 2022년 06월 01일부터 2022년 8월 31일까지 중 공휴일 및 국경일 리스트

In [30]:
# isHoliday_list = []

# for i in range(len(train_df)):
#     if train_df['dayofweek'][i] >= 5: # 토요일, 일요일
#         isHoliday_list.append(1)
#     elif str(train_df['date_time'][i])[:10] in holiday_list: # 공휴일, 국경일
#         isHoliday_list.append(1)
#     else:
#         isHoliday_list.append(0)
        
# train_df["holiday"] = isHoliday_list

In [31]:
# isHoliday_list = []

# for i in range(len(test_df)):
#     if test_df['dayofweek'][i] >= 5: # 토요일, 일요일
#         isHoliday_list.append(1)
#     elif str(test_df['date_time'][i])[:10] in holiday_list: # 공휴일, 국경일
#         isHoliday_list.append(1)
#     else:
#         isHoliday_list.append(0)
        
# test_df["holiday"] = isHoliday_list

#### (3)-(2) 순환성 추가 : Encoding Cyclical Features
참고 : https://www.kaggle.com/code/avanwyk/encoding-cyclical-features-for-deep-learning

##### `"hour"`데이터를 cos, sin으로 나타냄으로써 cyclical property 부여

In [32]:
train_df['hour_sin'] = np.sin(2 * np.pi * train_df["hour"]/24.0)
train_df['hour_cos'] = np.cos(2 * np.pi * train_df["hour"]/24.0)

In [33]:
test_df['hour_sin'] = np.sin(2 * np.pi * test_df["hour"]/24.0)
test_df['hour_cos'] = np.cos(2 * np.pi * test_df["hour"]/24.0)

#### (3)-(3) 사용하지 않는 열 삭제
- `"sunshine"`('일조(hr)'), `"solar_radiation"`('일사(MJ/m2)')는 test_df에 존재하지 않으므로 학습 데이터에서 제외함
- `"hour"` 데이터는 `"hour_cos"`, `"hour_sin"`으로 대체 가능하므로 삭제함

In [34]:
train_df.drop(columns=['sunshine', 'solar_radiation', 'hour'], inplace=True)

In [35]:
test_df.drop(columns=['hour'], inplace=True)

### (4) CDH(cooling degree hour, 냉방도일) feature 추가
- 냉방도일 : 기온이 실제 냉방 가동에 이르기까지의 시간적 오차

In [36]:
# https://dacon.io/competitions/official/235736/codeshare/2743?page=1&dtype=recent
def CDH(xs):
    ys = []
    for i in range(len(xs)):
        if i < 11:
            ys.append(np.sum(xs[:(i+1)]-26))
        else:
            ys.append(np.sum(xs[(i-11):(i+1)]-26))
    return np.array(ys)

In [37]:
# https://dacon.io/competitions/official/235736/codeshare/2743?page=1&dtype=recent
cdhs = np.array([])
for building_number in range(1, 101, 1):
    temp = train_df[train_df['building_number'] == building_number]
    cdh = CDH(temp['temperature'].values)
    cdhs = np.concatenate([cdhs, cdh])
train_df['CDH'] = cdhs

In [38]:
# https://dacon.io/competitions/official/235736/codeshare/2743?page=1&dtype=recent
cdhs = np.array([])
for building_number in range(1, 101, 1):
    temp = test_df[test_df['building_number'] == building_number]
    cdh = CDH(temp['temperature'].values)
    cdhs = np.concatenate([cdhs, cdh])
test_df['CDH'] = cdhs

In [39]:
test_df.head()

Unnamed: 0,building_number,date_time,temperature,rainfall,windspeed,humidity,building_type,total_area,cooling_area,solar_power_capacity,ess_capacity,pcs_capacity,dayofweek,dayofyear,month,week,hour_sin,hour_cos,CDH
0,1,2022-08-25 00:00:00,23.5,0.0,2.2,72,Other Buildings,110634.0,39570.0,0,0,0,3,237,8,34,0.0,1.0,-2.5
1,1,2022-08-25 01:00:00,23.0,0.0,0.9,72,Other Buildings,110634.0,39570.0,0,0,0,3,237,8,34,0.258819,0.965926,-5.5
2,1,2022-08-25 02:00:00,22.7,0.0,1.5,75,Other Buildings,110634.0,39570.0,0,0,0,3,237,8,34,0.5,0.866025,-8.8
3,1,2022-08-25 03:00:00,22.1,0.0,1.3,78,Other Buildings,110634.0,39570.0,0,0,0,3,237,8,34,0.707107,0.707107,-12.7
4,1,2022-08-25 04:00:00,21.8,0.0,1.0,77,Other Buildings,110634.0,39570.0,0,0,0,3,237,8,34,0.866025,0.5,-16.9


### (5) 불쾌지수(THI; Temperature-Humidity Index) feature 추가

In [40]:
train_df['THI'] = 9/5 * train_df['temperature'] - 0.55 * (1 - train_df['humidity'] / 100) * (9/5 * train_df['temperature'] - 26) + 32
# train_df['THI_level'] = pd.cut(train_df['THI'], bins = [0, 68, 75, 80, 200], labels = [1,2,3,4])

In [41]:
test_df['THI'] = 9/5 * test_df['temperature'] - 0.55 * (1 - test_df['humidity'] / 100) * (9/5 * test_df['temperature'] - 26) + 32
# test_df['THI_level'] = pd.cut(test_df['THI'], bins = [0, 68, 75, 80, 200], labels = [1,2,3,4])

<!-- 불쾌지수 수치 값(`"THI"`)을 사용하지 않고, **불쾌지수 단계**(`"THI_level"`)를 사용한다면, `"THI"` 데이터 열 삭제 -->

In [42]:
# train_df.drop(columns=['THI'], inplace=True)

In [43]:
# test_df.drop(columns=['THI'], inplace=True)

### (6) 마지막 일주일 데이터를 Validation Data로 사용

In [44]:
# validation_date_list = ["2022-08-24", "2022-08-23", "2022-08-22", "2022-08-21", "2022-08-20", "2022-08-19", "2022-08-18"]

In [45]:
# origin_train_df = train_df.copy()

In [46]:
# for i in tqdm(range(len(origin_train_df))):
#     if str(origin_train_df["date_time"][i])[:10] in validation_date_list:
#         train_df.drop([i], inplace=True)
#     else:
#         valid_df.drop([i], inplace=True)

In [47]:
# train_df

In [48]:
# train_index_list = []
# valid_index_list = []

# for i in tqdm(range(len(origin_train_df))):
#     if str(origin_train_df["date_time"][i])[:10] in validation_date_list:
#         valid_index_list.append(i)
#     else:
#         train_index_list.append(i)
        
# train_df = origin_train_df.drop(valid_index_list, inplace=False)
# valid_df = origin_train_df.drop(train_index_list, inplace=False)

In [49]:
# # 인덱스 재설정
# train_df.reset_index(drop=True, inplace=True)
# valid_df.reset_index(drop=True, inplace=True)

In [50]:
# len(train_df), len(valid_df)

### (6) One hot encoding 생성

#### (6)-(1) 건물번호 (`"building_number"`)

In [51]:
# train_df = pd.get_dummies(train_df, columns=['building_number'], drop_first=True)

In [52]:
# test_df = pd.get_dummies(test_df, columns=['building_number'], drop_first=True)

#### (6)-(2) 건물유형 (`"building_type"`)
- 건물유형마다 모델링을 한다면, `"building_type"`를 기준으로 **one hot encoding을 생성하지 않아도 됨**

In [53]:
train_df = pd.get_dummies(train_df, columns=['building_type'], drop_first=True)

In [54]:
test_df = pd.get_dummies(test_df, columns=['building_type'], drop_first=True)

In [55]:
# train_df.drop(columns=['building_type'], inplace=True)

In [56]:
# test_df.drop(columns=['building_type'], inplace=True)

### (7) 냉방 면적 대비 연면적 feature 추가 (AR : Area Ratio)

In [57]:
train_df['AR'] = train_df['cooling_area'] / train_df['total_area']

In [58]:
test_df['AR'] = test_df['cooling_area'] / test_df['total_area']

### (8) capacity 관련 열 삭제

In [59]:
# train_df.drop(columns=['solar_power_capacity', 'ess_capacity', 'pcs_capacity'], inplace=True)

In [60]:
# test_df.drop(columns=['solar_power_capacity', 'ess_capacity', 'pcs_capacity'], inplace=True)

In [61]:
# train_df['solar_power_capacity'] = train_df['solar_power_capacity'].astype(float)
# train_df['ess_capacity'] = train_df['ess_capacity'].astype(float)
# train_df['pcs_capacity'] = train_df['pcs_capacity'].astype(float)

In [62]:
# test_df['solar_power_capacity'] = test_df['solar_power_capacity'].astype(float)
# test_df['ess_capacity'] = test_df['ess_capacity'].astype(float)
# test_df['pcs_capacity'] = test_df['pcs_capacity'].astype(float)

### (8) month 열 삭제

In [63]:
train_df.drop(columns=['month'], inplace=True)

In [64]:
test_df.drop(columns=['month'], inplace=True)

In [65]:
train_df.head()

Unnamed: 0,building_number,date_time,temperature,rainfall,windspeed,humidity,power_consumption,total_area,cooling_area,solar_power_capacity,...,building_type_Department Store and Outlet,building_type_Discount Mart,building_type_Hospital,building_type_Hotel and Resort,building_type_Knowledge Industry Center,building_type_Other Buildings,building_type_Public,building_type_Research Institute,building_type_University,AR
0,1,2022-06-01 00:00:00,18.6,0.0,0.9,42.0,1085.28,110634.0,39570.0,0,...,0,0,0,0,0,1,0,0,0,0.357666
1,1,2022-06-01 01:00:00,18.0,0.0,1.1,45.0,1047.36,110634.0,39570.0,0,...,0,0,0,0,0,1,0,0,0,0.357666
2,1,2022-06-01 02:00:00,17.7,0.0,1.5,45.0,974.88,110634.0,39570.0,0,...,0,0,0,0,0,1,0,0,0,0.357666
3,1,2022-06-01 03:00:00,16.7,0.0,1.4,48.0,953.76,110634.0,39570.0,0,...,0,0,0,0,0,1,0,0,0,0.357666
4,1,2022-06-01 04:00:00,18.4,0.0,2.8,43.0,986.4,110634.0,39570.0,0,...,0,0,0,0,0,1,0,0,0,0.357666


## Regression Model Fit

In [66]:
train_df.drop(columns=['date_time'], inplace=True)

In [67]:
test_df.drop(columns=['date_time'], inplace=True)

In [68]:
train_df.drop(columns=['week'], inplace=True)

In [69]:
test_df.drop(columns=['week'], inplace=True)

In [70]:
train_x = train_df.drop(columns=['power_consumption'])
train_y = train_df['power_consumption']

### `rf` (Random Forest Regressor)

In [71]:
cell_start_time = time()

model = RandomForestRegressor()
model.fit(train_x, train_y)

cell_end_time = time()
print("CELL RUN TIME : ",cell_end_time - cell_start_time)

CELL RUN TIME :  193.03836297988892


### XGBoost

In [72]:
# cell_start_time = time()

# model = xgb.XGBRegressor()
# model.fit(train_x, train_y)

# cell_end_time = time()
# print("CELL RUN TIME : ",cell_end_time - cell_start_time)

## Model Prediction

In [73]:
test_x = test_df

In [74]:
preds = model.predict(test_x)

## Save Model

In [75]:
current_str = datetime.now().strftime('%Y-%m-%d-%H%M%S')
print(current_str)

2023-07-28-162950


In [76]:
# save
joblib.dump(model, f"./model/{current_str}_Random Forest.joblib")

['./model/2023-07-28-162950_Random Forest.joblib']

## Load Model

In [77]:
model = joblib.load(f"./model/{current_str}_Random Forest.joblib")

## Submit

In [78]:
submission = pd.read_csv('./data/sample_submission.csv')
submission

Unnamed: 0,num_date_time,answer
0,1_20220825 00,0
1,1_20220825 01,0
2,1_20220825 02,0
3,1_20220825 03,0
4,1_20220825 04,0
...,...,...
16795,100_20220831 19,0
16796,100_20220831 20,0
16797,100_20220831 21,0
16798,100_20220831 22,0


In [79]:
submission['answer'] = preds
submission

Unnamed: 0,num_date_time,answer
0,1_20220825 00,2142.3744
1,1_20220825 01,2139.8880
2,1_20220825 02,1995.8400
3,1_20220825 03,1992.7296
4,1_20220825 04,2026.3440
...,...,...
16795,100_20220831 19,861.2712
16796,100_20220831 20,783.0888
16797,100_20220831 21,757.4424
16798,100_20220831 22,687.0360


In [80]:
submission.to_csv(f'./{current_str}_Random Forest.csv', index=False)