In [1]:
import warnings
warnings.filterwarnings('ignore')

# 데이터 읽기를 위한 라이브러리
import numpy as np
np.random.seed(0)
import pandas as pd
import gc, os, time
import scipy as sp
from pandas import DataFrame, Series
from datetime import datetime, date, timedelta
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn import decomposition

# 탐색적 데이터 분석을 위한 라이브러리
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import skew, norm, probplot, boxcox

# 모델링을 위한 라이브러리
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_selection import mutual_info_regression, VarianceThreshold

import pickle

pd.set_option('display.max_columns',999)

from tqdm import tqdm
from sklearn.model_selection import TimeSeriesSplit

def RMSE(y, y_pred):
    rmse = mean_squared_error(y, y_pred) ** 0.5
    return rmse

In [2]:
# 학습용 데이터 
train_sensor = pd.read_csv('train_sensor.csv')
train_quality = pd.read_csv('train_quality.csv')
# 평가용 데이터 
predict_sensor = pd.read_csv('predict_sensor.csv')

In [3]:
def make_dataset(X, y=None):
    
    # -----------------------------------
    # train_sensor (X 인자)
    # -----------------------------------
    ''' column을 param_alias 로만 pivot table 만들기. '''
    df_X = X.copy()
    df_X = df_X.sort_values(by='end_time',ascending=True)
    df_X['step_id'] = df_X['step_id'].apply(lambda x: str(x).zfill(2))
    # step_id 와 param_alias 를 결합한 임시 컬럼 step_param 을 생성합니다. ex. 17_EPD_para4
    df_X['step_param'] = df_X[['step_id', 'param_alias']].apply(lambda x: '_'.join(x), axis=1)
    df_X_tmp = df_X.pivot_table(index = ['module_name','key_val'], columns = 'step_param', values='mean_val', aggfunc='sum')
    # 데이터 통합을 위해 인덱스를 key_val 로 재설정합니다. 
    df_X_tmp = df_X_tmp.reset_index(level=[0, 1])
    df_X_tmp.set_index('key_val', inplace=True)

    # -----------------------------------
    # 시간 데이터 
    # -----------------------------------
    ''' step별 end_time을 column으로 pivot table 만들기 '''
    df_X['end_time_tmp'] = df_X.apply(lambda x: x['step_id'] + '_end_time', axis=1)
    df_X['end_time'] = pd.to_datetime(df_X['end_time'])
    # end_time 은 센서 데이터가 각 para 별로 서버에 도달한 시간으로 스텝 내 오차가 발생할 수 있습니다. 동일 스텝 구간내 공정 완료 시간이 다른 경우, min 함수를 사용하여 최초 수집된 time을 가져옵니다.
    df_time_tmp = df_X.pivot_table(index = ['key_val'], columns = 'end_time_tmp', values='end_time', aggfunc=lambda x : min(x.unique()))
    df_time_tmp = df_time_tmp.reset_index()
    df_time_tmp.set_index('key_val', inplace=True)

    # -----------------------------------
    # train_quality (y 인자)
    # -----------------------------------

    if y is None : # 평가용 데이터 
        col_target = []
        col_idx = ['module_name', 'key_val']
        df_complete = pd.concat([df_X_tmp, df_time_tmp], axis=1).reset_index()
        df_complete.rename(columns={'index':'key_val'},inplace=True)
    else : # 학습용 데이터 
        df_y = y.copy()
        df_y.set_index('key_val', inplace=True)
        col_target = ['y']
        col_idx = ['module_name', 'key_val', 'end_dt_tm']
        # 센서 데이터, 시간데이터, 품질지표에 대하여 인덱스(key_val)기준으로 데이터프레임을 통합합니다.
        df_complete = pd.concat([df_X_tmp, df_time_tmp, df_y], axis=1).reset_index()
        # 컬럼 이름을 변경합니다.  
        df_complete.rename(columns={'msure_val':'y'}, inplace=True)
        df_complete.rename(columns={'index':'key_val'},inplace=True)


    # 컬럼 순서를 정렬합니다. 
    col_feats = df_X['step_param'].unique().tolist()
    col_feats.sort()
    col_time = [s for s in df_complete.columns.tolist() if "_end_time" in s]
    col_all = col_idx + col_target + col_feats + col_time
    df_complete = df_complete[col_all]
    # 처음 step이 시작된 시점을 기준으로 다시 정렬(APC value를 먹고 들어가는 값을 기준으로 정렬하고 싶었음.)
    df_complete = df_complete.set_index(['module_name','key_val','04_end_time']).sort_index(level=[0,2,1],ascending=True).reset_index()
    df_complete = df_complete[col_all]
    
    # 컬럼을 소문자로 변경합니다. 
    df_complete.columns = df_complete.columns.str.lower()

    return df_complete

# 학습용 데이터 
train = make_dataset(train_sensor, train_quality)
# 평가용 데이터 
predict = make_dataset(predict_sensor)

In [4]:
print(len(train.columns))
print(len(predict.columns))

676
674


In [5]:
# 전체 및 개별 공정 소요시간 변수를 생성하는 함수입니다.
def gen_duration_feats(df, lst_stepsgap):
    
    # 전체 공정 소요시간(초) 변수를 생성합니다. 
    df['gen_tmdiff'] = (df['20_end_time'] - df['04_end_time']).dt.total_seconds()
    
    # 개별 스텝간 공정 소요시간(초) 변수를 생성합니다. 
    # ex. gen_tmdiff_0406 : 04 스텝 공정 완료 시간과 06 스텝 공정 완료 시간의 차이 
    
    for stepgap in lst_stepsgap:
        df[f'gen_tmdiff_{stepgap}'] = (df[f'{stepgap[2:]}_end_time'] - df[f'{stepgap[:2]}_end_time']).dt.total_seconds()

    return df

# 4. 데이터 전처리

In [6]:
# 전처리를 위한 학습용 데이터와 평가용 데이터를 복사합니다.
df_train = train.copy()
df_predict = predict.copy()
del train

In [7]:
# -----------------------------------
# 3 장 EDA 분석에 필요한 변수를 선언합니다.
# -----------------------------------

# 센서 컬럼과 날짜 컬럼을 정의합니다. 
col_sensor = df_train.iloc[:, 4:-7].columns.tolist() 
col_time = df_train.filter(regex='end').columns.tolist() 

assert len(col_sensor) == 665
assert len(col_time) == 8 

# 3.4절 공정 소요시간 분석에 필요한 변수를 정의합니다. 
lst_steps = ['04','06','12','13','17','18', '20']
lst_stepsgap = ['0406','0612','1213','1317','1718','1820']

''' step별로 fdc para명 따로 수집 '''
lst_sensors = []
for step in lst_steps:
    _ = [col for col in col_sensor if col[:2] == step]
    lst_sensors.append(_)

sensors_nm = list(map(lambda x: x[3:], lst_sensors[0]))

# 시간과 관련한 분석을 진행하기 위하여 날짜형으로 변환합니다. 
df_train[col_time] = df_train[col_time].apply(pd.to_datetime)

In [8]:
for_col_filter = []
for step_para in lst_sensors:
    for para in step_para:
        para = para.split('_')[0]+'_'+para.split('_')[1]
        for_col_filter.append(para)
for_col_filter = sorted(list(set(for_col_filter)))

In [9]:
# 전체 및 개별 공정 소요시간 7개의 변수를 생성합니다(3.4절)
lst_stepsgap = ['0406','0612','1213','1317','1718','1820']
df_train = gen_duration_feats(df_train, lst_stepsgap)
df_predict = gen_duration_feats(df_predict, lst_stepsgap)
df_train.filter(regex='tmdiff').head(2)

Unnamed: 0,gen_tmdiff,gen_tmdiff_0406,gen_tmdiff_0612,gen_tmdiff_1213,gen_tmdiff_1317,gen_tmdiff_1718,gen_tmdiff_1820
0,1912.0,146.0,846.0,16.0,477.0,16.0,411.0
1,1911.0,145.0,847.0,16.0,476.0,16.0,411.0


# Cyclic Transformation 적용

In [10]:
''' Cyclic Transformation 적용 '''
def cyclic_transformation(df, cols):
    for col in cols:
        step = col[:2]
        df[col] = pd.to_datetime(df[col])
        df[step+'_'+'hour'] = df[col].dt.hour
        df[step+'_'+'month'] = df[col].dt.month
        df[step+'_'+'day'] = df[col].dt.day
        df[step+'_'+'weekday'] = df[col].dt.weekday
        
        ## cyclic transformation on hour
        df[step+'_'+'hour_sin'] = np.sin(2 * np.pi * df[step+'_'+'hour']/23.0)
        df[step+'_'+'hour_cos'] = np.cos(2 * np.pi * df[step+'_'+'hour']/23.0)
        ## cyclic transformation on date 
        df[step+'_'+'date_sin'] = -np.sin(2 * np.pi * (df[step+'_'+'month']+df[step+'_'+'day']/31)/12)
        df[step+'_'+'date_cos'] = -np.cos(2 * np.pi * (df[step+'_'+'month']+df[step+'_'+'day']/31)/12)
        ## cyclic transformation on month
        df[step+'_'+'month_sin'] = -np.sin(2 * np.pi * df[step+'_'+'month']/12.0)
        df[step+'_'+'month_cos'] = -np.cos(2 * np.pi * df[step+'_'+'month']/12.0)
        ## cyclic transformation on weekday
        df[step+'_'+'weekday_sin'] = -np.sin(2 * np.pi * (df[step+'_'+'weekday']+1)/7.0)
        df[step+'_'+'weekday_cos'] = -np.cos(2 * np.pi * (df[step+'_'+'weekday']+1)/7.0)
        
        df.drop(step+'_'+'month',axis=1,inplace=True)
        df.drop(step+'_'+'month_sin',axis=1,inplace=True)
        df.drop(step+'_'+'month_cos',axis=1,inplace=True)

In [11]:
endtime_col = df_train.filter(regex='end_time$').columns.tolist()
cyclic_transformation(df_train, endtime_col)
cyclic_transformation(df_predict, endtime_col)

In [12]:
df_train.head(1)

Unnamed: 0,module_name,key_val,end_dt_tm,y,04_efem_para2,04_efem_para25,04_efem_para78,04_epd_para4,04_epd_para40,04_epd_para63,04_epd_para80,04_esc_para84,04_esc_para94,04_fr_para28,04_fr_para35,04_fr_para61,04_fr_para69,04_gas_para10,04_gas_para13,04_gas_para15,04_gas_para19,04_gas_para21,04_gas_para26,04_gas_para27,04_gas_para33,04_gas_para36,04_gas_para39,04_gas_para46,04_gas_para48,04_gas_para50,04_gas_para51,04_gas_para52,04_gas_para59,04_gas_para6,04_gas_para70,04_gas_para71,04_gas_para73,04_gas_para74,04_gas_para85,04_he_para1,04_he_para22,04_he_para88,04_he_para95,04_hv_para3,04_hv_para45,04_hv_para47,04_hv_para56,04_position_para72,04_power_para14,04_power_para49,04_power_para57,04_power_para68,04_power_para76,04_power_para82,04_pressure_para91,04_temp_para11,04_temp_para12,04_temp_para17,04_temp_para18,04_temp_para20,04_temp_para23,04_temp_para24,04_temp_para32,04_temp_para38,04_temp_para53,04_temp_para54,04_temp_para55,04_temp_para58,04_temp_para60,04_temp_para65,04_temp_para66,04_temp_para79,04_temp_para86,04_temp_para87,04_temp_para92,04_temp_para93,04_time_para16,04_time_para29,04_time_para30,04_time_para34,04_time_para37,04_time_para41,04_time_para42,04_time_para43,04_time_para44,04_time_para5,04_time_para62,04_time_para64,04_time_para67,04_time_para7,04_time_para75,04_time_para77,04_time_para8,04_time_para81,04_time_para83,04_time_para89,04_time_para9,04_time_para90,04_tmp_para31,06_efem_para2,06_efem_para25,06_efem_para78,06_epd_para4,06_epd_para40,06_epd_para63,06_epd_para80,06_esc_para84,06_esc_para94,06_fr_para28,06_fr_para35,06_fr_para61,06_fr_para69,06_gas_para10,06_gas_para13,06_gas_para15,06_gas_para19,06_gas_para21,06_gas_para26,06_gas_para27,06_gas_para33,06_gas_para36,06_gas_para39,06_gas_para46,06_gas_para48,06_gas_para50,06_gas_para51,06_gas_para52,06_gas_para59,06_gas_para6,06_gas_para70,06_gas_para71,06_gas_para73,06_gas_para74,06_gas_para85,06_he_para1,06_he_para22,06_he_para88,06_he_para95,06_hv_para3,06_hv_para45,06_hv_para47,06_hv_para56,06_position_para72,06_power_para14,06_power_para49,06_power_para57,06_power_para68,06_power_para76,06_power_para82,06_pressure_para91,06_temp_para11,06_temp_para12,06_temp_para17,06_temp_para18,06_temp_para20,06_temp_para23,06_temp_para24,06_temp_para32,06_temp_para38,06_temp_para53,06_temp_para54,06_temp_para55,06_temp_para58,06_temp_para60,06_temp_para65,06_temp_para66,06_temp_para79,06_temp_para86,06_temp_para87,06_temp_para92,06_temp_para93,06_time_para16,06_time_para29,06_time_para30,06_time_para34,06_time_para37,06_time_para41,06_time_para42,06_time_para43,06_time_para44,06_time_para5,06_time_para62,06_time_para64,06_time_para67,06_time_para7,06_time_para75,06_time_para77,06_time_para8,06_time_para81,06_time_para83,06_time_para89,06_time_para9,06_time_para90,06_tmp_para31,12_efem_para2,12_efem_para25,12_efem_para78,12_epd_para4,12_epd_para40,12_epd_para63,12_epd_para80,12_esc_para84,12_esc_para94,12_fr_para28,12_fr_para35,12_fr_para61,12_fr_para69,12_gas_para10,12_gas_para13,12_gas_para15,12_gas_para19,12_gas_para21,12_gas_para26,12_gas_para27,12_gas_para33,12_gas_para36,12_gas_para39,12_gas_para46,12_gas_para48,12_gas_para50,12_gas_para51,12_gas_para52,12_gas_para59,12_gas_para6,12_gas_para70,12_gas_para71,12_gas_para73,12_gas_para74,12_gas_para85,12_he_para1,12_he_para22,12_he_para88,12_he_para95,12_hv_para3,12_hv_para45,12_hv_para47,12_hv_para56,12_position_para72,12_power_para14,12_power_para49,12_power_para57,12_power_para68,12_power_para76,12_power_para82,12_pressure_para91,12_temp_para11,12_temp_para12,12_temp_para17,12_temp_para18,12_temp_para20,12_temp_para23,12_temp_para24,12_temp_para32,12_temp_para38,12_temp_para53,12_temp_para54,12_temp_para55,12_temp_para58,12_temp_para60,12_temp_para65,12_temp_para66,12_temp_para79,12_temp_para86,12_temp_para87,12_temp_para92,12_temp_para93,12_time_para16,12_time_para29,12_time_para30,12_time_para34,12_time_para37,12_time_para41,12_time_para42,12_time_para43,12_time_para44,12_time_para5,12_time_para62,12_time_para64,12_time_para67,12_time_para7,12_time_para75,12_time_para77,12_time_para8,12_time_para81,12_time_para83,12_time_para89,12_time_para9,12_time_para90,12_tmp_para31,13_efem_para2,13_efem_para25,13_efem_para78,13_epd_para4,13_epd_para40,13_epd_para63,13_epd_para80,13_esc_para84,13_esc_para94,13_fr_para28,13_fr_para35,13_fr_para61,13_fr_para69,13_gas_para10,13_gas_para13,13_gas_para15,13_gas_para19,13_gas_para21,13_gas_para26,13_gas_para27,13_gas_para33,13_gas_para36,13_gas_para39,13_gas_para46,13_gas_para48,13_gas_para50,13_gas_para51,13_gas_para52,13_gas_para59,13_gas_para6,13_gas_para70,13_gas_para71,13_gas_para73,13_gas_para74,13_gas_para85,13_he_para1,13_he_para22,13_he_para88,13_he_para95,13_hv_para3,13_hv_para45,13_hv_para47,13_hv_para56,13_position_para72,13_power_para14,13_power_para49,13_power_para57,13_power_para68,13_power_para76,13_power_para82,13_pressure_para91,13_temp_para11,13_temp_para12,13_temp_para17,13_temp_para18,13_temp_para20,13_temp_para23,13_temp_para24,13_temp_para32,13_temp_para38,13_temp_para53,13_temp_para54,13_temp_para55,13_temp_para58,13_temp_para60,13_temp_para65,13_temp_para66,13_temp_para79,13_temp_para86,13_temp_para87,13_temp_para92,13_temp_para93,13_time_para16,13_time_para29,13_time_para30,13_time_para34,13_time_para37,13_time_para41,13_time_para42,13_time_para43,13_time_para44,13_time_para5,13_time_para62,13_time_para64,13_time_para67,13_time_para7,13_time_para75,13_time_para77,13_time_para8,13_time_para81,13_time_para83,13_time_para89,13_time_para9,13_time_para90,13_tmp_para31,17_efem_para2,17_efem_para25,17_efem_para78,17_epd_para4,17_epd_para40,17_epd_para63,17_epd_para80,17_esc_para84,17_esc_para94,17_fr_para28,17_fr_para35,17_fr_para61,17_fr_para69,17_gas_para10,17_gas_para13,17_gas_para15,17_gas_para19,17_gas_para21,17_gas_para26,17_gas_para27,17_gas_para33,17_gas_para36,17_gas_para39,17_gas_para46,17_gas_para48,17_gas_para50,17_gas_para51,17_gas_para52,17_gas_para59,17_gas_para6,17_gas_para70,17_gas_para71,17_gas_para73,17_gas_para74,17_gas_para85,17_he_para1,17_he_para22,17_he_para88,17_he_para95,17_hv_para3,17_hv_para45,17_hv_para47,17_hv_para56,17_position_para72,17_power_para14,17_power_para49,17_power_para57,17_power_para68,17_power_para76,17_power_para82,17_pressure_para91,17_temp_para11,17_temp_para12,17_temp_para17,17_temp_para18,17_temp_para20,17_temp_para23,17_temp_para24,17_temp_para32,17_temp_para38,17_temp_para53,17_temp_para54,17_temp_para55,17_temp_para58,17_temp_para60,17_temp_para65,17_temp_para66,17_temp_para79,17_temp_para86,17_temp_para87,17_temp_para92,17_temp_para93,17_time_para16,17_time_para29,17_time_para30,17_time_para34,17_time_para37,17_time_para41,17_time_para42,17_time_para43,17_time_para44,17_time_para5,17_time_para62,17_time_para64,17_time_para67,17_time_para7,17_time_para75,17_time_para77,17_time_para8,17_time_para81,17_time_para83,17_time_para89,17_time_para9,17_time_para90,17_tmp_para31,18_efem_para2,18_efem_para25,18_efem_para78,18_epd_para4,18_epd_para40,18_epd_para63,18_epd_para80,18_esc_para84,18_esc_para94,18_fr_para28,18_fr_para35,18_fr_para61,18_fr_para69,18_gas_para10,18_gas_para13,18_gas_para15,18_gas_para19,18_gas_para21,18_gas_para26,18_gas_para27,18_gas_para33,18_gas_para36,18_gas_para39,18_gas_para46,18_gas_para48,18_gas_para50,18_gas_para51,18_gas_para52,18_gas_para59,18_gas_para6,18_gas_para70,18_gas_para71,18_gas_para73,18_gas_para74,18_gas_para85,18_he_para1,18_he_para22,18_he_para88,18_he_para95,18_hv_para3,18_hv_para45,18_hv_para47,18_hv_para56,18_position_para72,18_power_para14,18_power_para49,18_power_para57,18_power_para68,18_power_para76,18_power_para82,18_pressure_para91,18_temp_para11,18_temp_para12,18_temp_para17,18_temp_para18,18_temp_para20,18_temp_para23,18_temp_para24,18_temp_para32,18_temp_para38,18_temp_para53,18_temp_para54,18_temp_para55,18_temp_para58,18_temp_para60,18_temp_para65,18_temp_para66,18_temp_para79,18_temp_para86,18_temp_para87,18_temp_para92,18_temp_para93,18_time_para16,18_time_para29,18_time_para30,18_time_para34,18_time_para37,18_time_para41,18_time_para42,18_time_para43,18_time_para44,18_time_para5,18_time_para62,18_time_para64,18_time_para67,18_time_para7,18_time_para75,18_time_para77,18_time_para8,18_time_para81,18_time_para83,18_time_para89,18_time_para9,18_time_para90,18_tmp_para31,20_efem_para2,20_efem_para25,20_efem_para78,20_epd_para4,20_epd_para40,20_epd_para63,20_epd_para80,20_esc_para84,20_esc_para94,20_fr_para28,20_fr_para35,20_fr_para61,20_fr_para69,20_gas_para10,20_gas_para13,20_gas_para15,20_gas_para19,20_gas_para21,20_gas_para26,20_gas_para27,20_gas_para33,20_gas_para36,20_gas_para39,20_gas_para46,20_gas_para48,20_gas_para50,20_gas_para51,20_gas_para52,20_gas_para59,20_gas_para6,20_gas_para70,20_gas_para71,20_gas_para73,20_gas_para74,20_gas_para85,20_he_para1,20_he_para22,20_he_para88,20_he_para95,20_hv_para3,20_hv_para45,20_hv_para47,20_hv_para56,20_position_para72,20_power_para14,20_power_para49,20_power_para57,20_power_para68,20_power_para76,20_power_para82,20_pressure_para91,20_temp_para11,20_temp_para12,20_temp_para17,20_temp_para18,20_temp_para20,20_temp_para23,20_temp_para24,20_temp_para32,20_temp_para38,20_temp_para53,20_temp_para54,20_temp_para55,20_temp_para58,20_temp_para60,20_temp_para65,20_temp_para66,20_temp_para79,20_temp_para86,20_temp_para87,20_temp_para92,20_temp_para93,20_time_para16,20_time_para29,20_time_para30,20_time_para34,20_time_para37,20_time_para41,20_time_para42,20_time_para43,20_time_para44,20_time_para5,20_time_para62,20_time_para64,20_time_para67,20_time_para7,20_time_para75,20_time_para77,20_time_para8,20_time_para81,20_time_para83,20_time_para89,20_time_para9,20_time_para90,20_tmp_para31,04_end_time,06_end_time,12_end_time,13_end_time,17_end_time,18_end_time,20_end_time,gen_tmdiff,gen_tmdiff_0406,gen_tmdiff_0612,gen_tmdiff_1213,gen_tmdiff_1317,gen_tmdiff_1718,gen_tmdiff_1820,04_hour,04_day,04_weekday,04_hour_sin,04_hour_cos,04_date_sin,04_date_cos,04_weekday_sin,04_weekday_cos,06_hour,06_day,06_weekday,06_hour_sin,06_hour_cos,06_date_sin,06_date_cos,06_weekday_sin,06_weekday_cos,12_hour,12_day,12_weekday,12_hour_sin,12_hour_cos,12_date_sin,12_date_cos,12_weekday_sin,12_weekday_cos,13_hour,13_day,13_weekday,13_hour_sin,13_hour_cos,13_date_sin,13_date_cos,13_weekday_sin,13_weekday_cos,17_hour,17_day,17_weekday,17_hour_sin,17_hour_cos,17_date_sin,17_date_cos,17_weekday_sin,17_weekday_cos,18_hour,18_day,18_weekday,18_hour_sin,18_hour_cos,18_date_sin,18_date_cos,18_weekday_sin,18_weekday_cos,20_hour,20_day,20_weekday,20_hour_sin,20_hour_cos,20_date_sin,20_date_cos,20_weekday_sin,20_weekday_cos
0,EQ10_PM1,LOT5_21,2021-10-03 07:10:22,1260.0892,1631.273,1639.727,5.06654,0.0,0.0,0.0,0.0,2999.0,0.0,-2999.0,0.0,0.0,2999.0,0.0,0.0,40.0,0.0,2.41871,30.04839,0.0,0.0,2.477097,0.0,0.0,0.0,0.0,11.6,24.95807,0.09697,2.834516,0.0,0.0,44.90645,50.1,70.26363,15.0,0.906452,0.726923,30.0,0.09,299.9667,150.0,0.241481,7.033333,0.0,0.0,1000.0,0.0,1062.933,0.0,45.02667,0.598182,25.1,-0.021212,15.0697,26.60606,150.1848,20.0,19.96061,-9.842424,149.8545,47.52941,22.94848,24.5,44.51515,21.45152,90.0,0.579091,22.04243,149.7182,35.01515,149.8545,2460.4,723.6,2460.4,2460.4,132.3333,132.3333,2460.4,2460.4,2460.4,0.0,2460.4,132.3333,2460.4,132.3333,2460.4,2460.4,132.3333,2460.4,132.3333,2460.4,132.3333,2460.4,2.351515,1631.445,1640.059,5.074046,0.0,-1.201783,0.0,0.0,2999.0,0.0,-2999.0,0.0,0.0,2999.0,0.0,0.0,0.0,0.0,3.481353,32.89549,0.0,0.0,3.674587,0.0,0.0,0.0,0.0,0.0,34.09699,179.697,3.162481,15.0,120.9215,33.00075,0.0,0.0,15.0,0.900752,0.704688,30.0,0.0,0.0,0.0,0.0,9.39697,0.0,0.007576,2000.0,0.0,1525.651,0.0,25.00379,0.59763,25.1,0.042222,15.06667,26.62222,149.9948,20.0,19.96,-12.28519,150.0393,42.75735,22.95185,24.5,36.92593,22.02148,89.99185,0.579111,23.38593,150.4793,35.00296,150.0393,2460.417,723.6166,2460.417,2460.417,132.3333,132.3333,2460.417,2460.417,2460.417,0.0,2460.417,132.3333,2460.417,132.3333,2460.417,2460.417,132.3333,2460.417,132.3333,2460.417,132.3333,2460.417,2.619259,1631.368,1642.6316,5.038704,0.0,0.0,0.0,0.0,2999.037,0.0,2999.0,0.0,0.0,2999.0,0.0,99.3,0.0,0.0,5.136,35.01454,32.99474,0.0,5.382364,0.0,132.9,0.0,0.0,0.0,33.22364,184.5947,4.430727,0.0,0.0,31.81273,36.0,0.0,20.0,1.3,1.4,50.0,0.299804,272.0,434.6111,0.560196,16.19445,6827.481,0.0,4000.0,0.0,979.4445,15001.09,19.99815,0.601053,25.1,-0.005263,14.7,14.77193,150.0,70.26667,14.95088,-31.40175,149.9702,0.0,22.94386,24.5,18.0,20.67544,90.0,0.577368,26.92456,150.1,35.0,149.9702,2460.483,723.6667,2460.483,2460.483,132.4,132.4,2460.483,2460.483,2460.483,137.0,2460.483,132.4,2460.483,132.4,2460.483,2460.483,132.4,2460.483,132.4,2460.483,132.4,2460.483,3.71579,1630.923,1639.692,5.079873,0.0,0.0,0.0,0.0,2998.9,0.0,2998.9,0.0,0.0,2999.0,0.0,144.6,0.0,0.0,4.876666,35.09167,24.66923,0.0,5.1225,0.0,34.93077,100.0,0.0,7.0,33.16667,118.6,4.275833,0.0,0.0,31.85833,0.0,0.0,20.0,1.3,1.3,50.0,0.23,272.0,434.8,0.388571,18.03,7558.8,0.0,4000.0,0.0,895.5,15001.33,15.01,0.601538,25.1,0.0,14.66154,14.0,150.0,73.5,15.03077,-30.98462,150.1923,0.0,22.94615,24.5,17.30769,20.66154,90.0,0.577692,26.9,150.1,35.0,150.1923,2460.4834,723.6667,2460.483,2460.483,132.4,132.4,2460.483,2460.4834,2460.483,137.0,2460.483,132.4,2460.483,132.4,2460.483,2460.483,132.4,2460.483,132.4,2460.483,132.4,2460.483,3.553846,1632.684,1641.842,5.084785,0.0,0.0,0.0,0.0,2999.0,0.0,2999.0,0.0,0.0,2999.0,0.0,99.3,0.0,0.0,5.144909,35.00182,47.99649,0.0,5.394,0.0,117.8983,0.0,0.0,0.0,33.20182,184.5947,4.451818,0.0,0.0,31.91636,36.0,0.0,20.0,1.3,1.392,50.0,0.290588,272.0,434.7963,0.550196,16.37408,6800.796,0.0,4000.0,0.0,977.8889,15001.11,19.99815,0.601754,25.09474,0.045614,14.69123,12.0,150.0,72.56667,14.9386,-31.54912,150.014,0.0,22.94912,24.5,14.0,20.7,89.99123,0.577895,27.05439,150.0,35.0,150.014,2460.7,723.8834,2460.7,2460.7,132.6167,132.6167,2460.7,2460.7,2460.7,137.0,2460.7,132.6167,2460.7,132.6167,2460.7,2460.7,132.6167,2460.7,132.6167,2460.7,132.6167,2460.7,3.768421,1632.077,1637.385,5.116795,0.0,0.0,0.0,0.0,2999.0,0.0,2999.0,0.0,0.0,2999.0,0.0,159.8,0.0,0.0,4.812728,35.05455,24.67692,0.0,5.018182,0.0,19.88462,100.0,0.0,7.0,33.14545,109.6,4.236363,0.0,0.0,31.93636,0.0,0.0,20.0,1.3,1.3,50.0,0.218571,272.0,434.7,0.338571,17.7,7649.3,0.0,4000.0,0.0,881.0,15001.22,15.04,0.602308,25.1,0.023077,14.7,12.0,150.0,72.8125,14.94615,-31.41538,150.0692,0.0,22.96154,24.5,14.0,20.73846,90.0,0.577692,27.11538,150.0,35.0,150.0692,2460.7,723.8834,2460.7,2460.7,132.6167,132.6167,2460.7,2460.7,2460.7,137.0,2460.7,132.6167,2460.7,132.6167,2460.7,2460.7,132.6167,2460.7,132.6167,2460.7,132.6167,2460.7,3.5,1632.209,1642.458,5.056325,999.9999,0.0,0.0,0.0,2999.0,0.0,2999.0,0.0,0.0,2999.0,0.0,60.0,0.0,0.0,4.839175,35.075,29.99478,119.9,5.357225,6.1,56.99975,0.0,99.9301,0.0,35.79625,94.91542,4.005325,0.0,0.0,28.90425,0.0,0.0,20.0,1.2985,1.283797,50.0,0.27,300.0,300.0,0.659293,10.67644,5507.023,0.0,2800.0,0.0,856.4361,10200.11,35.001,0.600323,25.1,-0.010945,14.6908,13.73632,149.9821,52.21481,14.94627,-29.72363,149.8346,0.0,22.94279,24.5,16.27115,20.19403,89.99477,0.576269,25.88433,149.942,34.93358,149.8346,2460.817,724.0167,2460.817,2460.817,132.7333,132.7333,2460.817,2460.817,2460.817,137.0,2460.817,132.7333,2460.817,132.7333,2460.817,2460.817,132.7333,2460.817,132.7333,2460.817,132.7333,2460.817,3.399254,2021-10-02 22:14:27,2021-10-02 22:16:53,2021-10-02 22:30:59,2021-10-02 22:31:15,2021-10-02 22:39:12,2021-10-02 22:39:28,2021-10-02 22:46:19,1912.0,146.0,846.0,16.0,477.0,16.0,411.0,22,2,5,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,22,2,5,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,22,2,5,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,22,2,5,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,22,2,5,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,22,2,5,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,22,2,5,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349


In [13]:
df_predict.head(1)

Unnamed: 0,module_name,key_val,04_efem_para2,04_efem_para25,04_efem_para78,04_epd_para4,04_epd_para40,04_epd_para63,04_epd_para80,04_esc_para84,04_esc_para94,04_fr_para28,04_fr_para35,04_fr_para61,04_fr_para69,04_gas_para10,04_gas_para13,04_gas_para15,04_gas_para19,04_gas_para21,04_gas_para26,04_gas_para27,04_gas_para33,04_gas_para36,04_gas_para39,04_gas_para46,04_gas_para48,04_gas_para50,04_gas_para51,04_gas_para52,04_gas_para59,04_gas_para6,04_gas_para70,04_gas_para71,04_gas_para73,04_gas_para74,04_gas_para85,04_he_para1,04_he_para22,04_he_para88,04_he_para95,04_hv_para3,04_hv_para45,04_hv_para47,04_hv_para56,04_position_para72,04_power_para14,04_power_para49,04_power_para57,04_power_para68,04_power_para76,04_power_para82,04_pressure_para91,04_temp_para11,04_temp_para12,04_temp_para17,04_temp_para18,04_temp_para20,04_temp_para23,04_temp_para24,04_temp_para32,04_temp_para38,04_temp_para53,04_temp_para54,04_temp_para55,04_temp_para58,04_temp_para60,04_temp_para65,04_temp_para66,04_temp_para79,04_temp_para86,04_temp_para87,04_temp_para92,04_temp_para93,04_time_para16,04_time_para29,04_time_para30,04_time_para34,04_time_para37,04_time_para41,04_time_para42,04_time_para43,04_time_para44,04_time_para5,04_time_para62,04_time_para64,04_time_para67,04_time_para7,04_time_para75,04_time_para77,04_time_para8,04_time_para81,04_time_para83,04_time_para89,04_time_para9,04_time_para90,04_tmp_para31,06_efem_para2,06_efem_para25,06_efem_para78,06_epd_para4,06_epd_para40,06_epd_para63,06_epd_para80,06_esc_para84,06_esc_para94,06_fr_para28,06_fr_para35,06_fr_para61,06_fr_para69,06_gas_para10,06_gas_para13,06_gas_para15,06_gas_para19,06_gas_para21,06_gas_para26,06_gas_para27,06_gas_para33,06_gas_para36,06_gas_para39,06_gas_para46,06_gas_para48,06_gas_para50,06_gas_para51,06_gas_para52,06_gas_para59,06_gas_para6,06_gas_para70,06_gas_para71,06_gas_para73,06_gas_para74,06_gas_para85,06_he_para1,06_he_para22,06_he_para88,06_he_para95,06_hv_para3,06_hv_para45,06_hv_para47,06_hv_para56,06_position_para72,06_power_para14,06_power_para49,06_power_para57,06_power_para68,06_power_para76,06_power_para82,06_pressure_para91,06_temp_para11,06_temp_para12,06_temp_para17,06_temp_para18,06_temp_para20,06_temp_para23,06_temp_para24,06_temp_para32,06_temp_para38,06_temp_para53,06_temp_para54,06_temp_para55,06_temp_para58,06_temp_para60,06_temp_para65,06_temp_para66,06_temp_para79,06_temp_para86,06_temp_para87,06_temp_para92,06_temp_para93,06_time_para16,06_time_para29,06_time_para30,06_time_para34,06_time_para37,06_time_para41,06_time_para42,06_time_para43,06_time_para44,06_time_para5,06_time_para62,06_time_para64,06_time_para67,06_time_para7,06_time_para75,06_time_para77,06_time_para8,06_time_para81,06_time_para83,06_time_para89,06_time_para9,06_time_para90,06_tmp_para31,12_efem_para2,12_efem_para25,12_efem_para78,12_epd_para4,12_epd_para40,12_epd_para63,12_epd_para80,12_esc_para84,12_esc_para94,12_fr_para28,12_fr_para35,12_fr_para61,12_fr_para69,12_gas_para10,12_gas_para13,12_gas_para15,12_gas_para19,12_gas_para21,12_gas_para26,12_gas_para27,12_gas_para33,12_gas_para36,12_gas_para39,12_gas_para46,12_gas_para48,12_gas_para50,12_gas_para51,12_gas_para52,12_gas_para59,12_gas_para6,12_gas_para70,12_gas_para71,12_gas_para73,12_gas_para74,12_gas_para85,12_he_para1,12_he_para22,12_he_para88,12_he_para95,12_hv_para3,12_hv_para45,12_hv_para47,12_hv_para56,12_position_para72,12_power_para14,12_power_para49,12_power_para57,12_power_para68,12_power_para76,12_power_para82,12_pressure_para91,12_temp_para11,12_temp_para12,12_temp_para17,12_temp_para18,12_temp_para20,12_temp_para23,12_temp_para24,12_temp_para32,12_temp_para38,12_temp_para53,12_temp_para54,12_temp_para55,12_temp_para58,12_temp_para60,12_temp_para65,12_temp_para66,12_temp_para79,12_temp_para86,12_temp_para87,12_temp_para92,12_temp_para93,12_time_para16,12_time_para29,12_time_para30,12_time_para34,12_time_para37,12_time_para41,12_time_para42,12_time_para43,12_time_para44,12_time_para5,12_time_para62,12_time_para64,12_time_para67,12_time_para7,12_time_para75,12_time_para77,12_time_para8,12_time_para81,12_time_para83,12_time_para89,12_time_para9,12_time_para90,12_tmp_para31,13_efem_para2,13_efem_para25,13_efem_para78,13_epd_para4,13_epd_para40,13_epd_para63,13_epd_para80,13_esc_para84,13_esc_para94,13_fr_para28,13_fr_para35,13_fr_para61,13_fr_para69,13_gas_para10,13_gas_para13,13_gas_para15,13_gas_para19,13_gas_para21,13_gas_para26,13_gas_para27,13_gas_para33,13_gas_para36,13_gas_para39,13_gas_para46,13_gas_para48,13_gas_para50,13_gas_para51,13_gas_para52,13_gas_para59,13_gas_para6,13_gas_para70,13_gas_para71,13_gas_para73,13_gas_para74,13_gas_para85,13_he_para1,13_he_para22,13_he_para88,13_he_para95,13_hv_para3,13_hv_para45,13_hv_para47,13_hv_para56,13_position_para72,13_power_para14,13_power_para49,13_power_para57,13_power_para68,13_power_para76,13_power_para82,13_pressure_para91,13_temp_para11,13_temp_para12,13_temp_para17,13_temp_para18,13_temp_para20,13_temp_para23,13_temp_para24,13_temp_para32,13_temp_para38,13_temp_para53,13_temp_para54,13_temp_para55,13_temp_para58,13_temp_para60,13_temp_para65,13_temp_para66,13_temp_para79,13_temp_para86,13_temp_para87,13_temp_para92,13_temp_para93,13_time_para16,13_time_para29,13_time_para30,13_time_para34,13_time_para37,13_time_para41,13_time_para42,13_time_para43,13_time_para44,13_time_para5,13_time_para62,13_time_para64,13_time_para67,13_time_para7,13_time_para75,13_time_para77,13_time_para8,13_time_para81,13_time_para83,13_time_para89,13_time_para9,13_time_para90,13_tmp_para31,17_efem_para2,17_efem_para25,17_efem_para78,17_epd_para4,17_epd_para40,17_epd_para63,17_epd_para80,17_esc_para84,17_esc_para94,17_fr_para28,17_fr_para35,17_fr_para61,17_fr_para69,17_gas_para10,17_gas_para13,17_gas_para15,17_gas_para19,17_gas_para21,17_gas_para26,17_gas_para27,17_gas_para33,17_gas_para36,17_gas_para39,17_gas_para46,17_gas_para48,17_gas_para50,17_gas_para51,17_gas_para52,17_gas_para59,17_gas_para6,17_gas_para70,17_gas_para71,17_gas_para73,17_gas_para74,17_gas_para85,17_he_para1,17_he_para22,17_he_para88,17_he_para95,17_hv_para3,17_hv_para45,17_hv_para47,17_hv_para56,17_position_para72,17_power_para14,17_power_para49,17_power_para57,17_power_para68,17_power_para76,17_power_para82,17_pressure_para91,17_temp_para11,17_temp_para12,17_temp_para17,17_temp_para18,17_temp_para20,17_temp_para23,17_temp_para24,17_temp_para32,17_temp_para38,17_temp_para53,17_temp_para54,17_temp_para55,17_temp_para58,17_temp_para60,17_temp_para65,17_temp_para66,17_temp_para79,17_temp_para86,17_temp_para87,17_temp_para92,17_temp_para93,17_time_para16,17_time_para29,17_time_para30,17_time_para34,17_time_para37,17_time_para41,17_time_para42,17_time_para43,17_time_para44,17_time_para5,17_time_para62,17_time_para64,17_time_para67,17_time_para7,17_time_para75,17_time_para77,17_time_para8,17_time_para81,17_time_para83,17_time_para89,17_time_para9,17_time_para90,17_tmp_para31,18_efem_para2,18_efem_para25,18_efem_para78,18_epd_para4,18_epd_para40,18_epd_para63,18_epd_para80,18_esc_para84,18_esc_para94,18_fr_para28,18_fr_para35,18_fr_para61,18_fr_para69,18_gas_para10,18_gas_para13,18_gas_para15,18_gas_para19,18_gas_para21,18_gas_para26,18_gas_para27,18_gas_para33,18_gas_para36,18_gas_para39,18_gas_para46,18_gas_para48,18_gas_para50,18_gas_para51,18_gas_para52,18_gas_para59,18_gas_para6,18_gas_para70,18_gas_para71,18_gas_para73,18_gas_para74,18_gas_para85,18_he_para1,18_he_para22,18_he_para88,18_he_para95,18_hv_para3,18_hv_para45,18_hv_para47,18_hv_para56,18_position_para72,18_power_para14,18_power_para49,18_power_para57,18_power_para68,18_power_para76,18_power_para82,18_pressure_para91,18_temp_para11,18_temp_para12,18_temp_para17,18_temp_para18,18_temp_para20,18_temp_para23,18_temp_para24,18_temp_para32,18_temp_para38,18_temp_para53,18_temp_para54,18_temp_para55,18_temp_para58,18_temp_para60,18_temp_para65,18_temp_para66,18_temp_para79,18_temp_para86,18_temp_para87,18_temp_para92,18_temp_para93,18_time_para16,18_time_para29,18_time_para30,18_time_para34,18_time_para37,18_time_para41,18_time_para42,18_time_para43,18_time_para44,18_time_para5,18_time_para62,18_time_para64,18_time_para67,18_time_para7,18_time_para75,18_time_para77,18_time_para8,18_time_para81,18_time_para83,18_time_para89,18_time_para9,18_time_para90,18_tmp_para31,20_efem_para2,20_efem_para25,20_efem_para78,20_epd_para4,20_epd_para40,20_epd_para63,20_epd_para80,20_esc_para84,20_esc_para94,20_fr_para28,20_fr_para35,20_fr_para61,20_fr_para69,20_gas_para10,20_gas_para13,20_gas_para15,20_gas_para19,20_gas_para21,20_gas_para26,20_gas_para27,20_gas_para33,20_gas_para36,20_gas_para39,20_gas_para46,20_gas_para48,20_gas_para50,20_gas_para51,20_gas_para52,20_gas_para59,20_gas_para6,20_gas_para70,20_gas_para71,20_gas_para73,20_gas_para74,20_gas_para85,20_he_para1,20_he_para22,20_he_para88,20_he_para95,20_hv_para3,20_hv_para45,20_hv_para47,20_hv_para56,20_position_para72,20_power_para14,20_power_para49,20_power_para57,20_power_para68,20_power_para76,20_power_para82,20_pressure_para91,20_temp_para11,20_temp_para12,20_temp_para17,20_temp_para18,20_temp_para20,20_temp_para23,20_temp_para24,20_temp_para32,20_temp_para38,20_temp_para53,20_temp_para54,20_temp_para55,20_temp_para58,20_temp_para60,20_temp_para65,20_temp_para66,20_temp_para79,20_temp_para86,20_temp_para87,20_temp_para92,20_temp_para93,20_time_para16,20_time_para29,20_time_para30,20_time_para34,20_time_para37,20_time_para41,20_time_para42,20_time_para43,20_time_para44,20_time_para5,20_time_para62,20_time_para64,20_time_para67,20_time_para7,20_time_para75,20_time_para77,20_time_para8,20_time_para81,20_time_para83,20_time_para89,20_time_para9,20_time_para90,20_tmp_para31,04_end_time,06_end_time,12_end_time,13_end_time,17_end_time,18_end_time,20_end_time,gen_tmdiff,gen_tmdiff_0406,gen_tmdiff_0612,gen_tmdiff_1213,gen_tmdiff_1317,gen_tmdiff_1718,gen_tmdiff_1820,04_hour,04_day,04_weekday,04_hour_sin,04_hour_cos,04_date_sin,04_date_cos,04_weekday_sin,04_weekday_cos,06_hour,06_day,06_weekday,06_hour_sin,06_hour_cos,06_date_sin,06_date_cos,06_weekday_sin,06_weekday_cos,12_hour,12_day,12_weekday,12_hour_sin,12_hour_cos,12_date_sin,12_date_cos,12_weekday_sin,12_weekday_cos,13_hour,13_day,13_weekday,13_hour_sin,13_hour_cos,13_date_sin,13_date_cos,13_weekday_sin,13_weekday_cos,17_hour,17_day,17_weekday,17_hour_sin,17_hour_cos,17_date_sin,17_date_cos,17_weekday_sin,17_weekday_cos,18_hour,18_day,18_weekday,18_hour_sin,18_hour_cos,18_date_sin,18_date_cos,18_weekday_sin,18_weekday_cos,20_hour,20_day,20_weekday,20_hour_sin,20_hour_cos,20_date_sin,20_date_cos,20_weekday_sin,20_weekday_cos
0,EQ10_PM1,LOT380_17,1626.8125,1635.9062,5.141955,0.0,0.0,0.0,0.0,2999.0,0.0,-2999.0,0.0,0.0,2999.0,0.0,0.0,40.0,0.0,2.296333,30.066668,0.0,0.0,2.341,0.0,0.0,0.0,0.0,9.3,24.963333,0.1,2.659333,0.0,0.0,44.883335,50.1,70.296875,15.0,0.8,0.824,30.0,0.0,0.0,0.0,0.0,7.096552,0.0,0.0,1000.0,0.0,1078.7931,0.0,44.99655,0.600625,25.1,0.0,15.084375,28.71875,150.16562,20.0,19.971874,-9.809375,149.87813,44.969696,22.946875,24.5,43.78125,21.421875,90.0,0.580313,22.05625,149.8625,35.021873,149.87813,2965.1,1228.3,2965.1,37.8,637.01666,37.8,2965.1,2965.1,2965.1,0.0,2965.1,37.8,2965.1,637.01666,2965.1,2965.1,37.8,2965.1,637.01666,2965.1,37.8,37.8,2.378125,1627.4822,1637.7142,5.141538,98.89975,-0.114427,-0.11261,0.0,2999.0,0.0,-2999.0,0.0,0.0,2999.0,0.0,0.0,0.0,0.0,3.314455,32.89909,0.0,0.0,3.538273,0.0,0.0,0.0,0.0,0.0,34.09727,179.7,2.963,15.0,120.92411,32.986362,0.0,0.0,15.0,0.789091,0.895238,30.0,0.0,0.0,0.0,0.0,9.4,0.0,0.0,2500.0,0.0,1679.6422,0.0,25.0,0.600625,25.1,0.041964,15.082143,26.267857,150.10893,20.0,19.957144,-12.7,150.04018,42.230087,22.951786,24.5,38.535713,22.144644,89.998215,0.579911,23.786608,150.44643,35.01518,150.04018,2965.1,1228.3,2965.1,37.816666,637.01666,37.816666,2965.1,2965.1,2965.1,0.0,2965.1,37.816666,2965.1,637.01666,2965.1,2965.1,37.816666,2965.1,637.01666,2965.1,37.816666,37.816666,2.603571,1626.6316,1637.2632,5.148175,0.0,0.0,0.0,0.0,2999.0,0.0,2999.0,0.0,0.0,2999.0,0.0,99.3,0.0,0.0,4.888727,35.02727,32.98772,0.0,5.198727,0.0,132.89648,0.0,0.0,0.0,33.18727,184.6,4.131273,0.0,0.0,31.896364,36.0,0.0,20.0,1.1,1.396,50.0,0.31,258.72223,484.0,0.541765,16.196297,6868.815,0.0,4000.0,0.0,981.5926,15001.151,19.998148,0.602807,25.1,0.054386,14.692983,12.017544,150.0,71.083336,14.940351,-31.598246,150.02808,0.0,22.93158,24.5,17.0,20.68772,90.00702,0.57807,26.970175,150.04736,35.0,150.02808,2965.15,1228.35,2965.15,37.85,637.06665,37.85,2965.15,2965.15,2965.15,113.2,2965.15,37.85,2965.15,637.06665,2965.15,2965.15,37.85,2965.15,637.06665,2965.15,37.85,37.85,3.691228,1626.3077,1637.3846,5.091155,0.0,0.0,0.0,0.0,2999.0,0.0,2999.0,0.0,0.0,2999.0,0.0,144.6,0.0,0.0,4.614545,35.00909,24.684616,0.0,4.924545,0.0,34.93077,100.0,0.0,7.0,33.2,118.6,3.976364,0.0,0.0,31.9,0.0,0.0,20.0,1.1,1.266667,50.0,0.248571,259.0,484.0,0.367143,18.04,7601.4,0.0,4000.0,0.0,900.4,15001.333,15.01,0.602308,25.1,0.007692,14.7,12.0,150.0,73.0,14.969231,-31.184616,150.16924,0.0,22.984615,24.5,17.0,20.638462,90.03077,0.579231,26.923077,150.0,35.0,150.16924,2965.15,1228.35,2965.15,37.85,637.06665,37.85,2965.15,2965.15,2965.15,113.2,2965.15,37.85,2965.15,637.06665,2965.15,2965.15,37.85,2965.15,637.06665,2965.15,37.85,37.85,3.461539,1626.1052,1634.3684,5.169695,0.0,0.0,0.0,0.0,2999.0,0.0,2999.0,0.0,0.0,2999.0,0.0,99.3,0.0,0.0,4.902727,35.04727,47.991226,0.0,5.218364,0.0,117.89825,0.0,0.0,0.0,33.17818,184.6,4.146545,0.0,0.0,31.901817,36.0,0.0,20.0,1.1,1.398,50.0,0.310196,258.72223,484.0,0.532157,16.366667,6837.4814,0.0,4000.0,0.0,980.2778,15001.227,19.996296,0.602807,25.1,0.003509,14.7,10.0,150.0,72.11667,14.938597,-31.589474,150.01404,0.0,22.947369,24.5,15.964912,20.708773,90.01755,0.579298,27.1,150.1,34.970177,150.01404,2965.3667,1228.55,2965.3667,38.066666,637.2833,38.066666,2965.3667,2965.3667,2965.3667,113.2,2965.3667,38.066666,2965.3667,637.2833,2965.3667,2965.3667,38.066666,2965.3667,637.2833,2965.3667,38.066666,38.066666,3.740351,1629.0,1634.5,5.194314,0.0,0.0,0.0,0.0,2999.0,0.0,2999.0,0.0,0.0,2999.0,0.0,159.8,0.0,0.0,4.557,35.02,24.708334,0.0,4.844,0.0,20.091667,100.0,0.0,7.0,33.16,109.6,3.927,0.0,0.0,31.94,0.0,0.0,20.0,1.06,1.24,50.0,0.23,258.8889,484.0,0.32,17.766666,7691.1113,0.0,4000.0,0.0,885.7778,15001.25,15.066667,0.601667,25.1,-0.2,14.7,9.916667,150.0,74.375,14.908334,-30.941668,150.24167,0.0,22.958334,24.5,15.166667,20.7,89.98333,0.5775,27.116667,150.1,35.0,150.24167,2965.3667,1228.55,2965.3667,38.066666,637.2833,38.066666,2965.3667,2965.3667,2965.3667,113.2,2965.3667,38.066666,2965.3667,637.2833,2965.3667,2965.3667,38.066666,2965.3667,637.2833,2965.3667,38.066666,38.066666,3.516667,1627.4414,1637.0997,5.138525,999.9999,0.0,0.0,0.0,2999.0051,0.0,2999.0,0.0,0.0,2999.0,0.0,60.0,0.0,0.0,4.588847,35.07193,29.995262,119.9,5.187869,7.6,56.999752,0.0,99.92918,0.0,35.80075,94.92344,3.713809,0.0,0.0,28.892733,0.0,0.0,20.0,1.097995,1.301523,50.0,0.271747,300.0,300.0,0.649772,10.699749,5522.9297,0.0,2800.0,0.0,860.7638,10200.045,35.0,0.602369,25.1,-0.00399,14.691771,12.274314,149.97456,52.044556,14.947382,-29.786533,149.83441,0.0,22.942394,24.5,17.608479,20.189028,90.002495,0.578653,25.906235,149.94913,34.942642,149.83441,2965.4834,1228.6833,2965.4834,38.2,637.4,38.2,2965.4834,2965.4834,2965.4834,113.2,2965.4834,38.2,2965.4834,637.4,2965.4834,2965.4834,38.2,2965.4834,637.4,2965.4834,38.2,38.2,3.393516,2021-10-28 08:15:36,2021-10-28 08:17:39,2021-10-28 08:30:54,2021-10-28 08:31:10,2021-10-28 08:39:06,2021-10-28 08:39:22,2021-10-28 08:46:13,1837.0,123.0,795.0,16.0,476.0,16.0,411.0,8,28,3,0.81697,-0.57668,0.543222,-0.839589,0.433884,0.900969,8,28,3,0.81697,-0.57668,0.543222,-0.839589,0.433884,0.900969,8,28,3,0.81697,-0.57668,0.543222,-0.839589,0.433884,0.900969,8,28,3,0.81697,-0.57668,0.543222,-0.839589,0.433884,0.900969,8,28,3,0.81697,-0.57668,0.543222,-0.839589,0.433884,0.900969,8,28,3,0.81697,-0.57668,0.543222,-0.839589,0.433884,0.900969,8,28,3,0.81697,-0.57668,0.543222,-0.839589,0.433884,0.900969


# Category 변수 처리

In [14]:
''' CATEGORY 변수 처리 및 NUM FEATURE 정의 '''
module2idx = {}
for i, module in enumerate(df_train['module_name'].unique()):
    module2idx[module] = i
    
# eq2idx = {}
# for i, eq in enumerate(df_train['module_name_eq'].unique()):
#     eq2idx[eq] = i
    
def col2cat(df, col, dict):
    df[col] = df[col].apply(lambda x: dict[x])
    df[col] = df[col].astype('category')
    return df[col]

# module_name cat 화
col2cat(df_train, 'module_name', module2idx)
col2cat(df_predict, 'module_name', module2idx)
# eq cat 화
# col2cat(df_train, 'module_name_eq', eq2idx)
# col2cat(df_predict, 'module_name_eq', eq2idx)

0       0
1       0
2       0
3       0
4       0
       ..
257    45
258    46
259    46
260    46
261    46
Name: module_name, Length: 262, dtype: category
Categories (47, int64): [0, 1, 2, 3, ..., 43, 44, 45, 46]

# 전체데이터에 대해서 전처리

In [15]:
df_final = df_train.copy()
df_predict_final = df_predict.copy()

In [16]:
drop_col = []
for para in for_col_filter:
    col = df_final.filter(regex='^'+para).columns.tolist()
    duplicate_deleted_df = df_final[col].T.drop_duplicates(subset=df_final[col].T.columns, keep='first').T
    if len(df_final[col].columns.difference(duplicate_deleted_df.columns))==0:  # 다른게 없으면 무시,
        continue
    else:
        drop_col.extend(df_final[col].columns.difference(duplicate_deleted_df.columns).tolist())
df_final = df_final.drop(drop_col,axis=1)
df_predict_final = df_predict_final.drop(drop_col, axis=1)

In [17]:
print('중복 열 제거 전 columns 개수 : {}'.format(len(df_train.columns)))
print('중복 열 제거 후 columns 개수 : {}'.format(len(df_final.columns)))

중복 열 제거 전 columns 개수 : 746
중복 열 제거 후 columns 개수 : 718


In [18]:
''' feature 정의'''
num_features = list(df_final.columns[df_final.dtypes==float])
num_features.remove('y')
# date_features = list(df_final.columns[df_final.dtypes==np.int64])
col_numerical = num_features
    
''' 분산 0인 col 제거 '''
thresholder = VarianceThreshold(threshold=0)
_ = thresholder.fit_transform(df_final[col_numerical])

 # 분산이 0이면 True 이므로 제거할 컬럼을 추출합니다.  
mask = ~thresholder.get_support()
cols_var_drop = np.asarray(col_numerical)[mask].tolist()
print(f'** {len(cols_var_drop)} Features to Drop by Low Variance')
print(f'{cols_var_drop}')

** 37 Features to Drop by Low Variance
['04_epd_para4', '04_he_para1', '04_power_para14', '04_temp_para24', '04_time_para5', '06_epd_para80', '06_he_para1', '06_hv_para3', '06_power_para14', '06_temp_para24', '06_time_para5', '12_epd_para4', '12_he_para1', '12_he_para95', '12_power_para49', '12_temp_para54', '13_epd_para4', '13_he_para1', '13_he_para95', '13_power_para49', '13_temp_para54', '17_epd_para4', '17_he_para1', '17_he_para95', '17_power_para49', '17_temp_para54', '18_epd_para4', '18_he_para1', '18_he_para95', '18_power_para49', '18_temp_para54', '20_epd_para40', '20_he_para1', '20_he_para95', '20_hv_para47', '20_power_para49', '20_temp_para54']


In [19]:
df_final.drop(cols_var_drop,axis=1,inplace=True)
df_predict_final.drop(cols_var_drop,axis=1,inplace=True)

# 모델링

In [20]:
from catboost import CatBoostRegressor
import xgboost as xgb

import optuna
from optuna.samplers import TPESampler
from sklearn.model_selection import LeaveOneOut, KFold ,cross_val_score
from sklearn.metrics import mean_squared_error

In [21]:
''' feature 재정의'''
without_date_num_features = list(df_final.columns[df_final.dtypes==float])
cat_features = ['module_name']
# date_features = list(df_final.columns[df_final.dtypes==np.int64])
COLS_train = without_date_num_features + cat_features
without_date_num_features.remove('y')
COLS_pred = without_date_num_features + cat_features

In [22]:
def prep_cate_feats(df_tr, df_te, feat_nm):

    df_merge = pd.concat([df_tr, df_te])

    # 컬럼명과 범주형 변수의 레벨명을 이용한 새로운 컬럼명을 자동생성합니다. 
    # ex. module_name_eq -> module_name_eq_EQ01, module_name_eq_EQ02, etc.
    df_merge = pd.get_dummies(df_merge, columns=[feat_nm])

    df_tr = df_merge.iloc[:df_tr.shape[0], :].reset_index(drop=True)
    df_te = df_merge.iloc[df_tr.shape[0]:, :].reset_index(drop=True)

    return df_tr, df_te

# module_name_eq 의 원-핫 인코딩 변수를 생성합니다.
final_one_hot, predict_one_hot = prep_cate_feats(df_final[COLS_train], df_predict_final[COLS_pred], 'module_name')

In [23]:
print('One Hot 전 columns 개수 : {}'.format(len(df_final[COLS_train].columns)))
print('One Hot 후 columns 개수 : {}'.format(len(final_one_hot.columns)))

One Hot 전 columns 개수 : 651
One Hot 후 columns 개수 : 697


In [27]:
one_hot_module_cols = final_one_hot.filter(regex='^module_name').columns.tolist()
X_cols = without_date_num_features + one_hot_module_cols

# 전체 XGB

In [28]:
def objective(trial):
    params_xgb = {
        'booster':trial.suggest_categorical('booster',['gbtree','dart']),
        "reg_lambda": trial.suggest_float("reg_lambda", 0, 1.0),
        'colsample_bytree': trial.suggest_int('colsample_bytree', 0.3,1.0),
        'subsample': trial.suggest_float('subsample', 0.3, 1.0),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),
        'n_estimators': trial.suggest_int('n_estimators', 100, 10000),
        'max_depth': trial.suggest_int("max_depth", 4, 12),
        'random_state': trial.suggest_categorical('random_state', [0]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
        'tree_method':'gpu_hist',
        'gpu_id':'0'
    }
    
    # 학습 데이터 중 일부를 검증 데이터 셋으로 분할합니다. 
    X_train, X_valid, y_train, y_valid = train_test_split(final_one_hot[X_cols], np.log1p(final_one_hot['y']), test_size=0.15, shuffle=True, random_state=71)

    model = xgb.XGBRegressor(**params_xgb)
    model.fit(
        X_train, y_train,
        eval_set=[(X_valid, y_valid)],
        early_stopping_rounds=35,
        verbose=False
    )

    xgb_pred = model.predict(X_valid)
    rmsle_val = rmse(y_valid, xgb_pred)
    
    return rmsle_val

In [29]:
sampler = TPESampler(seed=42)
study = optuna.create_study(
    study_name="xgb_parameter_opt",
    direction="minimize",
    sampler=sampler,
)
study.optimize(objective, n_trials=30)
print("Best Score:", study.best_value)
print("Best trial:", study.best_trial.params)

[32m[I 2022-07-10 22:51:44,916][0m A new study created in memory with name: xgb_parameter_opt[0m


KeyboardInterrupt: 

# 전체 CatBoostRegressor

In [46]:
def objective_CAT(trial):
    param = {
      "random_state":42,
      'learning_rate' : trial.suggest_loguniform('learning_rate', 0.01, 0.5),
      'bagging_temperature' :trial.suggest_loguniform('bagging_temperature', 0.01, 100.00),
#       "n_estimators":trial.suggest_int("n_estimators", 100, 10000),
      "max_depth":trial.suggest_int("max_depth", 4, 12),
      'random_strength' :trial.suggest_int('random_strength', 0, 30),
#       "colsample_bylevel":trial.suggest_float("colsample_bylevel", 0.4, 1.0),
      "l2_leaf_reg":trial.suggest_float("l2_leaf_reg",1e-8,3e-5),
      "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
      "max_bin": trial.suggest_int("max_bin", 200, 400),
      'od_type': trial.suggest_categorical('od_type', ['IncToDec', 'Iter']),
      'boosting_type':trial.suggest_categorical('boosting_type', ['Plain', 'Ordered']),
      'task_type':'GPU',
      'devices':'0:7',
      'iterations':100,
      'rsm':1
  }
    
    X_train, X_valid, y_train, y_valid = train_test_split(df_final[COLS_pred], np.log1p(df_final['y']), test_size=0.15, shuffle=True, random_state=71)
  
    cat = CatBoostRegressor(loss_function='RMSE', eval_metric='RMSE', **param)
    cat.fit(X_train, y_train,
            eval_set=[(X_valid,y_valid)],
            early_stopping_rounds=35, cat_features=cat_features,
            verbose=100)
    cat_pred = cat.predict(X_valid)
    rmsle_val = np.sqrt(mean_squared_error(y_valid, cat_pred))
    
    return rmsle_val

In [None]:
sampler = TPESampler(seed=42)
study = optuna.create_study(
    study_name="cat_parameter_opt",
    direction="minimize",
    sampler=sampler,
)
study.optimize(objective_CAT, n_trials=20)
print("Best Score:", study.best_value)
print("Best trial:", study.best_trial.params)
# 6.36 -> 9.4 됨.

[32m[I 2022-07-10 23:50:49,308][0m A new study created in memory with name: cat_parameter_opt[0m


0:	learn: 0.0066316	test: 0.0055544	best: 0.0055544 (0)	total: 17.5ms	remaining: 1.74s


[32m[I 2022-07-10 23:50:50,486][0m Trial 0 finished with value: 0.005554436407106352 and parameters: {'learning_rate': 0.043284502212938815, 'bagging_temperature': 63.512210106407046, 'max_depth': 10, 'random_strength': 18, 'l2_leaf_reg': 4.688999026868671e-06, 'min_child_samples': 19, 'max_bin': 211, 'od_type': 'IncToDec', 'boosting_type': 'Plain'}. Best is trial 0 with value: 0.005554436407106352.[0m


bestTest = 0.005554434329
bestIteration = 0
Shrink model to first 1 iterations.
0:	learn: 0.0064012	test: 0.0056118	best: 0.0056118 (0)	total: 12.7ms	remaining: 1.26s


[32m[I 2022-07-10 23:50:51,801][0m Trial 1 finished with value: 0.0053635930358311365 and parameters: {'learning_rate': 0.44447541666908114, 'bagging_temperature': 21.368329072358772, 'max_depth': 5, 'random_strength': 5, 'l2_leaf_reg': 5.51030125050448e-06, 'min_child_samples': 34, 'max_bin': 305, 'od_type': 'IncToDec', 'boosting_type': 'Plain'}. Best is trial 1 with value: 0.0053635930358311365.[0m


bestTest = 0.005363653329
bestIteration = 13
Shrink model to first 14 iterations.
0:	learn: 0.0066000	test: 0.0055273	best: 0.0055273 (0)	total: 27.9ms	remaining: 2.76s


[32m[I 2022-07-10 23:50:54,320][0m Trial 2 finished with value: 0.005328655676638196 and parameters: {'learning_rate': 0.03135775732257745, 'bagging_temperature': 0.2920433847181412, 'max_depth': 8, 'random_strength': 24, 'l2_leaf_reg': 5.998216726929208e-06, 'min_child_samples': 54, 'max_bin': 319, 'od_type': 'Iter', 'boosting_type': 'Plain'}. Best is trial 2 with value: 0.005328655676638196.[0m


bestTest = 0.005328658887
bestIteration = 31
Shrink model to first 32 iterations.


# LinearModels Modeling

In [None]:
from sklearn.linear_model import Ridge,ElasticNet,BayesianRidge,Lasso

#### Ridge

In [None]:
ridges = []
for i, (train, num_f) in enumerate(zip(df_trains, num_features_lst)):
    def objective_ridge(trial):
        param = {
          "random_state":42,
            'alpha':trial.suggest_float("alpha",0.1,10),
            'fit_intercept':trial.suggest_categorical('fit_intercept', [True, False]),
            'normalize':trial.suggest_categorical('normalize', [True, False]),
        }
        X = train[num_f]
        y = np.log1p(train['y'])

        model = Ridge(**param)
        X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=71)

        model.fit(
            X_train, y_train
            )
        ridge_pred = model.predict(X_valid)
        rmsle_val = np.sqrt(mean_squared_error(y_valid, ridge_pred))

        return rmsle_val
    
    sampler = TPESampler(seed=42)
    study = optuna.create_study(
            study_name="ridge_parameter_opt",
            direction="minimize",
            sampler=sampler,
            )
    study.optimize(objective_ridge, n_trials=10)
    print("Best Score:", study.best_value)
    print("Best trial:", study.best_trial.params)
    
    model = Ridge(**study.best_params)
    model.fit(train[num_f], np.log1p(train['y']))
    print('{} model training is completed'.format(i))
    ridges.append(model)


#### ElasticNet

In [None]:
ens = []
for i, (train, num_f) in enumerate(zip(df_trains, num_features_lst)):
    def objective_en(trial):
        param = {
          "random_state":42,
            'alpha':trial.suggest_float("alpha",0.1,20),
            'fit_intercept':trial.suggest_categorical('fit_intercept', [True, False]),
            'normalize':trial.suggest_categorical('normalize', [True, False]),
        }
        X = train[num_f]
        y = np.log1p(train['y'])

        model = ElasticNet(**param)
        X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=71)

        model.fit(
            X_train, y_train
            )
        en_pred = model.predict(X_valid)
        rmsle_val = np.sqrt(mean_squared_error(y_valid, en_pred))

        return rmsle_val
    
    sampler = TPESampler(seed=42)
    study = optuna.create_study(
            study_name="ridge_parameter_opt",
            direction="minimize",
            sampler=sampler,
            )
    study.optimize(objective_en, n_trials=10)
    print("Best Score:", study.best_value)
    print("Best trial:", study.best_trial.params)
    
    model = ElasticNet(**study.best_params)
    model.fit(train[num_f], np.log1p(train['y']))
    print('{} model training is completed'.format(i))
    ens.append(model)


#### BayesianRidge

In [None]:
brs = []
for i, (train, num_f) in enumerate(zip(df_trains, num_features_lst)):
    def objective_br(trial):
        param = {
            'n_iter':trial.suggest_int("n_iter",10,500),
            'alpha_2':trial.suggest_uniform("alpha_2",-10,10),
            'lambda_2' :trial.suggest_uniform('lambda_2', -10, 10),
            'fit_intercept':trial.suggest_categorical('fit_intercept', [True, False]),
            'normalize':trial.suggest_categorical('normalize', [True, False]),
        }
        X = train[num_f]
        y = np.log1p(train['y'])

        model = BayesianRidge(**param)
        X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=71)

        model.fit(
            X_train, y_train
            )
        br_pred = model.predict(X_valid)
        rmsle_val = np.sqrt(mean_squared_error(y_valid, br_pred))

        return rmsle_val
    
    sampler = TPESampler(seed=42)
    study = optuna.create_study(
            study_name="BayesianRidge_parameter_opt",
            direction="minimize",
            sampler=sampler,
            )
    study.optimize(objective_br, n_trials=10)
    print("Best Score:", study.best_value)
    print("Best trial:", study.best_trial.params)
    
    model = BayesianRidge(**study.best_params)
    model.fit(train[num_f], np.log1p(train['y']))
    print('{} model training is completed'.format(i))
    brs.append(model)


#### Lasso

In [None]:
ls = []
for i, (train, num_f) in enumerate(zip(df_trains, num_features_lst)):
    def objective_ls(trial):
        param = {
          "random_state":42,
            'alpha':trial.suggest_float("alpha",0.1,20),
            'fit_intercept':trial.suggest_categorical('fit_intercept', [True, False]),
            'normalize':trial.suggest_categorical('normalize', [True, False]),
            'warm_start':True
        }
        X = train[num_f]
        y = np.log1p(train['y'])

        model = Lasso(**param, warm_start=True)
        X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=71)

        model.fit(
            X_train, y_train
            )
        ls_pred = model.predict(X_valid)
        rmsle_val = np.sqrt(mean_squared_error(y_valid, ls_pred))

        return rmsle_val
    
    sampler = TPESampler(seed=42)
    study = optuna.create_study(
            study_name="Lasso_parameter_opt",
            direction="minimize",
            sampler=sampler,
            )
    study.optimize(objective_ls, n_trials=10)
    print("Best Score:", study.best_value)
    print("Best trial:", study.best_trial.params)
    
    model = Lasso(**study.best_params, warm_start=True)
    model.fit(train[num_f], np.log1p(train['y']))
    print('{} model training is completed'.format(i))
    ls.append(model)


In [None]:
models = [cats,ridges,ens,brs,ls]
for i,model_all_chamber in enumerate(models):
    for j, (df,model_by_chamber,num_f) in enumerate(zip(df_predicts, model_all_chamber, num_features_lst)):
        pred_by_chamber = model_by_chamber.predict(df[num_f])
        predict.loc[df.index, 'pred_{}'.format(i)] = pred_by_chamber

In [None]:
predict.isnull().values.any()

In [None]:
predict['msure_val'] = np.mean(np.exp(predict.filter(regex='^pred')), axis=1)
df_submission = predict[['key_val','msure_val']]
df_submission.head()

In [None]:
# 예측 파일을 저장합니다. 
# 제출용 파일 이름은 cds_submission_팀명_차수.csv 형태로 제출합니다.
df_submission.set_index('key_val', inplace=True)
df_submission.to_csv('cds_submission_데이터조무사_25.csv')

# PYCARET

In [30]:
from pycaret.regression import *

In [31]:
for_pycaret = final_one_hot[X_cols+['y']].copy()
for_pycaret.head(1)

Unnamed: 0,04_efem_para2,04_efem_para25,04_efem_para78,04_esc_para84,04_esc_para94,04_fr_para28,04_fr_para35,04_fr_para61,04_fr_para69,04_gas_para10,04_gas_para13,04_gas_para15,04_gas_para19,04_gas_para21,04_gas_para26,04_gas_para27,04_gas_para33,04_gas_para36,04_gas_para39,04_gas_para46,04_gas_para48,04_gas_para50,04_gas_para51,04_gas_para52,04_gas_para59,04_gas_para6,04_gas_para70,04_gas_para71,04_gas_para73,04_gas_para74,04_gas_para85,04_he_para22,04_he_para88,04_he_para95,04_hv_para3,04_hv_para45,04_hv_para47,04_hv_para56,04_position_para72,04_power_para57,04_power_para76,04_pressure_para91,04_temp_para11,04_temp_para12,04_temp_para17,04_temp_para18,04_temp_para20,04_temp_para23,04_temp_para32,04_temp_para38,04_temp_para53,04_temp_para54,04_temp_para55,04_temp_para58,04_temp_para60,04_temp_para65,04_temp_para66,04_temp_para79,04_temp_para86,04_temp_para87,04_temp_para92,04_temp_para93,04_time_para16,04_time_para29,04_time_para30,04_time_para34,04_time_para37,04_time_para41,04_time_para42,04_time_para43,04_time_para44,04_time_para62,04_time_para64,04_time_para67,04_time_para7,04_time_para75,04_time_para77,04_time_para8,04_time_para81,04_time_para83,04_time_para89,04_time_para9,04_time_para90,04_tmp_para31,06_efem_para2,06_efem_para25,06_efem_para78,06_epd_para4,06_epd_para40,06_epd_para63,06_esc_para84,06_esc_para94,06_fr_para28,06_fr_para35,06_fr_para61,06_fr_para69,06_gas_para10,06_gas_para13,06_gas_para15,06_gas_para19,06_gas_para21,06_gas_para26,06_gas_para27,06_gas_para33,06_gas_para36,06_gas_para39,06_gas_para46,06_gas_para48,06_gas_para50,06_gas_para51,06_gas_para52,06_gas_para59,06_gas_para6,06_gas_para70,06_gas_para71,06_gas_para73,06_gas_para74,06_gas_para85,06_he_para22,06_he_para88,06_he_para95,06_position_para72,06_power_para49,06_power_para57,06_power_para76,06_pressure_para91,06_temp_para11,06_temp_para12,06_temp_para17,06_temp_para18,06_temp_para20,06_temp_para23,06_temp_para32,06_temp_para38,06_temp_para53,06_temp_para54,06_temp_para55,06_temp_para58,06_temp_para60,06_temp_para65,06_temp_para66,06_temp_para79,06_temp_para86,06_temp_para87,06_temp_para92,06_temp_para93,06_time_para16,06_time_para29,06_time_para30,06_time_para34,06_time_para37,06_time_para41,06_time_para42,06_time_para43,06_time_para44,06_time_para62,06_time_para64,06_time_para67,06_time_para7,06_time_para75,06_time_para77,06_time_para8,06_time_para81,06_time_para83,06_time_para89,06_time_para9,06_time_para90,06_tmp_para31,12_efem_para2,12_efem_para25,12_efem_para78,12_esc_para84,12_esc_para94,12_fr_para28,12_fr_para35,12_fr_para61,12_fr_para69,12_gas_para10,12_gas_para13,12_gas_para15,12_gas_para19,12_gas_para21,12_gas_para26,12_gas_para27,12_gas_para33,12_gas_para36,12_gas_para39,12_gas_para46,12_gas_para48,12_gas_para50,12_gas_para51,12_gas_para52,12_gas_para59,12_gas_para6,12_gas_para70,12_gas_para71,12_gas_para73,12_gas_para74,12_gas_para85,12_he_para22,12_he_para88,12_hv_para3,12_hv_para45,12_hv_para47,12_hv_para56,12_position_para72,12_power_para14,12_power_para57,12_power_para76,12_power_para82,12_pressure_para91,12_temp_para11,12_temp_para12,12_temp_para17,12_temp_para18,12_temp_para20,12_temp_para23,12_temp_para24,12_temp_para32,12_temp_para38,12_temp_para53,12_temp_para55,12_temp_para58,12_temp_para60,12_temp_para65,12_temp_para66,12_temp_para79,12_temp_para86,12_temp_para87,12_temp_para92,12_temp_para93,12_time_para16,12_time_para29,12_time_para30,12_time_para34,12_time_para37,12_time_para41,12_time_para42,12_time_para43,12_time_para44,12_time_para5,12_time_para62,12_time_para64,12_time_para67,12_time_para7,12_time_para75,12_time_para77,12_time_para8,12_time_para81,12_time_para83,...,18_gas_para50,18_gas_para51,18_gas_para52,18_gas_para59,18_gas_para6,18_gas_para70,18_gas_para71,18_gas_para73,18_gas_para74,18_gas_para85,18_he_para22,18_he_para88,18_hv_para3,18_hv_para45,18_hv_para47,18_hv_para56,18_position_para72,18_power_para14,18_power_para57,18_power_para68,18_power_para76,18_power_para82,18_pressure_para91,18_temp_para11,18_temp_para12,18_temp_para17,18_temp_para18,18_temp_para20,18_temp_para23,18_temp_para24,18_temp_para32,18_temp_para38,18_temp_para53,18_temp_para55,18_temp_para58,18_temp_para60,18_temp_para65,18_temp_para66,18_temp_para79,18_temp_para86,18_temp_para87,18_temp_para92,18_temp_para93,18_time_para16,18_time_para29,18_time_para30,18_time_para34,18_time_para37,18_time_para41,18_time_para42,18_time_para43,18_time_para44,18_time_para5,18_time_para62,18_time_para64,18_time_para67,18_time_para7,18_time_para75,18_time_para77,18_time_para8,18_time_para81,18_time_para83,18_time_para89,18_time_para9,18_time_para90,18_tmp_para31,20_efem_para2,20_efem_para25,20_efem_para78,20_epd_para4,20_esc_para84,20_esc_para94,20_fr_para28,20_fr_para35,20_fr_para61,20_fr_para69,20_gas_para10,20_gas_para13,20_gas_para15,20_gas_para19,20_gas_para21,20_gas_para26,20_gas_para27,20_gas_para33,20_gas_para36,20_gas_para39,20_gas_para46,20_gas_para48,20_gas_para50,20_gas_para51,20_gas_para52,20_gas_para59,20_gas_para6,20_gas_para70,20_gas_para71,20_gas_para73,20_gas_para74,20_gas_para85,20_he_para22,20_he_para88,20_hv_para3,20_hv_para45,20_hv_para56,20_position_para72,20_power_para14,20_power_para57,20_power_para68,20_power_para76,20_power_para82,20_pressure_para91,20_temp_para11,20_temp_para12,20_temp_para17,20_temp_para18,20_temp_para20,20_temp_para23,20_temp_para24,20_temp_para32,20_temp_para38,20_temp_para53,20_temp_para55,20_temp_para58,20_temp_para60,20_temp_para65,20_temp_para66,20_temp_para79,20_temp_para86,20_temp_para87,20_temp_para92,20_temp_para93,20_time_para16,20_time_para29,20_time_para30,20_time_para34,20_time_para37,20_time_para41,20_time_para42,20_time_para43,20_time_para44,20_time_para5,20_time_para62,20_time_para64,20_time_para67,20_time_para7,20_time_para75,20_time_para77,20_time_para8,20_time_para81,20_time_para83,20_time_para89,20_time_para9,20_time_para90,20_tmp_para31,gen_tmdiff,gen_tmdiff_0406,gen_tmdiff_0612,gen_tmdiff_1213,gen_tmdiff_1317,gen_tmdiff_1718,gen_tmdiff_1820,04_hour_sin,04_hour_cos,04_date_sin,04_date_cos,04_weekday_sin,04_weekday_cos,06_hour_sin,06_hour_cos,06_date_sin,06_date_cos,06_weekday_sin,06_weekday_cos,12_hour_sin,12_hour_cos,12_date_sin,12_date_cos,12_weekday_sin,12_weekday_cos,13_hour_sin,13_hour_cos,13_date_sin,13_date_cos,13_weekday_sin,13_weekday_cos,17_hour_sin,17_hour_cos,17_date_sin,17_date_cos,17_weekday_sin,17_weekday_cos,18_hour_sin,18_hour_cos,18_date_sin,18_date_cos,18_weekday_sin,18_weekday_cos,20_hour_sin,20_hour_cos,20_date_sin,20_date_cos,20_weekday_sin,20_weekday_cos,module_name_0,module_name_1,module_name_2,module_name_3,module_name_4,module_name_5,module_name_6,module_name_7,module_name_8,module_name_9,module_name_10,module_name_11,module_name_12,module_name_13,module_name_14,module_name_15,module_name_16,module_name_17,module_name_18,module_name_19,module_name_20,module_name_21,module_name_22,module_name_23,module_name_24,module_name_25,module_name_26,module_name_27,module_name_28,module_name_29,module_name_30,module_name_31,module_name_32,module_name_33,module_name_34,module_name_35,module_name_36,module_name_37,module_name_38,module_name_39,module_name_40,module_name_41,module_name_42,module_name_43,module_name_44,module_name_45,module_name_46,y
0,1631.273,1639.727,5.06654,2999.0,0.0,-2999.0,0.0,0.0,2999.0,0.0,0.0,40.0,0.0,2.41871,30.04839,0.0,0.0,2.477097,0.0,0.0,0.0,0.0,11.6,24.95807,0.09697,2.834516,0.0,0.0,44.90645,50.1,70.26363,0.906452,0.726923,30.0,0.09,299.9667,150.0,0.241481,7.033333,1000.0,1062.933,45.02667,0.598182,25.1,-0.021212,15.0697,26.60606,150.1848,19.96061,-9.842424,149.8545,47.52941,22.94848,24.5,44.51515,21.45152,90.0,0.579091,22.04243,149.7182,35.01515,149.8545,2460.4,723.6,2460.4,2460.4,132.3333,132.3333,2460.4,2460.4,2460.4,2460.4,132.3333,2460.4,132.3333,2460.4,2460.4,132.3333,2460.4,132.3333,2460.4,132.3333,2460.4,2.351515,1631.445,1640.059,5.074046,0.0,-1.201783,0.0,2999.0,0.0,-2999.0,0.0,0.0,2999.0,0.0,0.0,0.0,0.0,3.481353,32.89549,0.0,0.0,3.674587,0.0,0.0,0.0,0.0,0.0,34.09699,179.697,3.162481,15.0,120.9215,33.00075,0.0,0.0,0.900752,0.704688,30.0,9.39697,0.007576,2000.0,1525.651,25.00379,0.59763,25.1,0.042222,15.06667,26.62222,149.9948,19.96,-12.28519,150.0393,42.75735,22.95185,24.5,36.92593,22.02148,89.99185,0.579111,23.38593,150.4793,35.00296,150.0393,2460.417,723.6166,2460.417,2460.417,132.3333,132.3333,2460.417,2460.417,2460.417,2460.417,132.3333,2460.417,132.3333,2460.417,2460.417,132.3333,2460.417,132.3333,2460.417,132.3333,2460.417,2.619259,1631.368,1642.6316,5.038704,2999.037,0.0,2999.0,0.0,0.0,2999.0,0.0,99.3,0.0,0.0,5.136,35.01454,32.99474,0.0,5.382364,0.0,132.9,0.0,0.0,0.0,33.22364,184.5947,4.430727,0.0,0.0,31.81273,36.0,0.0,1.3,1.4,0.299804,272.0,434.6111,0.560196,16.19445,6827.481,4000.0,979.4445,15001.09,19.99815,0.601053,25.1,-0.005263,14.7,14.77193,150.0,70.26667,14.95088,-31.40175,149.9702,22.94386,24.5,18.0,20.67544,90.0,0.577368,26.92456,150.1,35.0,149.9702,2460.483,723.6667,2460.483,2460.483,132.4,132.4,2460.483,2460.483,2460.483,137.0,2460.483,132.4,2460.483,132.4,2460.483,2460.483,132.4,2460.483,132.4,...,0.0,7.0,33.14545,109.6,4.236363,0.0,0.0,31.93636,0.0,0.0,1.3,1.3,0.218571,272.0,434.7,0.338571,17.7,7649.3,4000.0,0.0,881.0,15001.22,15.04,0.602308,25.1,0.023077,14.7,12.0,150.0,72.8125,14.94615,-31.41538,150.0692,22.96154,24.5,14.0,20.73846,90.0,0.577692,27.11538,150.0,35.0,150.0692,2460.7,723.8834,2460.7,2460.7,132.6167,132.6167,2460.7,2460.7,2460.7,137.0,2460.7,132.6167,2460.7,132.6167,2460.7,2460.7,132.6167,2460.7,132.6167,2460.7,132.6167,2460.7,3.5,1632.209,1642.458,5.056325,999.9999,2999.0,0.0,2999.0,0.0,0.0,2999.0,0.0,60.0,0.0,0.0,4.839175,35.075,29.99478,119.9,5.357225,6.1,56.99975,0.0,99.9301,0.0,35.79625,94.91542,4.005325,0.0,0.0,28.90425,0.0,0.0,1.2985,1.283797,0.27,300.0,0.659293,10.67644,5507.023,2800.0,0.0,856.4361,10200.11,35.001,0.600323,25.1,-0.010945,14.6908,13.73632,149.9821,52.21481,14.94627,-29.72363,149.8346,22.94279,24.5,16.27115,20.19403,89.99477,0.576269,25.88433,149.942,34.93358,149.8346,2460.817,724.0167,2460.817,2460.817,132.7333,132.7333,2460.817,2460.817,2460.817,137.0,2460.817,132.7333,2460.817,132.7333,2460.817,2460.817,132.7333,2460.817,132.7333,2460.817,132.7333,2460.817,3.399254,1912.0,146.0,846.0,16.0,477.0,16.0,411.0,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,-0.269797,0.962917,0.848644,-0.528964,0.781831,-0.62349,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1260.0892


In [32]:
sup = setup(for_pycaret, target = 'y', train_size = 0.8)

Unnamed: 0,Description,Value
0,session_id,5169
1,Target,y
2,Original Data,"(611, 697)"
3,Missing Values,False
4,Numeric Features,693
5,Categorical Features,3
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(488, 391)"


In [33]:
comp = compare_models(sort = 'RMSE')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
ridge,Ridge Regression,6.1541,59.2619,7.6568,0.085,0.0061,0.0049,0.007
en,Elastic Net,6.357,62.3412,7.8381,0.067,0.0062,0.005,0.007
lightgbm,Light Gradient Boosting Machine,6.2151,62.712,7.8701,0.0533,0.0062,0.0049,0.068
catboost,CatBoost Regressor,6.2475,63.2957,7.9011,0.0516,0.0063,0.0049,10.142
lasso,Lasso Regression,6.4081,63.4645,7.9018,0.0552,0.0063,0.0051,0.277
br,Bayesian Ridge,6.4154,63.5671,7.9141,0.0494,0.0063,0.0051,0.045
et,Extra Trees Regressor,6.2154,64.0189,7.9185,0.0526,0.0063,0.0049,0.279
rf,Random Forest Regressor,6.3641,65.4506,8.0213,0.0268,0.0063,0.005,0.43
ada,AdaBoost Regressor,6.4387,66.099,8.0731,0.0072,0.0064,0.0051,0.087
gbr,Gradient Boosting Regressor,6.3411,66.7048,8.1159,-0.0037,0.0064,0.005,0.194


In [34]:
top1 = create_model('ridge', cross_validation = False)
top2 = create_model('en', cross_validation = False)
top3 = create_model('lightgbm', cross_validation = False)
top4 = create_model('catboost', cross_validation = False)
top5 = create_model('lasso', cross_validation = False)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,5.3457,46.810398,6.8418,0.1828,0.0054,0.0042


In [35]:
tuned_ridge = tune_model(top1, optimize = 'RMSE', n_iter = 10)
tuned_en = tune_model(top2, optimize = 'RMSE', n_iter = 10)
tuned_lgb = tune_model(top3, optimize = 'RMSE', n_iter = 10)
tuned_cat = tune_model(top4, optimize = 'RMSE', n_iter = 10)
tuned_lasso = tune_model(top5, optimize = 'RMSE', n_iter = 10)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,7.7952,89.9766,9.4856,0.1045,0.0075,0.0062
1,6.1387,53.0149,7.2811,-0.1676,0.0058,0.0049
2,6.7607,69.4948,8.3364,0.099,0.0066,0.0053
3,5.7242,46.3271,6.8064,0.0746,0.0054,0.0045
4,6.7836,71.1951,8.4377,0.061,0.0067,0.0054
5,5.7523,54.202,7.3622,0.0658,0.0058,0.0046
6,5.7386,52.4684,7.2435,0.0528,0.0057,0.0045
7,5.1118,39.3316,6.2715,0.085,0.005,0.004
8,7.5546,73.9302,8.5983,0.0927,0.0068,0.006
9,6.8138,86.6826,9.3103,0.06,0.0074,0.0054


### TOP5 Blending

In [36]:
''' top5 blending'''
blender_specific5 = blend_models(estimator_list = [tuned_ridge,tuned_en,tuned_lgb,tuned_cat,tuned_lasso],optimize = 'RMSE')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,7.3319,80.1679,8.9537,0.2021,0.0071,0.0058
1,5.9928,51.1126,7.1493,-0.1257,0.0057,0.0048
2,6.1192,59.1871,7.6933,0.2326,0.0061,0.0048
3,5.4483,40.8253,6.3895,0.1845,0.0051,0.0043
4,6.2141,65.58,8.0982,0.1351,0.0064,0.0049
5,5.4892,50.2736,7.0904,0.1335,0.0056,0.0043
6,5.347,46.3828,6.8105,0.1627,0.0054,0.0042
7,4.894,36.8825,6.0731,0.142,0.0048,0.0039
8,7.2111,68.637,8.2847,0.1576,0.0065,0.0057
9,6.0656,76.5348,8.7484,0.17,0.007,0.0048


In [37]:
final_model5 = finalize_model(blender_specific5)
pred = predict_model(final_model5, data=predict_one_hot[X_cols])

In [38]:
predict['msure_val'] = pred['Label']
df_submission = predict[['key_val', 'msure_val']]
df_submission.head()

Unnamed: 0,key_val,msure_val
0,LOT380_17,1261.143394
1,LOT122_18,1260.694982
2,LOT313_18,1260.446343
3,LOT459_12,1260.933767
4,LOT459_18,1263.855695


In [39]:
df_submission.set_index('key_val', inplace=True)
df_submission.to_csv('cds_submission_데이터조무사_26.csv')

### TOP7 Blending

In [40]:
top6 = create_model('br', cross_validation = False)
top7 = create_model('et', cross_validation = False)
# top8 = create_model('rf', cross_validation = False)
# top9 = create_model('ada', cross_validation = False)
# top10 = create_model('gbr', cross_validation = False)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,5.4299,49.9592,7.0682,0.1278,0.0056,0.0043


In [41]:
tuned_br = tune_model(top6, optimize = 'RMSE', n_iter = 10)
tuned_et = tune_model(top7, optimize = 'RMSE', n_iter = 10)
# tuned_rf = tune_model(top8, optimize = 'RMSE', n_iter = 10)
# tuned_ada = tune_model(top9, optimize = 'RMSE', n_iter = 10)
# tuned_gbr = tune_model(top10, optimize = 'RMSE', n_iter = 10)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,7.2858,87.2326,9.3398,0.1318,0.0074,0.0058
1,5.9442,49.8652,7.0615,-0.0982,0.0056,0.0047
2,6.0361,59.3911,7.7066,0.23,0.0061,0.0048
3,5.6613,45.4381,6.7408,0.0923,0.0053,0.0045
4,6.6028,71.8656,8.4774,0.0522,0.0067,0.0052
5,5.7842,57.9082,7.6097,0.0019,0.006,0.0046
6,5.7463,49.4203,7.03,0.1078,0.0056,0.0045
7,5.0177,41.0228,6.4049,0.0456,0.0051,0.004
8,7.7605,80.9215,8.9956,0.0069,0.0071,0.0061
9,6.7622,95.8233,9.7889,-0.0392,0.0078,0.0054


In [42]:
''' top7 blending'''
blender_specific7 = blend_models(estimator_list = [tuned_ridge,tuned_en,tuned_lgb,tuned_cat,tuned_lasso,tuned_br,tuned_et],optimize = 'RMSE')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,7.374,81.1319,9.0073,0.1925,0.0071,0.0058
1,5.9583,50.7896,7.1267,-0.1185,0.0056,0.0047
2,6.1049,58.8886,7.6739,0.2365,0.0061,0.0048
3,5.453,40.918,6.3967,0.1826,0.0051,0.0043
4,6.2628,66.0926,8.1297,0.1283,0.0064,0.0049
5,5.5121,51.104,7.1487,0.1192,0.0056,0.0044
6,5.3368,46.4065,6.8122,0.1622,0.0054,0.0042
7,4.9556,37.5546,6.1282,0.1263,0.0048,0.0039
8,7.2684,69.8353,8.3567,0.1429,0.0066,0.0057
9,6.1278,78.6927,8.8709,0.1466,0.0071,0.0049


In [43]:
final_model7 = finalize_model(blender_specific7)
pred = predict_model(final_model7, data=predict_one_hot[X_cols])

In [44]:
predict['msure_val'] = pred['Label']
df_submission = predict[['key_val', 'msure_val']]
df_submission.head()

Unnamed: 0,key_val,msure_val
0,LOT380_17,1261.089682
1,LOT122_18,1260.776507
2,LOT313_18,1260.406764
3,LOT459_12,1261.072806
4,LOT459_18,1263.309557


In [45]:
# 예측 파일을 저장합니다. 
# 제출용 파일 이름은 cds_submission_팀명_차수.csv 형태로 제출합니다.
df_submission.set_index('key_val', inplace=True)
df_submission.to_csv('cds_submission_데이터조무사_27.csv')

# Feature Selection

In [None]:
from probatus.feature_elimination import EarlyStoppingShapRFECV

In [None]:
# Run feature elimination
shap_elimination = EarlyStoppingShapRFECV(
    clf=model_xgb, step=0.2, cv=10, scoring='neg_mean_squared_error', early_stopping_rounds=15, n_jobs=-1, eval_metric='rmse')
report = shap_elimination.fit_compute(df_final[COLS], np.log1p(df_final['y']))

# Make plots
performance_plot = shap_elimination.plot()

In [None]:
report

In [None]:
# Get final feature set
''' num_features는 마지막에 남길 feature 수임. '''
final_features_set = shap_elimination.get_reduced_features_set(num_features=77)

The provided number of features has not been achieved at any stage of the process. You can select one of the following: [402, 322, 258, 207, 166, 133, 107, 86, 69, 56, 45, 36, 29, 24, 20, 16, 13, 11, 9, 8, 7, 6, 5, 4, 3, 2, 1]

In [None]:
final_features_set

# 예측 결과 제출

In [None]:
df_predict.head()

In [None]:
pred_X = pd.get_dummies(df_predict[COLS])

In [None]:
predict['msure_val'] = np.exp(cat.predict(df_predict_final[COLS]))
df_submission = predict[['key_val', 'msure_val']] 
df_submission.head()

In [None]:
# 예측값에 결측치가 포함되어 있는지 확인합니다.
df_submission.isnull().sum()

In [None]:
# 예측값의 갯수가 평가용 데이터의 갯수와 동일한지 확인합니다.
assert len(df_submission) == len(predict)
print(f'No. of Predict DataSet : {len(predict)}\nNo. of Submission DataSet : {len(df_submission)}')

In [None]:
# 예측 파일을 저장합니다. 
# 제출용 파일 이름은 cds_submission_팀명_차수.csv 형태로 제출합니다.
df_submission.set_index('key_val', inplace=True)
df_submission.to_csv('cds_submission_데이터조무사_12.csv')