Data: https://dacon.io/competitions/open/235576/overview/description

# **목표: 가공한 데이터의 따릉이 대여 예측 RMSE 점수 최대화**

목차:  
0.  베이스라인 모델 설계
0-1. 라이브러리 호출     
0-2. 파일 입출력    
0-3. 임시 결측값 처리 (Baseline 모델 학습용)    
0-4. 베이스라인 모델 설계 (RandomForest)   
0-4-1. 단순 스플릿 (0.7:0.3)    
0-4-2. KFold     
0-4-3. StratifiedKFold    
1. 데이터 정제    
1-1. 라이브러리 호출    
1-2. 파일 호출    
1-3. 결측치 제거    
1-3-1. Train 결측치    
1-3-2. Test 결측치     
2. 모델링     
2-1. Ridge     
2-2. Lasso    
2-3. CatBoost    
3. Submission     
3-1. Hard Voting     

# 0. BaseData + Baseline 모델 (RandomForest)

## 0-1 라이브러리 호출

In [482]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

## 0-2 파일 입출력

In [483]:
train = pd.read_csv("/content/drive/MyDrive/content/sample_data/train.csv")
test = pd.read_csv("/content/drive/MyDrive/content/sample_data/test.csv")

In [484]:
train.head()

Unnamed: 0,id,hour,hour_bef_temperature,hour_bef_precipitation,hour_bef_windspeed,hour_bef_humidity,hour_bef_visibility,hour_bef_ozone,hour_bef_pm10,hour_bef_pm2.5,count
0,3,20,16.3,1.0,1.5,89.0,576.0,0.027,76.0,33.0,49.0
1,6,13,20.1,0.0,1.4,48.0,916.0,0.042,73.0,40.0,159.0
2,7,6,13.9,0.0,0.7,79.0,1382.0,0.033,32.0,19.0,26.0
3,8,23,8.1,0.0,2.7,54.0,946.0,0.04,75.0,64.0,57.0
4,9,18,29.5,0.0,4.8,7.0,2000.0,0.057,27.0,11.0,431.0


In [485]:
train.keys()

Index(['id', 'hour', 'hour_bef_temperature', 'hour_bef_precipitation',
       'hour_bef_windspeed', 'hour_bef_humidity', 'hour_bef_visibility',
       'hour_bef_ozone', 'hour_bef_pm10', 'hour_bef_pm2.5', 'count'],
      dtype='object')

id: 아이디   
hour: 시간   
hour_bef_temperature: 기온   
hour_bef_precipitation: 비가 왔으면 1 비가 안왔으면 0   
hour_bef_windspeed: 풍속   
hour_bef_humidity: 습도    
hour_bef_visibility: 시계    
hour_bef_ozone: 오존    
hour_bef_pm10: 미세먼지(pm10)   
hour_bef_pm2.5: 미세먼지(pm2.5)  
count: 대여 수     

## 0-3 임시 결측값 처리 (Baseline 모델 학습용)

In [486]:
train.isnull().sum()

id                          0
hour                        0
hour_bef_temperature        2
hour_bef_precipitation      2
hour_bef_windspeed          9
hour_bef_humidity           2
hour_bef_visibility         2
hour_bef_ozone             76
hour_bef_pm10              90
hour_bef_pm2.5            117
count                       0
dtype: int64

In [487]:
train = train.dropna()

In [488]:
train.isnull().sum()

id                        0
hour                      0
hour_bef_temperature      0
hour_bef_precipitation    0
hour_bef_windspeed        0
hour_bef_humidity         0
hour_bef_visibility       0
hour_bef_ozone            0
hour_bef_pm10             0
hour_bef_pm2.5            0
count                     0
dtype: int64

## 0-4 베이스라인 모델 설계 (RandomForest)

### 0-4-1 단순 스플릿 (0.7:0.3)

In [489]:
model = RandomForestRegressor(n_estimators=30)

In [490]:
X = train.drop(['count'],axis=1)
Y = train['count']

In [491]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size = 0.3, random_state=42)

In [492]:
model.fit(X_train,Y_train)

In [493]:
model.score(X_test,Y_test)

0.805766062719114

### 0-4-2 KFold (5)

In [494]:
kf = KFold(n_splits=5,shuffle=True,random_state=42)

In [495]:
scores = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index],X.iloc[test_index]
    Y_train, Y_test = Y.iloc[train_index],Y.iloc[test_index]
    model.fit(X_train,Y_train)
    scores.append(model.score(X_test,Y_test))

In [496]:
max(scores), min(scores)

(0.8244827447580478, 0.6961478997435042)

### 0-4-3 StratifiedKFold (5)

In [497]:
skf = StratifiedKFold(n_splits=5)

In [498]:
scores =[]
for train_index, test_index in skf.split(X,Y):
    X_train, X_test = X.iloc[train_index],X.iloc[test_index]
    Y_train, Y_test = Y.iloc[train_index], Y.iloc[test_index]
    model.fit(X_train,Y_train)
    scores.append(model.score(X_test,Y_test))



In [499]:
scores = scores[1:len(scores)]
average = sum(scores)/len(scores)
print(average)

0.7681574065375875


### 0-Result RandomForest, Stratified KFold 기반 베이스라인 모델 설계

In [500]:
# Baseline 모델
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error
    ### 변경 가능 ###
from sklearn.ensemble import RandomForestRegressor

def basemodel(data):
    model = RandomForestRegressor(n_estimators=30)   # RandomForestRegressor, n_estimator (트리 개수)
    #################
    X = data.drop(['count'],axis=1)                 # train_X
    Y = data['count']
    skf = StratifiedKFold(n_splits=5)
    scores =[]
    for train_index, test_index in skf.split(X,Y):
        X_train, X_test = X.iloc[train_index],X.iloc[test_index]
        Y_train, Y_test = Y.iloc[train_index], Y.iloc[test_index]
        model.fit(X_train,Y_train)
        Y_pred = model.predict(X_test)
        rmse = np.sqrt(mean_squared_error(Y_test,Y_pred))
        scores.append(rmse)
    scores = scores[1:len(scores)]
    average = sum(scores)/len(scores)
    return average

# 1. 데이터 정제

## 1-1. 라이브러리 호출

In [501]:
import pandas as pd
import numpy as np

## 1-2. 파일 호출

In [502]:
original_train_df = pd.read_csv("/content/drive/MyDrive/content/sample_data/train.csv")

In [503]:
original_train_df = original_train_df.dropna()

In [504]:
train_df = pd.read_csv("/content/drive/MyDrive/content/sample_data/train.csv")

In [505]:
train_df.head()

Unnamed: 0,id,hour,hour_bef_temperature,hour_bef_precipitation,hour_bef_windspeed,hour_bef_humidity,hour_bef_visibility,hour_bef_ozone,hour_bef_pm10,hour_bef_pm2.5,count
0,3,20,16.3,1.0,1.5,89.0,576.0,0.027,76.0,33.0,49.0
1,6,13,20.1,0.0,1.4,48.0,916.0,0.042,73.0,40.0,159.0
2,7,6,13.9,0.0,0.7,79.0,1382.0,0.033,32.0,19.0,26.0
3,8,23,8.1,0.0,2.7,54.0,946.0,0.04,75.0,64.0,57.0
4,9,18,29.5,0.0,4.8,7.0,2000.0,0.057,27.0,11.0,431.0


In [506]:
test_df = pd.read_csv("/content/drive/MyDrive/content/sample_data/test.csv")

In [507]:
test_df.head()

Unnamed: 0,id,hour,hour_bef_temperature,hour_bef_precipitation,hour_bef_windspeed,hour_bef_humidity,hour_bef_visibility,hour_bef_ozone,hour_bef_pm10,hour_bef_pm2.5
0,0,7,20.7,0.0,1.3,62.0,954.0,0.041,44.0,27.0
1,1,17,30.0,0.0,5.4,33.0,1590.0,0.061,49.0,36.0
2,2,13,19.0,1.0,2.1,95.0,193.0,0.02,36.0,28.0
3,4,6,22.5,0.0,2.5,60.0,1185.0,0.027,52.0,38.0
4,5,22,14.6,1.0,3.4,93.0,218.0,0.041,18.0,15.0


## 1-3. 결측치 제거

### 1-3-1. Train 결측치

### 1-3-1-1. 결측치 확인

In [508]:
train_df.isnull().sum()

id                          0
hour                        0
hour_bef_temperature        2
hour_bef_precipitation      2
hour_bef_windspeed          9
hour_bef_humidity           2
hour_bef_visibility         2
hour_bef_ozone             76
hour_bef_pm10              90
hour_bef_pm2.5            117
count                       0
dtype: int64

### 1-3-1-2. 기온 결측치 처리

In [509]:
train_df[train_df['hour_bef_temperature'].isnull()]

Unnamed: 0,id,hour,hour_bef_temperature,hour_bef_precipitation,hour_bef_windspeed,hour_bef_humidity,hour_bef_visibility,hour_bef_ozone,hour_bef_pm10,hour_bef_pm2.5,count
934,1420,0,,,,,,,,,39.0
1035,1553,18,,,,,,,,,1.0


### Solution) 대부분의 열이 결측치이기 때문에 해당 행 삭제

In [510]:
train_df = train_df.dropna(subset=['hour_bef_temperature'])

In [511]:
train_df.isnull().sum()

id                          0
hour                        0
hour_bef_temperature        0
hour_bef_precipitation      0
hour_bef_windspeed          7
hour_bef_humidity           0
hour_bef_visibility         0
hour_bef_ozone             74
hour_bef_pm10              88
hour_bef_pm2.5            115
count                       0
dtype: int64

### 1-3-1-3. 나머지 결측치 처리

In [512]:
train_df[train_df['hour_bef_windspeed'].isnull()]

Unnamed: 0,id,hour,hour_bef_temperature,hour_bef_precipitation,hour_bef_windspeed,hour_bef_humidity,hour_bef_visibility,hour_bef_ozone,hour_bef_pm10,hour_bef_pm2.5,count
18,33,13,22.6,0.0,,41.0,987.0,0.046,64.0,39.0,208.0
244,381,1,14.1,0.0,,55.0,1992.0,,,,38.0
260,404,3,14.0,0.0,,50.0,2000.0,0.049,35.0,22.0,17.0
376,570,0,14.3,0.0,,49.0,2000.0,0.044,37.0,20.0,58.0
780,1196,20,16.5,0.0,,31.0,2000.0,0.058,39.0,18.0,181.0
1138,1717,12,21.4,0.0,,44.0,1375.0,0.044,61.0,37.0,116.0
1229,1855,2,14.0,0.0,,52.0,2000.0,0.044,37.0,20.0,20.0


### Solution) 나머지 항목은 시계열적인 특징을 갖고 있기 때문에 시계열 보간을 사용해주자

In [513]:
# train_df = train_df.interpolate(method='linear')

### Solution 실패) 자료형이 시간별로 주어졌다는 근거는 없다.

### Solution) 평균치로 넣어주자

In [514]:
train_df['hour_bef_windspeed'].fillna(int(train_df['hour_bef_windspeed'].mean()),inplace=True)
train_df['hour_bef_ozone'].fillna(int(train_df['hour_bef_ozone'].mean()),inplace=True)
train_df['hour_bef_pm10'].fillna(int(train_df['hour_bef_pm10'].mean()),inplace=True)
train_df['hour_bef_pm2.5'].fillna(int(train_df['hour_bef_pm2.5'].mean()),inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['hour_bef_windspeed'].fillna(int(train_df['hour_bef_windspeed'].mean()),inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['hour_bef_ozone'].fillna(int(train_df['hour_bef_ozone'].mean()),inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['hour_bef_pm10'].fillna(int(train_df['hour_bef_pm10'].mean()),inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http

### 1-3-1-4. 결측치 단순 제거 vs. 결측치 대체 비교

In [515]:
# 결측치 단순 제거
print(basemodel(original_train_df))



40.0003358677757


In [516]:
# # 결측치 대체 (시계열 보간)
# print(basemodel(train_df))

In [517]:
# 결측치 대체 (평균)
print(basemodel(train_df))



39.603926397045214


### result) 결측치를 단순 제거한 것이 정확도가 더 높다 -> 단순 제거 방식 채택

In [518]:
data = original_train_df

### 1-3-2. Test 결측치

### 1-3-2-1. 결측치 확인

In [519]:
test_df.isnull().sum()

id                         0
hour                       0
hour_bef_temperature       1
hour_bef_precipitation     1
hour_bef_windspeed         1
hour_bef_humidity          1
hour_bef_visibility        1
hour_bef_ozone            35
hour_bef_pm10             37
hour_bef_pm2.5            36
dtype: int64

### 1-3-2-2. 기온, 풍속, 습도, 시계 결측치 제거

In [520]:
test_df[test_df['hour_bef_temperature'].isnull()]

Unnamed: 0,id,hour,hour_bef_temperature,hour_bef_precipitation,hour_bef_windspeed,hour_bef_humidity,hour_bef_visibility,hour_bef_ozone,hour_bef_pm10,hour_bef_pm2.5
653,1943,19,,,,,,,,


### solution) 기온, 풍속, 습도, 시계는 한번에 평균치로 대체

In [521]:
test_df['hour_bef_temperature'].fillna(int(test_df['hour_bef_temperature'].mean()),inplace=True)

In [522]:
test_df['hour_bef_humidity'].fillna(int(test_df['hour_bef_humidity'].mean()),inplace=True)

In [523]:
test_df['hour_bef_windspeed'].fillna(round(test_df['hour_bef_windspeed'].mean(),1),inplace=True)

In [524]:
test_df['hour_bef_visibility'].fillna(int(test_df['hour_bef_visibility'].mean()),inplace=True)

### 1-3-2-3. 비 여부 결측치 제거

### solution) 비 여부는 한번에 최빈값으로 대체

In [525]:
test_df['hour_bef_precipitation'].fillna(test_df['hour_bef_precipitation'].mode()[0],inplace=True)

In [526]:
test_df.iloc[653]

id                        1943.0
hour                        19.0
hour_bef_temperature        23.0
hour_bef_precipitation       0.0
hour_bef_windspeed           2.4
hour_bef_humidity           56.0
hour_bef_visibility       1359.0
hour_bef_ozone               NaN
hour_bef_pm10                NaN
hour_bef_pm2.5               NaN
Name: 653, dtype: float64

### 1-3-2-4. 오존, 미세먼지 결측치 제거

### Solution) 마찬가지로 평균치로 채워버리자

In [527]:
test_df['hour_bef_ozone'].fillna(int(test_df['hour_bef_ozone'].mean()),inplace=True)

In [528]:
test_df['hour_bef_pm10'].fillna(int(test_df['hour_bef_pm10'].mean()),inplace=True)

In [529]:
test_df['hour_bef_pm2.5'].fillna(int(test_df['hour_bef_pm2.5'].mean()),inplace=True)

In [530]:
test_df.isnull().sum()

id                        0
hour                      0
hour_bef_temperature      0
hour_bef_precipitation    0
hour_bef_windspeed        0
hour_bef_humidity         0
hour_bef_visibility       0
hour_bef_ozone            0
hour_bef_pm10             0
hour_bef_pm2.5            0
dtype: int64

# 2. 모델링

In [531]:
# Baseline 모델 설계 수정
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error
    ### 변경 가능 ###
from sklearn.ensemble import RandomForestRegressor

base_tray=[]

def basemodel(data):
    global base_tray
    model = RandomForestRegressor(n_estimators=30)   # RandomForestRegressor, n_estimator (트리 개수)
    #################
    X = data.drop(['count'],axis=1)                 # train_X
    Y = data['count']
    skf = StratifiedKFold(n_splits=5)
    scores =[]
    for train_index, test_index in skf.split(X,Y):
        X_train, X_test = X.iloc[train_index],X.iloc[test_index]
        Y_train, Y_test = Y.iloc[train_index], Y.iloc[test_index]
        model.fit(X_train,Y_train)
        Y_pred = model.predict(X_test)
        rmse = np.sqrt(mean_squared_error(Y_test,Y_pred))
        scores.append(rmse)
    scores = scores[1:len(scores)]
    average = sum(scores)/len(scores)
    base_tray = model.predict(test_df)
    return average

In [532]:
result = {}

In [533]:
base_scores = []
for i in range(5):
    currScore = basemodel(data)
    base_scores.append(currScore)
baseScore = sum(base_scores[1:4])/3
result["basemodel"]=baseScore
print(baseScore)



39.57228108447743


In [534]:
print(result)

{'basemodel': 39.57228108447743}


## 2-1. Ridge

Ridge 회귀 모델이란? L2 규제를 사용하여 과적합을 줄임.    
L2 규제는 모델의 손실 함수에 가중치의 제곱합에 대한 항을 추가하여 적용.     
-> 모델이 복잡한 가중치 값을 가지지 않도록 제한함.    
-> 가중치의 크기가 커지만 규제 항의 값도 커져서 전체 손실함수가 커짐.    
-> 가중치 값을 작게 유지하면서 손실을 최소화 하는 방향으로 학습함.

In [535]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge

ridge_tray = []

def ridge(data):
    global ridge_tray
    ridge = Ridge(alpha = 1.0)
    X = data.drop(['count'],axis=1)
    Y = data['count']
    skf = StratifiedKFold(n_splits=5)
    scores =[]
    for train_index, test_index in skf.split(X,Y):
        X_train, X_test = X.iloc[train_index],X.iloc[test_index]
        Y_train, Y_test = Y.iloc[train_index], Y.iloc[test_index]
        ridge.fit(X_train,Y_train)
        Y_pred = ridge.predict(X_test)
        rmse = np.sqrt(mean_squared_error(Y_test,Y_pred))
        scores.append(rmse)
    scores = scores[1:len(scores)]
    average = sum(scores)/len(scores)
    ridge_tray = ridge.predict(test_df)
    return average

In [536]:
ridge_scores = []
for i in range(5):
    currScore = ridge(data)
    ridge_scores.append(currScore)
ridgeScore = sum(ridge_scores[1:4])/3
result["ridge"]=ridgeScore
print(ridgeScore)



53.21501237075224


## 2-2. Lasso

Lasso 회귀 모델이란? L1 규제를 사용하여 모델의 복잡성을 제어함.    
L1 규제는 손실 함수에 가중치의 절대값의 합에 대한 항을 추가하여 적용.    
-> 가중치의 절대값이 큰 경우 패널티를 부여하여 일부 가중치를 0으로 만듬.  
-> 모델의 불필요한 특성을 제거하고 중요한 특성에 집중할 수 있음.

In [537]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Lasso

lasso_tray = []

def lasso(data):
    global lasso_tray
    lasso = Lasso(alpha = 1.0)
    X = data.drop(['count'],axis=1)
    Y = data['count']
    skf = StratifiedKFold(n_splits=5)
    scores =[]
    for train_index, test_index in skf.split(X,Y):
        X_train, X_test = X.iloc[train_index],X.iloc[test_index]
        Y_train, Y_test = Y.iloc[train_index], Y.iloc[test_index]
        lasso.fit(X_train,Y_train)
        Y_pred = lasso.predict(X_test)
        rmse = np.sqrt(mean_squared_error(Y_test,Y_pred))
        scores.append(rmse)
    lasso_tray = lasso.predict(test_df)
    scores = scores[1:len(scores)]
    average = sum(scores)/len(scores)
    return average

In [538]:
lasso_scores = []
for i in range(5):
    currScore = lasso(data)
    lasso_scores.append(currScore)
lassoScore = sum(lasso_scores[1:4])/3
result["lasso"]=lassoScore
print(lassoScore)



53.666713866269724


In [539]:
result

{'basemodel': 39.57228108447743,
 'ridge': 53.21501237075224,
 'lasso': 53.666713866269724}

# 2-3. CatBoost

자동 특성 스케일링: CatBoost는 입력 특성들의 스케일을 자동으로 조정하여 모델의 안정성을 향상시킴.

범주형 변수 처리: CatBoost는 범주형 변수를 자동으로 처리할 수 있으며, 이를 통해 범주형 변수의 인코딩에 대한 번거로움을 줄여줌.

과적합 방지: CatBoost는 다양한 과적합 방지 기능을 제공하여 모델의 일반화 성능을 향상시킴.

유연한 하이퍼파라미터 조정: CatBoost는 다양한 하이퍼파라미터를 조정할 수 있으며, 이를 통해 모델의 성능을 최적화할 수 있음.

속도 및 확장성: CatBoost는 고도로 최적화된 알고리즘을 사용하여 빠르고 확장 가능한 모델 훈련을 지원함.

In [540]:
# !pip install catboost
# 한번 실행 후 주석 처리

In [541]:
from catboost import CatBoostRegressor

In [542]:
cat_tray = []

def catboost(data):
    global cat_tray
    model = CatBoostRegressor(iterations=200,learning_rate=0.1,depth=6)
    X = data.drop(['count'],axis=1)
    Y = data['count']
    skf = StratifiedKFold(n_splits=5)
    scores =[]
    for train_index, test_index in skf.split(X,Y):
        X_train, X_test = X.iloc[train_index],X.iloc[test_index]
        Y_train, Y_test = Y.iloc[train_index], Y.iloc[test_index]
        model.fit(X_train,Y_train)
        Y_pred = model.predict(X_test)
        rmse = np.sqrt(mean_squared_error(Y_test,Y_pred))
        scores.append(rmse)
    cat_tray = model.predict(test_df)
    scores = scores[1:len(scores)]
    average = sum(scores)/len(scores)
    return average

In [None]:
cat_scores = []
for i in range(5):
    catScore = catboost(data)
    cat_scores.append(catScore)
catScore = sum(cat_scores[1:4])/3
result["cat"]=catScore
print(catScore)

# 3. Submission

In [544]:
# basemodel(data)
# ridge(data)
# lasso(data)
# catboost(data)

In [545]:
submission = pd.read_csv('/content/drive/MyDrive/content/sample_data/submission.csv')
submission['count'] = base_tray # 가장 최근에 실행한 회귀 분석
submission.to_csv('/content/drive/MyDrive/content/sample_data/catboost.csv',index=False)

## 3-1. Hard Voting

In [546]:
print(len(base_tray))
print(len(ridge_tray))
print(len(lasso_tray))
print(len(cat_tray))

715
715
715
715


In [547]:
result

{'basemodel': 39.57228108447743,
 'ridge': 53.21501237075224,
 'lasso': 53.666713866269724,
 'cat': 38.64888598203282}

In [548]:
weight = {}
for i in result.keys():
    weight[i]=100-result[i]


In [549]:
weight

{'basemodel': 60.42771891552257,
 'ridge': 46.78498762924776,
 'lasso': 46.333286133730276,
 'cat': 61.35111401796718}

In [550]:
sum(weight.values())

214.89710669646777

In [551]:
tray = []
for i in range(len(base_tray)):
    curr = (base_tray[i]*weight['basemodel'] + ridge_tray[i]*weight['ridge']+lasso_tray[i]*weight['lasso']+cat_tray[i]*weight['cat'])/sum(weight.values())
    tray.append(curr)

In [552]:
submission = pd.read_csv('/content/drive/MyDrive/content/sample_data/submission.csv')
submission['count'] = tray
submission.to_csv('/content/drive/MyDrive/content/sample_data/hardvoting.csv',index=False)

In [553]:
submission

Unnamed: 0,id,count
0,0,102.347047
1,1,238.014946
2,2,77.211621
3,4,77.323998
4,5,78.589368
...,...,...
710,2148,102.282754
711,2149,79.013669
712,2165,142.858779
713,2166,191.319430


베이스라인 제출 점수: 48.6021   
CatBoost 제출 점수: 50.553    
Hardvoting 제출 점수: 54.519