In [1]:
import pandas as pd
import numpy as np
import warnings 
warnings.filterwarnings('ignore')
from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler,RobustScaler
from category_encoders.target_encoder import TargetEncoder

from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error,mean_squared_log_error,make_scorer
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import seaborn as sns
import optuna
from optuna.samplers import TPESampler


import os
from catboost import CatBoostRegressor
from tqdm import tqdm
from sklearn.model_selection import KFold


In [26]:
train = pd.read_csv("train2.csv").drop(['사망자수','중상자수','경상자수','부상자수'],axis=1)
test = pd.read_csv("test(통합본).csv")

In [27]:
def gu(address):
    return address.split()[0]
def dong(address):
    return address.split()[1]
def road_1(x):
    return x.split(' - ')[0]
def road_2(x):
    return x.split(' - ')[1]

In [28]:
for df in [train,test]:
    df['구'] = df['주소'].apply(gu)
    df['동'] = df['주소'].apply(dong)
    df.drop('주소',axis=1,inplace=True)
    df['도로구분1'] = df['도로형태'].apply(road_1)
    df['도로구분2'] = df['도로형태'].apply(road_2)
    df.drop('도로형태',axis=1,inplace=True)

In [29]:
train = train[['요일', '기상상태', '도로구분1','도로구분2', '노면상태', '사고유형', '구', '동', '연휴', '년도', '월', '일', '시간',
       '설치장소', '설치개수', 'CCTV설치대수', '급지구분_1', '급지구분_2', '급지구분_3', 'ECLO']]
test = test[['요일', '기상상태', '도로구분1','도로구분2', '노면상태', '사고유형', '구', '동', '연휴', '년도', '월', '일', '시간',
       '설치장소', '설치개수', 'CCTV설치대수', '급지구분_1', '급지구분_2', '급지구분_3']]

In [30]:
categorical_features = list(train.dtypes[train.dtypes == "object"].index)
display(categorical_features)

for i in categorical_features:
    le = LabelEncoder()
    le=le.fit(train[i]) 
    train[i]=le.transform(train[i])
    
    for case in np.unique(test[i]):
        if case not in le.classes_: 
            le.classes_ = np.append(le.classes_, case) 
    test[i]=le.transform(test[i])

['요일', '기상상태', '도로구분1', '도로구분2', '노면상태', '사고유형', '구', '동']

In [31]:
train_x = train.drop('ECLO',axis=1)
train_y = np.log(train['ECLO'])
test_x = test

In [32]:
rs = RobustScaler()
X_standard = rs.fit_transform(train_x)
test_standard = rs.transform(test_x.iloc[:,:])

In [33]:
X_train, X_valid , y_train , y_valid  = train_test_split(X_standard ,train_y, shuffle=True,test_size=0.2)

In [34]:
import numpy as np

def rmsle(y_true, y_pred, squared=True):
    assert len(y_true) == len(y_pred), "Input arrays must have the same length."

    y_true_log1p = np.log1p(y_true)
    y_pred_log1p = np.log1p(y_pred)

    if squared:
        return np.mean((y_true_log1p - y_pred_log1p) ** 2)
    else:
        return np.sqrt(np.mean((y_true_log1p - y_pred_log1p) ** 2))

def objective(trial, train_x, train_y, val_x, val_y):
    param = {
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 0.1),
        'subsample': trial.suggest_float('subsample', 0.1, 1.0),
        'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.1),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'max_depth': trial.suggest_int('max_depth',4,15),
    }
    model = CatBoostRegressor(**param)
    model.fit(train_x, train_y)
    preds = model.predict(val_x)
    score = rmsle(val_y, preds, squared=False)
    return score




In [35]:
study =  optuna.create_study(study_name='Catboost', direction='minimize',sampler=TPESampler(seed=42) )
study.optimize(lambda trial: objective(trial,X_train,  y_train ,X_valid , y_valid ),n_trials=50, timeout = 1000 )
print('Best trial:', study.best_trial.params)
print('Best score:', study.best_value)

[I 2023-12-09 16:11:54,313] A new study created in memory with name: Catboost


0:	learn: 3.2252738	total: 10.6ms	remaining: 6.55s
1:	learn: 3.2198103	total: 20ms	remaining: 6.17s
2:	learn: 3.2154793	total: 29.2ms	remaining: 5.99s
3:	learn: 3.2111195	total: 37.7ms	remaining: 5.8s
4:	learn: 3.2080135	total: 46.2ms	remaining: 5.67s
5:	learn: 3.2050730	total: 53.9ms	remaining: 5.51s
6:	learn: 3.2024472	total: 62.3ms	remaining: 5.45s
7:	learn: 3.2003389	total: 69.9ms	remaining: 5.34s
8:	learn: 3.1982198	total: 78ms	remaining: 5.29s
9:	learn: 3.1964666	total: 87.4ms	remaining: 5.32s
10:	learn: 3.1949656	total: 95.9ms	remaining: 5.3s
11:	learn: 3.1932881	total: 105ms	remaining: 5.32s
12:	learn: 3.1917091	total: 113ms	remaining: 5.29s
13:	learn: 3.1903706	total: 122ms	remaining: 5.29s
14:	learn: 3.1894106	total: 131ms	remaining: 5.26s
15:	learn: 3.1881456	total: 138ms	remaining: 5.22s
16:	learn: 3.1871120	total: 147ms	remaining: 5.2s
17:	learn: 3.1863882	total: 155ms	remaining: 5.17s
18:	learn: 3.1850628	total: 163ms	remaining: 5.14s
19:	learn: 3.1843937	total: 171ms	rem

162:	learn: 3.1111137	total: 1.22s	remaining: 3.42s
163:	learn: 3.1106101	total: 1.23s	remaining: 3.41s
164:	learn: 3.1101674	total: 1.24s	remaining: 3.4s
165:	learn: 3.1098349	total: 1.24s	remaining: 3.39s
166:	learn: 3.1093984	total: 1.25s	remaining: 3.38s
167:	learn: 3.1088838	total: 1.26s	remaining: 3.37s
168:	learn: 3.1083853	total: 1.26s	remaining: 3.36s
169:	learn: 3.1080552	total: 1.27s	remaining: 3.36s
170:	learn: 3.1075906	total: 1.28s	remaining: 3.35s
171:	learn: 3.1070056	total: 1.28s	remaining: 3.34s
172:	learn: 3.1065707	total: 1.29s	remaining: 3.33s
173:	learn: 3.1061011	total: 1.3s	remaining: 3.32s
174:	learn: 3.1058077	total: 1.31s	remaining: 3.31s
175:	learn: 3.1051075	total: 1.31s	remaining: 3.31s
176:	learn: 3.1047204	total: 1.32s	remaining: 3.3s
177:	learn: 3.1043142	total: 1.33s	remaining: 3.29s
178:	learn: 3.1038599	total: 1.33s	remaining: 3.28s
179:	learn: 3.1034950	total: 1.34s	remaining: 3.27s
180:	learn: 3.1031596	total: 1.35s	remaining: 3.26s
181:	learn: 3.1

324:	learn: 3.0318055	total: 2.45s	remaining: 2.22s
325:	learn: 3.0314936	total: 2.46s	remaining: 2.21s
326:	learn: 3.0312235	total: 2.46s	remaining: 2.2s
327:	learn: 3.0308872	total: 2.47s	remaining: 2.19s
328:	learn: 3.0306483	total: 2.48s	remaining: 2.19s
329:	learn: 3.0303177	total: 2.49s	remaining: 2.18s
330:	learn: 3.0298948	total: 2.49s	remaining: 2.17s
331:	learn: 3.0295996	total: 2.5s	remaining: 2.16s
332:	learn: 3.0292727	total: 2.51s	remaining: 2.15s
333:	learn: 3.0289326	total: 2.52s	remaining: 2.15s
334:	learn: 3.0285879	total: 2.52s	remaining: 2.14s
335:	learn: 3.0283080	total: 2.53s	remaining: 2.13s
336:	learn: 3.0280393	total: 2.54s	remaining: 2.12s
337:	learn: 3.0277117	total: 2.55s	remaining: 2.12s
338:	learn: 3.0273592	total: 2.55s	remaining: 2.11s
339:	learn: 3.0270250	total: 2.56s	remaining: 2.1s
340:	learn: 3.0267205	total: 2.57s	remaining: 2.09s
341:	learn: 3.0264642	total: 2.57s	remaining: 2.08s
342:	learn: 3.0260396	total: 2.58s	remaining: 2.08s
343:	learn: 3.0

486:	learn: 2.9845591	total: 3.67s	remaining: 995ms
487:	learn: 2.9843257	total: 3.68s	remaining: 987ms
488:	learn: 2.9841044	total: 3.68s	remaining: 980ms
489:	learn: 2.9837890	total: 3.69s	remaining: 972ms
490:	learn: 2.9835207	total: 3.7s	remaining: 965ms
491:	learn: 2.9832594	total: 3.71s	remaining: 958ms
492:	learn: 2.9829476	total: 3.72s	remaining: 950ms
493:	learn: 2.9826885	total: 3.72s	remaining: 942ms
494:	learn: 2.9824955	total: 3.73s	remaining: 935ms
495:	learn: 2.9822134	total: 3.74s	remaining: 927ms
496:	learn: 2.9819033	total: 3.75s	remaining: 920ms
497:	learn: 2.9816222	total: 3.75s	remaining: 913ms
498:	learn: 2.9813926	total: 3.76s	remaining: 905ms
499:	learn: 2.9811307	total: 3.77s	remaining: 898ms
500:	learn: 2.9807558	total: 3.78s	remaining: 890ms
501:	learn: 2.9805109	total: 3.79s	remaining: 883ms
502:	learn: 2.9802809	total: 3.79s	remaining: 875ms
503:	learn: 2.9800577	total: 3.8s	remaining: 867ms
504:	learn: 2.9799179	total: 3.81s	remaining: 860ms
505:	learn: 2.

[I 2023-12-09 16:11:59,678] Trial 0 finished with value: 0.4597561773792155 and parameters: {'l2_leaf_reg': 0.03807947176588889, 'subsample': 0.9556428757689246, 'learning_rate': 0.07322619478695937, 'n_estimators': 619, 'max_depth': 5}. Best is trial 0 with value: 0.4597561773792155.


0:	learn: 3.2243503	total: 6.08ms	remaining: 3.77s
1:	learn: 3.1978245	total: 99.3ms	remaining: 30.7s
2:	learn: 3.1831746	total: 187ms	remaining: 38.6s
3:	learn: 3.1625646	total: 268ms	remaining: 41.3s
4:	learn: 3.1458277	total: 359ms	remaining: 44.2s
5:	learn: 3.1368429	total: 447ms	remaining: 45.8s
6:	learn: 3.1238517	total: 527ms	remaining: 46.3s
7:	learn: 3.1035640	total: 615ms	remaining: 47.1s
8:	learn: 3.0922076	total: 704ms	remaining: 47.9s
9:	learn: 3.0846241	total: 787ms	remaining: 48.1s
10:	learn: 3.0708850	total: 875ms	remaining: 48.5s
11:	learn: 3.0599262	total: 963ms	remaining: 48.9s
12:	learn: 3.0451616	total: 1.05s	remaining: 49.1s
13:	learn: 3.0345599	total: 1.13s	remaining: 49.2s
14:	learn: 3.0206715	total: 1.22s	remaining: 49.4s
15:	learn: 3.0126724	total: 1.31s	remaining: 49.4s
16:	learn: 3.0067403	total: 1.4s	remaining: 49.6s
17:	learn: 2.9981957	total: 1.48s	remaining: 49.7s
18:	learn: 2.9928603	total: 1.57s	remaining: 49.8s
19:	learn: 2.9826727	total: 1.66s	remain

163:	learn: 2.0928922	total: 14.1s	remaining: 39.3s
164:	learn: 2.0867368	total: 14.2s	remaining: 39.2s
165:	learn: 2.0817744	total: 14.3s	remaining: 39.2s
166:	learn: 2.0777907	total: 14.4s	remaining: 39.1s
167:	learn: 2.0749292	total: 14.5s	remaining: 39s
168:	learn: 2.0692563	total: 14.5s	remaining: 38.9s
169:	learn: 2.0649300	total: 14.6s	remaining: 38.8s
170:	learn: 2.0607937	total: 14.7s	remaining: 38.7s
171:	learn: 2.0572370	total: 14.8s	remaining: 38.6s
172:	learn: 2.0541507	total: 14.9s	remaining: 38.6s
173:	learn: 2.0494205	total: 15s	remaining: 38.5s
174:	learn: 2.0420525	total: 15.1s	remaining: 38.4s
175:	learn: 2.0387130	total: 15.2s	remaining: 38.3s
176:	learn: 2.0356256	total: 15.2s	remaining: 38.2s
177:	learn: 2.0322446	total: 15.3s	remaining: 38.2s
178:	learn: 2.0276702	total: 15.4s	remaining: 38.1s
179:	learn: 2.0242887	total: 15.5s	remaining: 38s
180:	learn: 2.0170131	total: 15.6s	remaining: 37.9s
181:	learn: 2.0125774	total: 15.7s	remaining: 37.8s
182:	learn: 2.0078

323:	learn: 1.5191089	total: 28s	remaining: 25.7s
324:	learn: 1.5156374	total: 28.1s	remaining: 25.6s
325:	learn: 1.5144495	total: 28.2s	remaining: 25.5s
326:	learn: 1.5123691	total: 28.3s	remaining: 25.4s
327:	learn: 1.5102167	total: 28.3s	remaining: 25.3s
328:	learn: 1.5079974	total: 28.4s	remaining: 25.2s
329:	learn: 1.5054471	total: 28.5s	remaining: 25.1s
330:	learn: 1.5018605	total: 28.6s	remaining: 25.1s
331:	learn: 1.4984901	total: 28.7s	remaining: 25s
332:	learn: 1.4949464	total: 28.8s	remaining: 24.9s
333:	learn: 1.4928875	total: 28.9s	remaining: 24.8s
334:	learn: 1.4890285	total: 29s	remaining: 24.7s
335:	learn: 1.4864568	total: 29s	remaining: 24.6s
336:	learn: 1.4840985	total: 29.1s	remaining: 24.5s
337:	learn: 1.4801422	total: 29.2s	remaining: 24.5s
338:	learn: 1.4763951	total: 29.3s	remaining: 24.4s
339:	learn: 1.4738145	total: 29.4s	remaining: 24.3s
340:	learn: 1.4724920	total: 29.5s	remaining: 24.2s
341:	learn: 1.4707395	total: 29.6s	remaining: 24.1s
342:	learn: 1.468089

483:	learn: 1.1664027	total: 41.9s	remaining: 11.9s
484:	learn: 1.1643253	total: 42s	remaining: 11.8s
485:	learn: 1.1624452	total: 42.1s	remaining: 11.7s
486:	learn: 1.1602305	total: 42.2s	remaining: 11.6s
487:	learn: 1.1585101	total: 42.3s	remaining: 11.5s
488:	learn: 1.1569341	total: 42.4s	remaining: 11.4s
489:	learn: 1.1557086	total: 42.5s	remaining: 11.4s
490:	learn: 1.1536196	total: 42.5s	remaining: 11.3s
491:	learn: 1.1520867	total: 42.6s	remaining: 11.2s
492:	learn: 1.1504140	total: 42.7s	remaining: 11.1s
493:	learn: 1.1479762	total: 42.8s	remaining: 11s
494:	learn: 1.1457587	total: 42.9s	remaining: 10.9s
495:	learn: 1.1436327	total: 43s	remaining: 10.8s
496:	learn: 1.1415614	total: 43.1s	remaining: 10.7s
497:	learn: 1.1393465	total: 43.2s	remaining: 10.7s
498:	learn: 1.1371636	total: 43.2s	remaining: 10.6s
499:	learn: 1.1360090	total: 43.3s	remaining: 10.5s
500:	learn: 1.1339189	total: 43.4s	remaining: 10.4s
501:	learn: 1.1319959	total: 43.5s	remaining: 10.3s
502:	learn: 1.1303

[I 2023-12-09 16:12:55,450] Trial 1 finished with value: 0.5057223454328802 and parameters: {'l2_leaf_reg': 0.016443457513284063, 'subsample': 0.15227525095137953, 'learning_rate': 0.08663099696291603, 'n_estimators': 621, 'max_depth': 12}. Best is trial 0 with value: 0.4597561773792155.


0:	learn: 3.2245811	total: 10.5ms	remaining: 2.62s
1:	learn: 3.2182617	total: 20.3ms	remaining: 2.52s
2:	learn: 3.2133434	total: 30.3ms	remaining: 2.5s
3:	learn: 3.2085679	total: 40.6ms	remaining: 2.51s
4:	learn: 3.2043045	total: 50.2ms	remaining: 2.47s
5:	learn: 3.2011310	total: 59.3ms	remaining: 2.42s
6:	learn: 3.1982226	total: 68.7ms	remaining: 2.4s
7:	learn: 3.1956632	total: 78.3ms	remaining: 2.38s
8:	learn: 3.1929092	total: 87.8ms	remaining: 2.36s
9:	learn: 3.1908559	total: 97.4ms	remaining: 2.35s
10:	learn: 3.1894250	total: 106ms	remaining: 2.32s
11:	learn: 3.1866892	total: 116ms	remaining: 2.3s
12:	learn: 3.1848463	total: 125ms	remaining: 2.29s
13:	learn: 3.1835189	total: 134ms	remaining: 2.26s
14:	learn: 3.1814938	total: 143ms	remaining: 2.25s
15:	learn: 3.1801633	total: 153ms	remaining: 2.24s
16:	learn: 3.1787253	total: 162ms	remaining: 2.22s
17:	learn: 3.1773057	total: 171ms	remaining: 2.21s
18:	learn: 3.1760260	total: 182ms	remaining: 2.22s
19:	learn: 3.1732279	total: 191ms	

181:	learn: 3.0358100	total: 1.66s	remaining: 630ms
182:	learn: 3.0351387	total: 1.67s	remaining: 620ms
183:	learn: 3.0344096	total: 1.68s	remaining: 610ms
184:	learn: 3.0335386	total: 1.68s	remaining: 601ms
185:	learn: 3.0328811	total: 1.69s	remaining: 591ms
186:	learn: 3.0323796	total: 1.7s	remaining: 582ms
187:	learn: 3.0317060	total: 1.71s	remaining: 573ms
188:	learn: 3.0309353	total: 1.72s	remaining: 564ms
189:	learn: 3.0302587	total: 1.73s	remaining: 555ms
190:	learn: 3.0296109	total: 1.74s	remaining: 545ms
191:	learn: 3.0287789	total: 1.74s	remaining: 536ms
192:	learn: 3.0281035	total: 1.75s	remaining: 527ms
193:	learn: 3.0268597	total: 1.76s	remaining: 518ms
194:	learn: 3.0261810	total: 1.77s	remaining: 508ms
195:	learn: 3.0255639	total: 1.78s	remaining: 499ms
196:	learn: 3.0246689	total: 1.79s	remaining: 490ms
197:	learn: 3.0240250	total: 1.79s	remaining: 481ms
198:	learn: 3.0234898	total: 1.8s	remaining: 471ms
199:	learn: 3.0227360	total: 1.81s	remaining: 462ms
200:	learn: 3.

[I 2023-12-09 16:12:58,272] Trial 2 finished with value: 0.4577112753854319 and parameters: {'l2_leaf_reg': 0.0030378649352844423, 'subsample': 0.9729188669457949, 'learning_rate': 0.08326101981596214, 'n_estimators': 251, 'max_depth': 6}. Best is trial 2 with value: 0.4577112753854319.


0:	learn: 3.2267258	total: 5.87ms	remaining: 2.69s
1:	learn: 3.2228055	total: 15.6ms	remaining: 3.58s
2:	learn: 3.2183855	total: 25.1ms	remaining: 3.83s
3:	learn: 3.2140186	total: 34.1ms	remaining: 3.89s
4:	learn: 3.2105151	total: 44.1ms	remaining: 4.01s
5:	learn: 3.2073658	total: 53ms	remaining: 4.01s
6:	learn: 3.2050571	total: 60.5ms	remaining: 3.92s
7:	learn: 3.2022123	total: 70ms	remaining: 3.96s
8:	learn: 3.1998385	total: 79.1ms	remaining: 3.96s
9:	learn: 3.1976378	total: 88.6ms	remaining: 3.98s
10:	learn: 3.1953692	total: 97ms	remaining: 3.96s
11:	learn: 3.1934206	total: 106ms	remaining: 3.97s
12:	learn: 3.1916697	total: 116ms	remaining: 3.98s
13:	learn: 3.1898645	total: 126ms	remaining: 4s
14:	learn: 3.1877434	total: 135ms	remaining: 4s
15:	learn: 3.1859929	total: 144ms	remaining: 3.99s
16:	learn: 3.1854980	total: 148ms	remaining: 3.86s
17:	learn: 3.1839803	total: 157ms	remaining: 3.86s
18:	learn: 3.1827040	total: 166ms	remaining: 3.85s
19:	learn: 3.1812737	total: 175ms	remainin

163:	learn: 3.0789768	total: 1.45s	remaining: 2.61s
164:	learn: 3.0783341	total: 1.46s	remaining: 2.6s
165:	learn: 3.0773108	total: 1.46s	remaining: 2.59s
166:	learn: 3.0764885	total: 1.47s	remaining: 2.58s
167:	learn: 3.0756283	total: 1.48s	remaining: 2.58s
168:	learn: 3.0751875	total: 1.49s	remaining: 2.57s
169:	learn: 3.0744277	total: 1.5s	remaining: 2.56s
170:	learn: 3.0739640	total: 1.51s	remaining: 2.55s
171:	learn: 3.0735929	total: 1.52s	remaining: 2.54s
172:	learn: 3.0729134	total: 1.53s	remaining: 2.53s
173:	learn: 3.0726944	total: 1.53s	remaining: 2.52s
174:	learn: 3.0719655	total: 1.54s	remaining: 2.52s
175:	learn: 3.0714250	total: 1.55s	remaining: 2.5s
176:	learn: 3.0697268	total: 1.56s	remaining: 2.5s
177:	learn: 3.0692585	total: 1.57s	remaining: 2.49s
178:	learn: 3.0687792	total: 1.58s	remaining: 2.48s
179:	learn: 3.0683908	total: 1.59s	remaining: 2.47s
180:	learn: 3.0679093	total: 1.59s	remaining: 2.46s
181:	learn: 3.0675981	total: 1.6s	remaining: 2.45s
182:	learn: 3.067

324:	learn: 2.9788536	total: 2.87s	remaining: 1.19s
325:	learn: 2.9783141	total: 2.88s	remaining: 1.18s
326:	learn: 2.9778381	total: 2.88s	remaining: 1.17s
327:	learn: 2.9770742	total: 2.89s	remaining: 1.16s
328:	learn: 2.9767824	total: 2.9s	remaining: 1.16s
329:	learn: 2.9763232	total: 2.91s	remaining: 1.15s
330:	learn: 2.9757525	total: 2.92s	remaining: 1.14s
331:	learn: 2.9751499	total: 2.93s	remaining: 1.13s
332:	learn: 2.9745017	total: 2.94s	remaining: 1.12s
333:	learn: 2.9738112	total: 2.95s	remaining: 1.11s
334:	learn: 2.9732711	total: 2.96s	remaining: 1.1s
335:	learn: 2.9728968	total: 2.97s	remaining: 1.09s
336:	learn: 2.9726391	total: 2.98s	remaining: 1.09s
337:	learn: 2.9721320	total: 2.99s	remaining: 1.08s
338:	learn: 2.9714242	total: 3s	remaining: 1.07s
339:	learn: 2.9710702	total: 3s	remaining: 1.06s
340:	learn: 2.9704505	total: 3.01s	remaining: 1.05s
341:	learn: 2.9697782	total: 3.02s	remaining: 1.04s
342:	learn: 2.9693584	total: 3.03s	remaining: 1.03s
343:	learn: 2.968761

[I 2023-12-09 16:13:03,097] Trial 3 finished with value: 0.45798311837014916 and parameters: {'l2_leaf_reg': 0.01915704647548995, 'subsample': 0.373818018663584, 'learning_rate': 0.05252316752006057, 'n_estimators': 460, 'max_depth': 7}. Best is trial 2 with value: 0.4577112753854319.


459:	learn: 2.9119800	total: 4.13s	remaining: 0us
0:	learn: 3.2283931	total: 6.76ms	remaining: 2.68s
1:	learn: 3.2243862	total: 24.8ms	remaining: 4.92s
2:	learn: 3.2211499	total: 42.2ms	remaining: 5.55s
3:	learn: 3.2182570	total: 58.9ms	remaining: 5.8s
4:	learn: 3.2155010	total: 76ms	remaining: 5.97s
5:	learn: 3.2128849	total: 92.2ms	remaining: 6.02s
6:	learn: 3.2100106	total: 109ms	remaining: 6.12s
7:	learn: 3.2073638	total: 126ms	remaining: 6.15s
8:	learn: 3.2049021	total: 142ms	remaining: 6.13s
9:	learn: 3.2020687	total: 158ms	remaining: 6.13s
10:	learn: 3.1997610	total: 175ms	remaining: 6.14s
11:	learn: 3.1970756	total: 191ms	remaining: 6.15s
12:	learn: 3.1948448	total: 207ms	remaining: 6.14s
13:	learn: 3.1921908	total: 223ms	remaining: 6.11s
14:	learn: 3.1895865	total: 240ms	remaining: 6.12s
15:	learn: 3.1869034	total: 256ms	remaining: 6.11s
16:	learn: 3.1850800	total: 272ms	remaining: 6.09s
17:	learn: 3.1832841	total: 288ms	remaining: 6.08s
18:	learn: 3.1816011	total: 304ms	remai

168:	learn: 3.0347898	total: 2.76s	remaining: 3.74s
169:	learn: 3.0341184	total: 2.77s	remaining: 3.72s
170:	learn: 3.0337676	total: 2.79s	remaining: 3.71s
171:	learn: 3.0330672	total: 2.81s	remaining: 3.69s
172:	learn: 3.0320377	total: 2.83s	remaining: 3.68s
173:	learn: 3.0310609	total: 2.84s	remaining: 3.66s
174:	learn: 3.0299096	total: 2.86s	remaining: 3.64s
175:	learn: 3.0283428	total: 2.88s	remaining: 3.63s
176:	learn: 3.0273385	total: 2.89s	remaining: 3.61s
177:	learn: 3.0268137	total: 2.91s	remaining: 3.59s
178:	learn: 3.0257044	total: 2.92s	remaining: 3.58s
179:	learn: 3.0247326	total: 2.94s	remaining: 3.56s
180:	learn: 3.0240591	total: 2.96s	remaining: 3.54s
181:	learn: 3.0234147	total: 2.97s	remaining: 3.53s
182:	learn: 3.0225933	total: 2.99s	remaining: 3.51s
183:	learn: 3.0218623	total: 3s	remaining: 3.49s
184:	learn: 3.0211011	total: 3.02s	remaining: 3.48s
185:	learn: 3.0204400	total: 3.04s	remaining: 3.46s
186:	learn: 3.0194423	total: 3.05s	remaining: 3.44s
187:	learn: 3.0

339:	learn: 2.9170590	total: 5.5s	remaining: 939ms
340:	learn: 2.9166190	total: 5.52s	remaining: 923ms
341:	learn: 2.9157828	total: 5.54s	remaining: 907ms
342:	learn: 2.9151251	total: 5.55s	remaining: 890ms
343:	learn: 2.9141606	total: 5.57s	remaining: 874ms
344:	learn: 2.9135303	total: 5.59s	remaining: 858ms
345:	learn: 2.9128925	total: 5.6s	remaining: 842ms
346:	learn: 2.9120977	total: 5.62s	remaining: 826ms
347:	learn: 2.9115558	total: 5.63s	remaining: 810ms
348:	learn: 2.9108795	total: 5.65s	remaining: 794ms
349:	learn: 2.9100878	total: 5.67s	remaining: 777ms
350:	learn: 2.9093296	total: 5.68s	remaining: 761ms
351:	learn: 2.9088915	total: 5.7s	remaining: 745ms
352:	learn: 2.9076398	total: 5.72s	remaining: 729ms
353:	learn: 2.9066211	total: 5.73s	remaining: 713ms
354:	learn: 2.9062619	total: 5.75s	remaining: 696ms
355:	learn: 2.9054876	total: 5.76s	remaining: 680ms
356:	learn: 2.9052545	total: 5.78s	remaining: 664ms
357:	learn: 2.9048189	total: 5.8s	remaining: 648ms
358:	learn: 2.90

[I 2023-12-09 16:13:10,306] Trial 4 finished with value: 0.45779883974882885 and parameters: {'l2_leaf_reg': 0.06157343657751557, 'subsample': 0.22554447458683766, 'learning_rate': 0.029285250388668294, 'n_estimators': 398, 'max_depth': 9}. Best is trial 2 with value: 0.4577112753854319.


0:	learn: 3.2268005	total: 5.95ms	remaining: 3.64s
1:	learn: 3.2231628	total: 11ms	remaining: 3.37s
2:	learn: 3.2200878	total: 16.9ms	remaining: 3.44s
3:	learn: 3.2175086	total: 21.9ms	remaining: 3.34s
4:	learn: 3.2148011	total: 27.1ms	remaining: 3.3s
5:	learn: 3.2123066	total: 33ms	remaining: 3.34s
6:	learn: 3.2103157	total: 38.3ms	remaining: 3.31s
7:	learn: 3.2084886	total: 43ms	remaining: 3.25s
8:	learn: 3.2065973	total: 48.5ms	remaining: 3.25s
9:	learn: 3.2046608	total: 54ms	remaining: 3.26s
10:	learn: 3.2030116	total: 59.3ms	remaining: 3.25s
11:	learn: 3.2016322	total: 64.7ms	remaining: 3.24s
12:	learn: 3.2003803	total: 70.7ms	remaining: 3.26s
13:	learn: 3.1990520	total: 76.3ms	remaining: 3.26s
14:	learn: 3.1978459	total: 81.1ms	remaining: 3.23s
15:	learn: 3.1970731	total: 85.9ms	remaining: 3.21s
16:	learn: 3.1962326	total: 91ms	remaining: 3.19s
17:	learn: 3.1952646	total: 95.9ms	remaining: 3.17s
18:	learn: 3.1943923	total: 102ms	remaining: 3.18s
19:	learn: 3.1935742	total: 106ms	

187:	learn: 3.1540126	total: 1s	remaining: 2.27s
188:	learn: 3.1538865	total: 1.01s	remaining: 2.27s
189:	learn: 3.1538179	total: 1.01s	remaining: 2.26s
190:	learn: 3.1536992	total: 1.02s	remaining: 2.25s
191:	learn: 3.1535333	total: 1.02s	remaining: 2.25s
192:	learn: 3.1534574	total: 1.03s	remaining: 2.24s
193:	learn: 3.1533237	total: 1.04s	remaining: 2.24s
194:	learn: 3.1532080	total: 1.04s	remaining: 2.23s
195:	learn: 3.1530870	total: 1.05s	remaining: 2.23s
196:	learn: 3.1529971	total: 1.05s	remaining: 2.22s
197:	learn: 3.1527894	total: 1.06s	remaining: 2.22s
198:	learn: 3.1526836	total: 1.06s	remaining: 2.21s
199:	learn: 3.1525351	total: 1.07s	remaining: 2.21s
200:	learn: 3.1524485	total: 1.07s	remaining: 2.2s
201:	learn: 3.1522164	total: 1.08s	remaining: 2.2s
202:	learn: 3.1519873	total: 1.08s	remaining: 2.19s
203:	learn: 3.1518954	total: 1.09s	remaining: 2.19s
204:	learn: 3.1516574	total: 1.1s	remaining: 2.18s
205:	learn: 3.1515736	total: 1.1s	remaining: 2.18s
206:	learn: 3.15152

373:	learn: 3.1236901	total: 2s	remaining: 1.28s
374:	learn: 3.1235373	total: 2.01s	remaining: 1.27s
375:	learn: 3.1234626	total: 2.02s	remaining: 1.27s
376:	learn: 3.1233870	total: 2.02s	remaining: 1.26s
377:	learn: 3.1231972	total: 2.02s	remaining: 1.26s
378:	learn: 3.1231088	total: 2.03s	remaining: 1.25s
379:	learn: 3.1229962	total: 2.03s	remaining: 1.25s
380:	learn: 3.1229187	total: 2.04s	remaining: 1.24s
381:	learn: 3.1227979	total: 2.04s	remaining: 1.24s
382:	learn: 3.1227256	total: 2.05s	remaining: 1.23s
383:	learn: 3.1225467	total: 2.05s	remaining: 1.23s
384:	learn: 3.1223406	total: 2.06s	remaining: 1.22s
385:	learn: 3.1220807	total: 2.06s	remaining: 1.21s
386:	learn: 3.1219501	total: 2.07s	remaining: 1.21s
387:	learn: 3.1215957	total: 2.08s	remaining: 1.2s
388:	learn: 3.1214416	total: 2.08s	remaining: 1.2s
389:	learn: 3.1211881	total: 2.08s	remaining: 1.19s
390:	learn: 3.1210902	total: 2.09s	remaining: 1.19s
391:	learn: 3.1209316	total: 2.09s	remaining: 1.18s
392:	learn: 3.120

564:	learn: 3.0987440	total: 3.01s	remaining: 255ms
565:	learn: 3.0985958	total: 3.01s	remaining: 250ms
566:	learn: 3.0985424	total: 3.02s	remaining: 245ms
567:	learn: 3.0984568	total: 3.02s	remaining: 239ms
568:	learn: 3.0983696	total: 3.03s	remaining: 234ms
569:	learn: 3.0983257	total: 3.03s	remaining: 229ms
570:	learn: 3.0982762	total: 3.04s	remaining: 223ms
571:	learn: 3.0981562	total: 3.04s	remaining: 218ms
572:	learn: 3.0979592	total: 3.05s	remaining: 213ms
573:	learn: 3.0978663	total: 3.05s	remaining: 207ms
574:	learn: 3.0978088	total: 3.06s	remaining: 202ms
575:	learn: 3.0977468	total: 3.06s	remaining: 197ms
576:	learn: 3.0976569	total: 3.07s	remaining: 191ms
577:	learn: 3.0976036	total: 3.07s	remaining: 186ms
578:	learn: 3.0975007	total: 3.08s	remaining: 181ms
579:	learn: 3.0974019	total: 3.08s	remaining: 175ms
580:	learn: 3.0972576	total: 3.09s	remaining: 170ms
581:	learn: 3.0971387	total: 3.1s	remaining: 165ms
582:	learn: 3.0969268	total: 3.1s	remaining: 160ms
583:	learn: 3.

[I 2023-12-09 16:13:14,242] Trial 5 finished with value: 0.4565881619949031 and parameters: {'l2_leaf_reg': 0.07873242017790835, 'subsample': 0.2797064039425238, 'learning_rate': 0.0514720203975198, 'n_estimators': 613, 'max_depth': 4}. Best is trial 5 with value: 0.4565881619949031.


601:	learn: 3.0953497	total: 3.2s	remaining: 58.6ms
602:	learn: 3.0952304	total: 3.21s	remaining: 53.2ms
603:	learn: 3.0951677	total: 3.21s	remaining: 47.9ms
604:	learn: 3.0950952	total: 3.22s	remaining: 42.6ms
605:	learn: 3.0950501	total: 3.23s	remaining: 37.3ms
606:	learn: 3.0949956	total: 3.23s	remaining: 31.9ms
607:	learn: 3.0947832	total: 3.24s	remaining: 26.6ms
608:	learn: 3.0947335	total: 3.24s	remaining: 21.3ms
609:	learn: 3.0946643	total: 3.25s	remaining: 16ms
610:	learn: 3.0946006	total: 3.25s	remaining: 10.6ms
611:	learn: 3.0944069	total: 3.26s	remaining: 5.32ms
612:	learn: 3.0943528	total: 3.26s	remaining: 0us
0:	learn: 3.2300588	total: 7.47ms	remaining: 7.1s
1:	learn: 3.2268043	total: 739ms	remaining: 5m 50s
2:	learn: 3.2230938	total: 1.42s	remaining: 7m 28s
3:	learn: 3.2192746	total: 2.05s	remaining: 8m 5s
4:	learn: 3.2171492	total: 2.71s	remaining: 8m 32s
5:	learn: 3.2142552	total: 3.35s	remaining: 8m 48s
6:	learn: 3.2098653	total: 3.99s	remaining: 8m 58s
7:	learn: 3.207

[W 2023-12-09 16:13:26,966] Trial 6 failed with parameters: {'l2_leaf_reg': 0.0611469403382424, 'subsample': 0.2534717113185624, 'learning_rate': 0.006598654139229424, 'n_estimators': 952, 'max_depth': 15} because of the following error: KeyboardInterrupt('').
Traceback (most recent call last):
  File "C:\Users\User\AppData\Roaming\Python\Python39\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\User\AppData\Local\Temp\ipykernel_11772\2668193024.py", line 2, in <lambda>
    study.optimize(lambda trial: objective(trial,X_train,  y_train ,X_valid , y_valid ),n_trials=50, timeout = 1000 )
  File "C:\Users\User\AppData\Local\Temp\ipykernel_11772\3867344226.py", line 23, in objective
    model.fit(train_x, train_y)
  File "C:\Users\User\AppData\Roaming\Python\Python39\site-packages\catboost\core.py", line 5703, in fit
    return self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None

21:	learn: 3.1638285	total: 12.3s	remaining: 8m 38s


KeyboardInterrupt: 

In [None]:
model_cat = CatBoostRegressor(**study.best_trial.params)
kf = KFold(n_splits=5, shuffle=True , random_state=42)

ensemble_predicts= []
scores =[]


for train_idx, val_idx in tqdm(kf.split(X_standard), total=5, desc="Processing folds"):
    X_t, X_val = X_standard[train_idx], X_standard[val_idx]
    y_t, y_val = train_y[train_idx], train_y[val_idx]
    
    # 두 모델 모두 학습
    model_cat.fit(X_t, y_t)
    
    # 각 모델로부터 Validation set에 대한 예측을 평균내어 앙상블 예측 생성
    val_pred = model_cat.predict(X_val)
    
    # Validation set에 대한 대회 평가 산식 계산 후 저장
    scores.append(rmsle(y_val, val_pred))
    
    # test 데이터셋에 대한 예측 수행 후 저장
    model_cat_pred = np.exp(model_cat.predict(test_standard))
    model_cat_pred = np.where(model_cat_pred < 0, 0, model_cat_pred)
    
    ensemble_predicts.append(model_cat_pred)

# K-fold 모든 예측의 평균을 계산하여 fold별 모델들의 앙상블 예측 생성
final_predictions = np.mean(ensemble_predicts, axis=0)

# 각 fold에서의 Validation Metric Score와 전체 평균 Validation Metric Score출력
print("Validation : RMSLE scores for each fold:", scores)
print("Validation : RMSLE:", np.mean(scores))

In [14]:
final_predictions

array([3.84142443, 3.44403305, 5.15275654, ..., 4.3555348 , 4.17960141,
       4.71029388])

In [15]:
submission = pd.read_csv('C:/Users/User/Desktop/대구교통사고예측/sample_submission.csv')
submission['ECLO'] = final_predictions
submission.to_csv('C:/Users/User/Desktop/대구교통사고예측/optuna_cat2.csv',index=False)
submission

Unnamed: 0,ID,ECLO
0,ACCIDENT_39609,3.841424
1,ACCIDENT_39610,3.444033
2,ACCIDENT_39611,5.152757
3,ACCIDENT_39612,4.068527
4,ACCIDENT_39613,4.952406
...,...,...
10958,ACCIDENT_50567,4.605680
10959,ACCIDENT_50568,4.218216
10960,ACCIDENT_50569,4.355535
10961,ACCIDENT_50570,4.179601


In [22]:
min(final_predictions),max(final_predictions),np.mean(final_predictions)

(2.0027562345033463, 5.996977754134176, 3.9406467058559342)

In [17]:
p = pd.read_csv('C:/Users/User/Desktop/대구교통사고예측/cat_pred_0.4269.csv')

In [18]:
p

Unnamed: 0,ID,ECLO
0,ACCIDENT_39609,3.861853
1,ACCIDENT_39610,3.451107
2,ACCIDENT_39611,5.133610
3,ACCIDENT_39612,4.464113
4,ACCIDENT_39613,4.625894
...,...,...
10958,ACCIDENT_50567,4.242687
10959,ACCIDENT_50568,4.097948
10960,ACCIDENT_50569,4.295058
10961,ACCIDENT_50570,4.184488


In [21]:
min(p['ECLO']),max(p['ECLO']),np.mean(p['ECLO'])

(2.500139643718016, 5.888244422157497, 4.051512172002897)

In [36]:
xg = pd.read_csv('C:/Users/User/Desktop/대구교통사고예측/optuna_xgb2.csv')
xg

Unnamed: 0,ID,ECLO
0,ACCIDENT_39609,3.367258
1,ACCIDENT_39610,3.224864
2,ACCIDENT_39611,4.509228
3,ACCIDENT_39612,3.952304
4,ACCIDENT_39613,3.980799
...,...,...
10958,ACCIDENT_50567,3.866031
10959,ACCIDENT_50568,3.967485
10960,ACCIDENT_50569,4.005776
10961,ACCIDENT_50570,3.986940


In [37]:
min(xg['ECLO']),max(xg['ECLO']),np.mean(xg['ECLO'])

(2.4403484, 4.7770643, 3.6671455118945544)

In [39]:
fin = []
for a,b in zip(final_predictions,xg['ECLO']):
    fin.append((a+b)/2)

In [41]:
submission = pd.read_csv('C:/Users/User/Desktop/대구교통사고예측/sample_submission.csv')
submission['ECLO'] = fin
submission.to_csv('C:/Users/User/Desktop/대구교통사고예측/optuna_cat2andxgb2.csv',index=False)
submission

Unnamed: 0,ID,ECLO
0,ACCIDENT_39609,3.604341
1,ACCIDENT_39610,3.334449
2,ACCIDENT_39611,4.830992
3,ACCIDENT_39612,4.010415
4,ACCIDENT_39613,4.466603
...,...,...
10958,ACCIDENT_50567,4.235856
10959,ACCIDENT_50568,4.092851
10960,ACCIDENT_50569,4.180655
10961,ACCIDENT_50570,4.083271
