In [1]:
# --- Imports (standard library first, then third-party) ---
import gc
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tqdm

# --- pandas options ---
# Show up to 500 rows / columns when a frame is displayed.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
# Silence the chained-assignment warning for the whole session.
pd.options.mode.chained_assignment = None  # default='warn'

# --- matplotlib options ---
matplotlib.rcParams['axes.unicode_minus'] = False
# NOTE(review): 'Malgun Gothic' is presumably chosen so Korean labels render;
# confirm the font is installed on the machine running this notebook.
plt.rcParams['font.family'] = 'Malgun Gothic'
font = {'size': 16}
matplotlib.rc('font', **font)

# --- tqdm ---
# Hook tqdm into pandas (enables the progress_* variants of apply).
tqdm.tqdm.pandas()

In [2]:
%%time
training_data_df = pd.read_csv('./Prepped Data/training_data_20220125.csv')
print(training_data_df.shape)
training_data_df.head()

(5068169, 19)
Wall time: 7.52 s


Unnamed: 0,주소_건축년도,건축년도,전용면적(㎡),월세(만원),x좌표,y좌표,계약날짜일수_2006년기준,계약날짜기준_건물연식,계약날짜일수_sin,계약날짜일수_cos,거래유형,층,부동산유형,브랜드,대장구분명,지목,구,동,거래/보증금(만원)
0,서울특별시 강남구 개포동 12 1991,1991,25.35,12,2.066072,4.435861,4070,26,0.811539,0.584298,0,0,0,0,0,0,0,0,2253
1,서울특별시 강남구 개포동 12 1991,1991,25.35,10,2.066072,4.435861,4133,26,0.895839,-0.444378,0,1,0,0,0,0,0,0,730
2,서울특별시 강남구 개포동 12 1991,1991,25.35,4,2.066072,4.435861,4260,26,-0.880012,-0.474951,0,1,0,0,0,0,0,0,1815
3,서울특별시 강남구 개포동 12 1991,1991,25.35,17,2.066072,4.435861,4470,27,0.999769,0.021516,0,2,0,0,0,0,0,0,3865
4,서울특별시 강남구 개포동 12 1991,1991,25.35,7,2.066072,4.435861,4577,27,-0.247022,-0.96901,0,3,0,0,0,0,0,0,3062


In [3]:
# Directory that is listed below to derive the categorical column names
# (one file per column; the file name minus its extension is the column name).
# NOTE(review): the files presumably hold category-to-code mappings; they are
# not opened in this part of the notebook, only their names are used.
basedir = './Training/Training3_20220125/cat_col_maps/'

In [4]:
# List the per-column mapping workbooks found in `basedir`.
# os.listdir() returns entries in arbitrary order and includes every file in
# the directory, so keep only '.xlsx' entries (stray files would otherwise
# become bogus categorical column names) and sort for a reproducible order.
filenames = sorted(f for f in os.listdir(basedir) if f.endswith('.xlsx'))
filenames

['거래유형.xlsx',
 '구.xlsx',
 '대장구분명.xlsx',
 '동.xlsx',
 '부동산유형.xlsx',
 '브랜드.xlsx',
 '지목.xlsx',
 '층.xlsx']

In [5]:
# Categorical column names = mapping file names with the extension removed.
# os.path.splitext strips only the trailing extension, whereas
# str.replace('.xlsx', '') would also remove the substring from the middle
# of a name.
cat_cols = [os.path.splitext(name)[0] for name in filenames]
cat_cols

['거래유형', '구', '대장구분명', '동', '부동산유형', '브랜드', '지목', '층']

# Training - CatBoost Regressor

In [6]:
import catboost

In [7]:
from sklearn.model_selection import GroupKFold

In [8]:
# 5-fold grouped cross-validation splitter. The training cells below pass the
# '주소_건축년도' column as `groups`, so all rows sharing one value of that
# column land on the same side of each train/validation split.
gkfold = GroupKFold(n_splits=5)

In [9]:
# Train one CatBoost quantile-regression model (alpha=0.5, i.e. the median)
# per GroupKFold fold, using the held-out fold for early stopping, and save
# each fitted model to disk.
models_list = []

# Loop-invariant: build the feature frame and the target once. Dropping the
# columns inside the loop copied the full frame twice per fold.
features_df = training_data_df.drop(columns=['주소_건축년도', '거래/보증금(만원)'])
target_sr = training_data_df['거래/보증금(만원)']

for i, (train_idx, val_idx) in enumerate(gkfold.split(training_data_df, groups=training_data_df['주소_건축년도'])):
    print('-------------- Fold {} starting ---------------'.format(i))
    print(len(train_idx), len(val_idx))

    # split() yields positional row indices, so select with .iloc; .loc only
    # gave the same rows because the frame has a default 0..n-1 index.
    x_train = features_df.iloc[train_idx, :]
    y_train = target_sr.iloc[train_idx]

    x_val = features_df.iloc[val_idx, :]
    y_val = target_sr.iloc[val_idx]

    train_pool = catboost.Pool(x_train, y_train, cat_features=cat_cols)
    val_pool = catboost.Pool(x_val, y_val, cat_features=cat_cols)

    model = catboost.CatBoostRegressor(
        iterations=100000, depth=6, task_type='CPU', loss_function='Quantile:alpha=0.5'
    )

    # Log every 50 iterations; stop once the validation metric has not
    # improved for 100 consecutive iterations.
    model.fit(train_pool, eval_set=val_pool, verbose=50, early_stopping_rounds=100)

    model.save_model('./Training/Training3_20220125/catboost_quantile05_{}'.format(i))
    # Keep the fitted model in memory as well; the list was previously
    # created but never filled.
    models_list.append(model)

    print('-------------- Fold {} ending ---------------'.format(i))

-------------- Fold 0 starting ---------------
0
4054535 1013634
0:	learn: 9218.6573890	test: 9195.8078392	best: 9195.8078392 (0)	total: 2.1s	remaining: 2d 10h 12m 32s
50:	learn: 5193.4121045	test: 5217.1421965	best: 5217.1421965 (50)	total: 1m 34s	remaining: 2d 3h 35m 35s
100:	learn: 4218.1775110	test: 4249.1074555	best: 4249.1074555 (100)	total: 3m 9s	remaining: 2d 3h 56m 17s
150:	learn: 3777.8405799	test: 3825.9170512	best: 3825.9170512 (150)	total: 4m 45s	remaining: 2d 4h 29m 7s
200:	learn: 3537.8656359	test: 3613.6199359	best: 3613.6199359 (200)	total: 6m 19s	remaining: 2d 4h 19m 1s
250:	learn: 3379.7395807	test: 3470.2809154	best: 3470.2809154 (250)	total: 7m 55s	remaining: 2d 4h 27m 54s
300:	learn: 3269.0045382	test: 3379.7755426	best: 3379.7755426 (300)	total: 9m 30s	remaining: 2d 4h 28m 3s
350:	learn: 3156.4690249	test: 3289.8314357	best: 3289.8314357 (350)	total: 11m 6s	remaining: 2d 4h 34m 1s
400:	learn: 3085.0209809	test: 3237.4257902	best: 3237.4257902 (400)	total: 12m 43s

3700:	learn: 2137.3130786	test: 2610.7142137	best: 2610.7024295 (3699)	total: 1h 57m 4s	remaining: 2d 2h 46m 18s
3750:	learn: 2133.8762944	test: 2608.7271599	best: 2608.7271599 (3750)	total: 1h 58m 38s	remaining: 2d 2h 44m 21s
3800:	learn: 2129.3732893	test: 2606.0444897	best: 2606.0444897 (3800)	total: 2h 15s	remaining: 2d 2h 43m 30s
3850:	learn: 2125.3354873	test: 2604.3579165	best: 2604.3494878 (3848)	total: 2h 1m 48s	remaining: 2d 2h 41m 6s
3900:	learn: 2121.0182918	test: 2602.4920643	best: 2602.4920643 (3900)	total: 2h 3m 21s	remaining: 2d 2h 38m 59s
3950:	learn: 2117.3513537	test: 2600.8467190	best: 2600.7470068 (3947)	total: 2h 4m 58s	remaining: 2d 2h 38m 10s
4000:	learn: 2113.4566695	test: 2599.8423869	best: 2599.8423869 (4000)	total: 2h 6m 32s	remaining: 2d 2h 36m 4s
4050:	learn: 2109.1347064	test: 2597.3997322	best: 2597.3997322 (4050)	total: 2h 8m 5s	remaining: 2d 2h 33m 45s
4100:	learn: 2104.9461598	test: 2595.3736657	best: 2595.3736657 (4100)	total: 2h 9m 38s	remaining: 2d

650:	learn: 2846.0245054	test: 2831.9055933	best: 2831.9055933 (650)	total: 19m 30s	remaining: 2d 1h 36m 40s
700:	learn: 2810.0587587	test: 2804.8586609	best: 2804.8586609 (700)	total: 21m 1s	remaining: 2d 1h 38m 38s
750:	learn: 2775.3199636	test: 2781.8477535	best: 2781.8477535 (750)	total: 22m 33s	remaining: 2d 1h 40m 25s
800:	learn: 2744.8659397	test: 2760.9009590	best: 2760.9009590 (800)	total: 24m 4s	remaining: 2d 1h 40m 33s
850:	learn: 2718.3219409	test: 2742.1540386	best: 2742.1540386 (850)	total: 25m 35s	remaining: 2d 1h 41m 9s
900:	learn: 2696.7375677	test: 2727.0390279	best: 2727.0390279 (900)	total: 27m 6s	remaining: 2d 1h 42m 20s
950:	learn: 2674.5870150	test: 2709.1951420	best: 2709.1951420 (950)	total: 28m 38s	remaining: 2d 1h 42m 30s
1000:	learn: 2655.4659174	test: 2696.6905165	best: 2696.6905165 (1000)	total: 30m 11s	remaining: 2d 1h 46m 20s
1050:	learn: 2635.2186786	test: 2683.2248764	best: 2683.2248764 (1050)	total: 31m 41s	remaining: 2d 1h 44m 6s
1100:	learn: 2615.95

4350:	learn: 2118.1515533	test: 2391.4653084	best: 2391.4408454 (4347)	total: 2h 12m 9s	remaining: 2d 25m 13s
4400:	learn: 2114.5846171	test: 2389.8070301	best: 2389.8070301 (4400)	total: 2h 13m 41s	remaining: 2d 23m 58s
4450:	learn: 2110.4771023	test: 2387.5415522	best: 2387.5415522 (4450)	total: 2h 15m 12s	remaining: 2d 22m 28s
4500:	learn: 2107.1505181	test: 2387.0841380	best: 2387.0841380 (4500)	total: 2h 16m 45s	remaining: 2d 21m 36s
4550:	learn: 2103.5353599	test: 2386.0863186	best: 2386.0447169 (4543)	total: 2h 18m 16s	remaining: 2d 19m 59s
4600:	learn: 2100.2051234	test: 2384.7729774	best: 2384.7729774 (4600)	total: 2h 19m 45s	remaining: 2d 17m 55s
4650:	learn: 2096.5692020	test: 2382.3478826	best: 2382.3191808 (4641)	total: 2h 21m 16s	remaining: 2d 16m 23s
4700:	learn: 2093.3818810	test: 2380.6246450	best: 2380.6246450 (4700)	total: 2h 22m 48s	remaining: 2d 14m 56s
4750:	learn: 2089.9652837	test: 2379.6645965	best: 2379.6120313 (4749)	total: 2h 24m 20s	remaining: 2d 13m 37s
48

3050:	learn: 2220.5991588	test: 2530.8404993	best: 2530.8404993 (3050)	total: 1h 33m 3s	remaining: 2d 1h 16m 52s
3100:	learn: 2215.5937402	test: 2528.0408744	best: 2527.8973046 (3094)	total: 1h 34m 34s	remaining: 2d 1h 15m 14s
3150:	learn: 2209.8771497	test: 2523.9917909	best: 2523.9917909 (3150)	total: 1h 36m 5s	remaining: 2d 1h 13m 36s
3200:	learn: 2205.0264349	test: 2522.1222539	best: 2522.1222539 (3200)	total: 1h 37m 37s	remaining: 2d 1h 12m 4s
3250:	learn: 2200.0049565	test: 2518.9727630	best: 2518.5239206 (3244)	total: 1h 39m 9s	remaining: 2d 1h 10m 59s
3300:	learn: 2194.7731630	test: 2516.3549413	best: 2516.3549413 (3300)	total: 1h 40m 40s	remaining: 2d 1h 9m 13s
3350:	learn: 2189.8461864	test: 2513.4930913	best: 2513.4930913 (3350)	total: 1h 42m 14s	remaining: 2d 1h 8m 48s
3400:	learn: 2185.1078152	test: 2509.8782808	best: 2509.8782808 (3400)	total: 1h 43m 46s	remaining: 2d 1h 7m 30s
3450:	learn: 2179.5770147	test: 2507.3872416	best: 2507.2980510 (3435)	total: 1h 45m 17s	remain

6750:	learn: 1963.8927378	test: 2411.9044463	best: 2411.8942382 (6745)	total: 3h 26m 30s	remaining: 1d 23h 32m 23s
6800:	learn: 1961.7555535	test: 2410.7061531	best: 2410.7061531 (6800)	total: 3h 28m 2s	remaining: 1d 23h 30m 57s
6850:	learn: 1959.9422292	test: 2409.9685712	best: 2409.9685712 (6850)	total: 3h 29m 33s	remaining: 1d 23h 29m 19s
6900:	learn: 1958.0202668	test: 2408.8063972	best: 2408.8063234 (6899)	total: 3h 31m 5s	remaining: 1d 23h 27m 40s
6950:	learn: 1956.1389617	test: 2408.2052013	best: 2408.2052013 (6950)	total: 3h 32m 37s	remaining: 1d 23h 26m 13s
7000:	learn: 1954.1602577	test: 2407.6507797	best: 2407.6507797 (7000)	total: 3h 34m 9s	remaining: 1d 23h 24m 53s
7050:	learn: 1952.1327997	test: 2406.9428918	best: 2406.9063767 (7047)	total: 3h 35m 41s	remaining: 1d 23h 23m 13s
7100:	learn: 1950.2598010	test: 2406.0362347	best: 2406.0224378 (7099)	total: 3h 37m 13s	remaining: 1d 23h 21m 51s
7150:	learn: 1948.2778231	test: 2405.7154316	best: 2405.5311595 (7127)	total: 3h 38

1850:	learn: 2334.1284427	test: 3087.7515938	best: 3087.7515938 (1850)	total: 56m 14s	remaining: 2d 1h 42m 26s
1900:	learn: 2323.8419285	test: 3080.8181656	best: 3080.8181656 (1900)	total: 57m 45s	remaining: 2d 1h 40m 48s
1950:	learn: 2313.5308373	test: 3073.1762430	best: 3073.1762430 (1950)	total: 59m 16s	remaining: 2d 1h 38m 29s
2000:	learn: 2304.7628199	test: 3066.5128982	best: 3066.5128982 (2000)	total: 1h 47s	remaining: 2d 1h 37m 2s
2050:	learn: 2296.3599822	test: 3061.6689643	best: 3061.6502193 (2049)	total: 1h 2m 18s	remaining: 2d 1h 35m 30s
2100:	learn: 2287.6057659	test: 3056.1320982	best: 3056.1320982 (2100)	total: 1h 3m 48s	remaining: 2d 1h 33m 12s
2150:	learn: 2279.9588360	test: 3051.8199093	best: 3051.8199093 (2150)	total: 1h 5m 19s	remaining: 2d 1h 31m 36s
2200:	learn: 2271.9674666	test: 3044.7565332	best: 3044.7565332 (2200)	total: 1h 6m 51s	remaining: 2d 1h 31m 6s
2250:	learn: 2264.5386632	test: 3040.9975166	best: 3040.9975166 (2250)	total: 1h 8m 23s	remaining: 2d 1h 30

5550:	learn: 1993.6720034	test: 2864.4319262	best: 2864.4319262 (5550)	total: 2h 49m 17s	remaining: 2d 26s
5600:	learn: 1991.5779062	test: 2863.1732764	best: 2863.1713222 (5599)	total: 2h 50m 48s	remaining: 1d 23h 58m 48s
5650:	learn: 1989.0808837	test: 2861.3493093	best: 2861.3493093 (5650)	total: 2h 52m 20s	remaining: 1d 23h 57m 26s
5700:	learn: 1986.4971006	test: 2859.4378326	best: 2859.4378326 (5700)	total: 2h 53m 54s	remaining: 1d 23h 56m 27s
5750:	learn: 1983.7637673	test: 2856.5464887	best: 2856.5464887 (5750)	total: 2h 55m 26s	remaining: 1d 23h 55m 10s
5800:	learn: 1981.4365259	test: 2856.0379287	best: 2855.4661855 (5791)	total: 2h 56m 58s	remaining: 1d 23h 53m 55s
5850:	learn: 1978.9572103	test: 2854.6327703	best: 2854.6327703 (5850)	total: 2h 58m 32s	remaining: 1d 23h 52m 52s
5900:	learn: 1976.5467856	test: 2853.1625655	best: 2853.1625655 (5900)	total: 3h 3s	remaining: 1d 23h 51m 15s
5950:	learn: 1973.4714491	test: 2851.0848183	best: 2851.0848183 (5950)	total: 3h 1m 33s	remai

9150:	learn: 1856.3533897	test: 2785.3611816	best: 2785.3586763 (9147)	total: 4h 39m 55s	remaining: 1d 22h 19m
9200:	learn: 1854.7473853	test: 2783.7540302	best: 2783.7540302 (9200)	total: 4h 41m 25s	remaining: 1d 22h 17m 14s
9250:	learn: 1853.1946637	test: 2782.8878724	best: 2782.8878724 (9250)	total: 4h 42m 58s	remaining: 1d 22h 15m 53s
9300:	learn: 1851.6953757	test: 2782.4152115	best: 2782.4152115 (9300)	total: 4h 44m 30s	remaining: 1d 22h 14m 25s
9350:	learn: 1850.3242406	test: 2781.2162578	best: 2781.2162578 (9350)	total: 4h 46m 3s	remaining: 1d 22h 13m
9400:	learn: 1849.0715628	test: 2781.1903028	best: 2781.1903028 (9400)	total: 4h 47m 35s	remaining: 1d 22h 11m 35s
9450:	learn: 1847.5185741	test: 2780.2897933	best: 2780.1035786 (9448)	total: 4h 49m 7s	remaining: 1d 22h 10m 7s
9500:	learn: 1846.0652175	test: 2779.4901672	best: 2779.4901672 (9500)	total: 4h 50m 40s	remaining: 1d 22h 8m 41s
9550:	learn: 1844.5740840	test: 2778.9685571	best: 2778.9685571 (9550)	total: 4h 52m 11s	rem

850:	learn: 2720.9648503	test: 2812.5380197	best: 2812.5380197 (850)	total: 25m 42s	remaining: 2d 1h 55m 21s
900:	learn: 2695.4185490	test: 2795.8432335	best: 2795.8432335 (900)	total: 27m 13s	remaining: 2d 1h 54m 57s
950:	learn: 2671.3223886	test: 2777.9490821	best: 2777.9490821 (950)	total: 28m 44s	remaining: 2d 1h 52m 48s
1000:	learn: 2651.0993617	test: 2763.9090344	best: 2763.9090344 (1000)	total: 30m 17s	remaining: 2d 1h 55m 4s
1050:	learn: 2625.7473226	test: 2747.3132147	best: 2747.3132147 (1050)	total: 31m 48s	remaining: 2d 1h 54m 57s
1100:	learn: 2604.2498117	test: 2732.1017750	best: 2732.1017750 (1100)	total: 33m 20s	remaining: 2d 1h 54m 44s
1150:	learn: 2584.8969260	test: 2717.3583422	best: 2717.3583422 (1150)	total: 34m 53s	remaining: 2d 1h 56m
1200:	learn: 2567.3900685	test: 2708.2430925	best: 2708.2430925 (1200)	total: 36m 24s	remaining: 2d 1h 55m 43s
1250:	learn: 2550.1991277	test: 2697.3994092	best: 2697.3994092 (1250)	total: 37m 55s	remaining: 2d 1h 53m 45s
1300:	learn:

4550:	learn: 2090.1298111	test: 2444.9943184	best: 2444.9101788 (4549)	total: 2h 18m 51s	remaining: 2d 32m 27s
4600:	learn: 2086.0773287	test: 2442.8336158	best: 2442.8336158 (4600)	total: 2h 20m 26s	remaining: 2d 31m 49s
4650:	learn: 2082.2632155	test: 2440.5131375	best: 2440.5131375 (4650)	total: 2h 22m	remaining: 2d 31m 9s
4700:	learn: 2078.5516559	test: 2439.4334634	best: 2439.4334634 (4700)	total: 2h 23m 35s	remaining: 2d 30m 45s
4750:	learn: 2075.3659496	test: 2438.2716843	best: 2438.2716843 (4750)	total: 2h 25m 6s	remaining: 2d 29m 19s
4800:	learn: 2072.8319363	test: 2437.1310952	best: 2437.1310952 (4800)	total: 2h 26m 40s	remaining: 2d 28m 25s
4850:	learn: 2069.7387376	test: 2435.7092480	best: 2435.6783352 (4849)	total: 2h 28m 14s	remaining: 2d 27m 47s
4900:	learn: 2066.3499824	test: 2434.6365921	best: 2434.6274450 (4899)	total: 2h 29m 50s	remaining: 2d 27m 38s
4950:	learn: 2063.5338845	test: 2433.4850651	best: 2433.4811561 (4918)	total: 2h 31m 25s	remaining: 2d 26m 57s
5000:	l

In [None]:
# Train one CatBoost quantile-regression model (alpha=0.25) per GroupKFold
# fold, using the held-out fold for early stopping, and save each fitted
# model to disk.
# NOTE(review): the alpha=0.5 cell caps training at iterations=100000 while
# this cell uses 10000; confirm the lower cap is intentional.
models_list = []

# Loop-invariant: build the feature frame and the target once. Dropping the
# columns inside the loop copied the full frame twice per fold.
features_df = training_data_df.drop(columns=['주소_건축년도', '거래/보증금(만원)'])
target_sr = training_data_df['거래/보증금(만원)']

for i, (train_idx, val_idx) in enumerate(gkfold.split(training_data_df, groups=training_data_df['주소_건축년도'])):
    print('-------------- Fold {} starting ---------------'.format(i))

    # split() yields positional row indices, so select with .iloc; .loc only
    # gave the same rows because the frame has a default 0..n-1 index.
    x_train = features_df.iloc[train_idx, :]
    y_train = target_sr.iloc[train_idx]

    x_val = features_df.iloc[val_idx, :]
    y_val = target_sr.iloc[val_idx]

    train_pool = catboost.Pool(x_train, y_train, cat_features=cat_cols)
    val_pool = catboost.Pool(x_val, y_val, cat_features=cat_cols)

    model = catboost.CatBoostRegressor(iterations=10000, depth=6, task_type='CPU', loss_function='Quantile:alpha=0.25')

    # Log every 50 iterations; stop once the validation metric has not
    # improved for 100 consecutive iterations.
    model.fit(train_pool, eval_set=val_pool, verbose=50, early_stopping_rounds=100)

    model.save_model('./Training/Training3_20220125/catboost_quantile025_{}'.format(i))
    # Keep the fitted model in memory as well; the list was previously
    # created but never filled.
    models_list.append(model)

    print('-------------- Fold {} ending ---------------'.format(i))

-------------- Fold 0 starting ---------------
0:	learn: 6035.0182536	test: 6070.5539414	best: 6070.5539414 (0)	total: 2.05s	remaining: 5h 41m 24s
50:	learn: 3781.6280362	test: 3809.2600317	best: 3809.2600317 (50)	total: 1m 29s	remaining: 4h 49m 30s
100:	learn: 3209.8269758	test: 3227.0656413	best: 3227.0656413 (100)	total: 2m 57s	remaining: 4h 49m 9s
150:	learn: 2940.7796610	test: 2956.9188939	best: 2956.9188939 (150)	total: 4m 27s	remaining: 4h 50m 55s
200:	learn: 2728.8364070	test: 2755.7973052	best: 2755.7973052 (200)	total: 5m 58s	remaining: 4h 50m 53s
250:	learn: 2600.9805856	test: 2648.7712126	best: 2648.7712126 (250)	total: 7m 35s	remaining: 4h 54m 33s
300:	learn: 2517.9978001	test: 2577.7210617	best: 2577.7210617 (300)	total: 9m 13s	remaining: 4h 57m 19s
350:	learn: 2454.3480268	test: 2521.8877934	best: 2521.8877934 (350)	total: 10m 48s	remaining: 4h 57m 7s
400:	learn: 2402.0698201	test: 2477.3317836	best: 2477.3317836 (400)	total: 12m 25s	remaining: 4h 57m 35s
450:	learn: 235

3800:	learn: 1725.0852593	test: 1997.0631998	best: 1997.0631998 (3800)	total: 1h 59m 44s	remaining: 3h 15m 17s
3850:	learn: 1722.3088514	test: 1995.2244697	best: 1995.2244697 (3850)	total: 2h 1m 15s	remaining: 3h 13m 36s
3900:	learn: 1719.2346190	test: 1993.5248827	best: 1993.5248827 (3900)	total: 2h 2m 45s	remaining: 3h 11m 56s
3950:	learn: 1716.6375580	test: 1992.0244510	best: 1992.0244510 (3950)	total: 2h 4m 15s	remaining: 3h 10m 14s
4000:	learn: 1713.8585632	test: 1990.8379113	best: 1990.8379113 (4000)	total: 2h 5m 45s	remaining: 3h 8m 32s
4050:	learn: 1711.3508646	test: 1989.5277805	best: 1989.5277805 (4050)	total: 2h 7m 15s	remaining: 3h 6m 53s
4100:	learn: 1708.4641160	test: 1987.2575070	best: 1987.2575070 (4100)	total: 2h 8m 45s	remaining: 3h 5m 13s
4150:	learn: 1704.6195953	test: 1985.5162045	best: 1985.5162045 (4150)	total: 2h 10m 15s	remaining: 3h 3m 32s
4200:	learn: 1701.9679568	test: 1983.8270605	best: 1983.7933678 (4199)	total: 2h 11m 46s	remaining: 3h 1m 53s
4250:	learn:

7550:	learn: 1566.0269041	test: 1924.0265188	best: 1923.9512086 (7540)	total: 3h 54m 57s	remaining: 1h 16m 12s
7600:	learn: 1564.5924076	test: 1923.3794094	best: 1923.3723808 (7599)	total: 3h 56m 33s	remaining: 1h 14m 39s
7650:	learn: 1562.9927245	test: 1922.7795422	best: 1922.7795422 (7650)	total: 3h 58m 10s	remaining: 1h 13m 7s
7700:	learn: 1561.5736221	test: 1922.2440265	best: 1922.2440265 (7700)	total: 3h 59m 44s	remaining: 1h 11m 34s
7750:	learn: 1560.2204938	test: 1921.7084639	best: 1921.7084639 (7750)	total: 4h 1m 17s	remaining: 1h 10m
7800:	learn: 1558.6745402	test: 1921.3420278	best: 1921.2276207 (7792)	total: 4h 2m 47s	remaining: 1h 8m 26s
7850:	learn: 1557.4403965	test: 1920.7114880	best: 1920.7114880 (7850)	total: 4h 4m 18s	remaining: 1h 6m 52s
7900:	learn: 1555.8555359	test: 1920.4420323	best: 1920.4229234 (7898)	total: 4h 5m 49s	remaining: 1h 5m 18s
7950:	learn: 1554.3710728	test: 1920.0085543	best: 1919.9767623 (7942)	total: 4h 7m 22s	remaining: 1h 3m 44s
8000:	learn: 15

2250:	learn: 1888.0727556	test: 1976.9985923	best: 1976.9985923 (2250)	total: 1h 9m 52s	remaining: 4h 31s
2300:	learn: 1882.4974576	test: 1973.9499549	best: 1973.9499549 (2300)	total: 1h 11m 24s	remaining: 3h 58m 55s
2350:	learn: 1876.9643477	test: 1970.7139734	best: 1970.7139734 (2350)	total: 1h 12m 56s	remaining: 3h 57m 19s
2400:	learn: 1871.5021856	test: 1967.5916943	best: 1967.5843739 (2399)	total: 1h 14m 27s	remaining: 3h 55m 37s
2450:	learn: 1866.0302734	test: 1963.6977717	best: 1963.6977717 (2450)	total: 1h 15m 57s	remaining: 3h 53m 57s
2500:	learn: 1860.6103993	test: 1960.6464041	best: 1960.6464041 (2500)	total: 1h 17m 28s	remaining: 3h 52m 18s
2550:	learn: 1854.5369401	test: 1956.5347425	best: 1956.5347425 (2550)	total: 1h 18m 59s	remaining: 3h 50m 38s
2600:	learn: 1849.4212800	test: 1954.4912622	best: 1954.4912622 (2600)	total: 1h 20m 31s	remaining: 3h 49m 5s
2650:	learn: 1844.4638439	test: 1952.0277738	best: 1952.0277738 (2650)	total: 1h 22m 3s	remaining: 3h 47m 28s


In [None]:
# Train one CatBoost quantile-regression model (alpha=0.75) per GroupKFold
# fold, using the held-out fold for early stopping, and save each fitted
# model to disk.
# NOTE(review): the alpha=0.5 cell caps training at iterations=100000 while
# this cell uses 10000; confirm the lower cap is intentional.
models_list = []

# Loop-invariant: build the feature frame and the target once. Dropping the
# columns inside the loop copied the full frame twice per fold.
features_df = training_data_df.drop(columns=['주소_건축년도', '거래/보증금(만원)'])
target_sr = training_data_df['거래/보증금(만원)']

for i, (train_idx, val_idx) in enumerate(gkfold.split(training_data_df, groups=training_data_df['주소_건축년도'])):
    print('-------------- Fold {} starting ---------------'.format(i))

    # split() yields positional row indices, so select with .iloc; .loc only
    # gave the same rows because the frame has a default 0..n-1 index.
    x_train = features_df.iloc[train_idx, :]
    y_train = target_sr.iloc[train_idx]

    x_val = features_df.iloc[val_idx, :]
    y_val = target_sr.iloc[val_idx]

    train_pool = catboost.Pool(x_train, y_train, cat_features=cat_cols)
    val_pool = catboost.Pool(x_val, y_val, cat_features=cat_cols)

    model = catboost.CatBoostRegressor(iterations=10000, depth=6, task_type='CPU', loss_function='Quantile:alpha=0.75')

    # Log every 50 iterations; stop once the validation metric has not
    # improved for 100 consecutive iterations.
    model.fit(train_pool, eval_set=val_pool, verbose=50, early_stopping_rounds=100)

    model.save_model('./Training/Training3_20220125/catboost_quantile075_{}'.format(i))
    # Keep the fitted model in memory as well; the list was previously
    # created but never filled.
    models_list.append(model)

    print('-------------- Fold {} ending ---------------'.format(i))

In [None]:
# Train one CatBoost quantile-regression model (alpha=0.125) per GroupKFold
# fold, using the held-out fold for early stopping, and save each fitted
# model to disk.
# NOTE(review): the alpha=0.5 cell caps training at iterations=100000 while
# this cell uses 10000; confirm the lower cap is intentional.
models_list = []

# Loop-invariant: build the feature frame and the target once. Dropping the
# columns inside the loop copied the full frame twice per fold.
features_df = training_data_df.drop(columns=['주소_건축년도', '거래/보증금(만원)'])
target_sr = training_data_df['거래/보증금(만원)']

for i, (train_idx, val_idx) in enumerate(gkfold.split(training_data_df, groups=training_data_df['주소_건축년도'])):
    print('-------------- Fold {} starting ---------------'.format(i))

    # split() yields positional row indices, so select with .iloc; .loc only
    # gave the same rows because the frame has a default 0..n-1 index.
    x_train = features_df.iloc[train_idx, :]
    y_train = target_sr.iloc[train_idx]

    x_val = features_df.iloc[val_idx, :]
    y_val = target_sr.iloc[val_idx]

    train_pool = catboost.Pool(x_train, y_train, cat_features=cat_cols)
    val_pool = catboost.Pool(x_val, y_val, cat_features=cat_cols)

    model = catboost.CatBoostRegressor(iterations=10000, depth=6, task_type='CPU', loss_function='Quantile:alpha=0.125')

    # Log every 50 iterations; stop once the validation metric has not
    # improved for 100 consecutive iterations.
    model.fit(train_pool, eval_set=val_pool, verbose=50, early_stopping_rounds=100)

    model.save_model('./Training/Training3_20220125/catboost_quantile0125_{}'.format(i))
    # Keep the fitted model in memory as well; the list was previously
    # created but never filled.
    models_list.append(model)

    print('-------------- Fold {} ending ---------------'.format(i))

In [None]:
# Train one CatBoost quantile-regression model (alpha=0.875) per GroupKFold
# fold, using the held-out fold for early stopping, and save each fitted
# model to disk.
# NOTE(review): the alpha=0.5 cell caps training at iterations=100000 while
# this cell uses 10000; confirm the lower cap is intentional.
models_list = []

# Loop-invariant: build the feature frame and the target once. Dropping the
# columns inside the loop copied the full frame twice per fold.
features_df = training_data_df.drop(columns=['주소_건축년도', '거래/보증금(만원)'])
target_sr = training_data_df['거래/보증금(만원)']

for i, (train_idx, val_idx) in enumerate(gkfold.split(training_data_df, groups=training_data_df['주소_건축년도'])):
    print('-------------- Fold {} starting ---------------'.format(i))

    # split() yields positional row indices, so select with .iloc; .loc only
    # gave the same rows because the frame has a default 0..n-1 index.
    x_train = features_df.iloc[train_idx, :]
    y_train = target_sr.iloc[train_idx]

    x_val = features_df.iloc[val_idx, :]
    y_val = target_sr.iloc[val_idx]

    train_pool = catboost.Pool(x_train, y_train, cat_features=cat_cols)
    val_pool = catboost.Pool(x_val, y_val, cat_features=cat_cols)

    model = catboost.CatBoostRegressor(iterations=10000, depth=6, task_type='CPU', loss_function='Quantile:alpha=0.875')

    # Log every 50 iterations; stop once the validation metric has not
    # improved for 100 consecutive iterations.
    model.fit(train_pool, eval_set=val_pool, verbose=50, early_stopping_rounds=100)

    model.save_model('./Training/Training3_20220125/catboost_quantile0875_{}'.format(i))
    # Keep the fitted model in memory as well; the list was previously
    # created but never filled.
    models_list.append(model)

    print('-------------- Fold {} ending ---------------'.format(i))