In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error
import warnings
warnings.filterwarnings('ignore')
import gc

import lightgbm as lgb
from catboost import CatBoostRegressor

In [2]:
# Read the training and test frames; each file stores its frame under the HDF5 key 'df'.
Train_data = pd.read_hdf('output/train_tree.h5', key='df')
Test_data = pd.read_hdf('output/test_tree.h5', key='df')

In [3]:
# Every training column except 'price' (the target read into Y_data) and 'SaleID'
# (the identifier written to the output files) is used as a model feature.
numerical_cols = Train_data.columns
feature_cols = list(filter(lambda name: name not in ('price', 'SaleID'), numerical_cols))

In [4]:
# Restrict both frames to the same feature columns, in the same order.
X_data = Train_data.loc[:, feature_cols]
X_test = Test_data.loc[:, feature_cols]
# One shape per line: training frame first, then test frame.
print(X_data.shape, X_test.shape, sep='\n')

(149999, 83)
(50000, 83)


In [5]:
# Convert to plain ndarrays: the cross-validation loops index rows with integer index arrays.
# NOTE(review): scores elsewhere apply np.expm1 to Y_data, so 'price' is presumably stored
# as log1p(price) upstream — confirm.
X_data, X_test = np.array(X_data), np.array(X_test)
Y_data = np.array(Train_data['price'])

In [6]:
# Custom evaluation metric (reported during training; it is not the training loss)
def myFeval(preds, xgbtrain):
    """Mean absolute error after mapping labels and predictions through ``np.expm1``.

    Args:
        preds: Predicted values for the rows of ``xgbtrain``.
        xgbtrain: Dataset object exposing ``get_label()``. This notebook passes
            the function as ``feval`` to ``lgb.train``, so despite the name it
            is presumably a LightGBM ``Dataset`` — confirm.

    Returns:
        tuple: ``('myFeval', score, False)`` — metric name, metric value, and
        the "higher is better" flag (``False``: smaller values are better).
    """
    y_true = np.expm1(xgbtrain.get_label())
    y_pred = np.expm1(preds)
    return 'myFeval', mean_absolute_error(y_true, y_pred), False

In [7]:
# LightGBM training parameters, shared by every cross-validation fold.
param = {'boosting_type': 'gbdt',
         'num_leaves': 31,
         'max_depth': -1,  # no explicit depth limit
         "lambda_l2": 2,  # L2 regularisation; guards against overfitting
         # Guards against overfitting; seemed to need little tuning.
         # "min_child_samples" is LightGBM's alias for this same setting, so it is
         # specified once only (the effective value is unchanged: 20).
         'min_data_in_leaf': 20,
         'objective': 'regression_l1',
         'learning_rate': 0.01,

         "feature_fraction": 0.8,
         "bagging_freq": 1,
         "bagging_fraction": 0.8,
         "bagging_seed": 11,
         "metric": 'mae',
         }

In [None]:
# 10-fold cross-validation for LightGBM. Collects:
#   oof_lgb               - out-of-fold prediction for every training row
#   predictions_lgb       - test predictions averaged over the folds
#   predictions_train_lgb - full-training-matrix predictions averaged over the folds
folds = KFold(n_splits=10, shuffle=True, random_state=2018)
num_round = 100000000  # upper bound on boosting rounds; early stopping ends training far sooner
oof_lgb, predictions_train_lgb = np.zeros(len(X_data)), np.zeros(len(X_data))
predictions_lgb = np.zeros(len(X_test))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_data, Y_data)):
    print("fold n°{}".format(fold_ + 1))
    trn_data = lgb.Dataset(X_data[trn_idx], Y_data[trn_idx])
    val_data = lgb.Dataset(X_data[val_idx], Y_data[val_idx])

    # NOTE(review): verbose_eval / early_stopping_rounds are passed as keyword arguments to
    # lgb.train; newer LightGBM releases expect callbacks instead — confirm the installed version.
    clf = lgb.train(
        param,
        trn_data,
        num_round,
        valid_sets=[trn_data, val_data],
        verbose_eval=300,
        early_stopping_rounds=600,
        feval=myFeval,
    )

    # Predict with the model truncated at the iteration early stopping selected.
    best_iter = clf.best_iteration
    oof_lgb[val_idx] = clf.predict(X_data[val_idx], num_iteration=best_iter)
    predictions_lgb += clf.predict(X_test, num_iteration=best_iter) / folds.n_splits
    predictions_train_lgb += clf.predict(X_data, num_iteration=best_iter) / folds.n_splits

# Both arrays go through expm1 before the MAE is taken, matching myFeval.
print("lightgbm score: {:<8.8f}".format(mean_absolute_error(np.expm1(oof_lgb), np.expm1(Y_data))))

fold n°1
Training until validation scores don't improve for 600 rounds
[300]	training's l1: 0.20224	training's myFeval: 1215.74	valid_1's l1: 0.206036	valid_1's myFeval: 1232.25
[600]	training's l1: 0.147329	training's myFeval: 770.428	valid_1's l1: 0.150638	valid_1's myFeval: 784.087
[900]	training's l1: 0.134711	training's myFeval: 658.554	valid_1's l1: 0.138508	valid_1's myFeval: 675.938
[1200]	training's l1: 0.128669	training's myFeval: 610.332	valid_1's l1: 0.132912	valid_1's myFeval: 630.629
[1500]	training's l1: 0.124717	training's myFeval: 581.008	valid_1's l1: 0.129267	valid_1's myFeval: 603.304
[1800]	training's l1: 0.122147	training's myFeval: 560.118	valid_1's l1: 0.126907	valid_1's myFeval: 583.947
[2100]	training's l1: 0.120143	training's myFeval: 544.53	valid_1's l1: 0.125083	valid_1's myFeval: 569.67
[2400]	training's l1: 0.118583	training's myFeval: 532.845	valid_1's l1: 0.123667	valid_1's myFeval: 558.887
[2700]	training's l1: 0.117224	training's myFeval: 522.783	vali

[22800]	training's l1: 0.09534	training's myFeval: 387.417	valid_1's l1: 0.110111	valid_1's myFeval: 471.057
[23100]	training's l1: 0.0952217	training's myFeval: 386.739	valid_1's l1: 0.110079	valid_1's myFeval: 470.884
[23400]	training's l1: 0.0950939	training's myFeval: 386.027	valid_1's l1: 0.110044	valid_1's myFeval: 470.621
[23700]	training's l1: 0.0949708	training's myFeval: 385.342	valid_1's l1: 0.110003	valid_1's myFeval: 470.36
[24000]	training's l1: 0.0948498	training's myFeval: 384.6	valid_1's l1: 0.109975	valid_1's myFeval: 470.156
[24300]	training's l1: 0.0947149	training's myFeval: 383.857	valid_1's l1: 0.109942	valid_1's myFeval: 469.942
[24600]	training's l1: 0.0945694	training's myFeval: 383.149	valid_1's l1: 0.109907	valid_1's myFeval: 469.736
[24900]	training's l1: 0.0944639	training's myFeval: 382.467	valid_1's l1: 0.109874	valid_1's myFeval: 469.531
[25200]	training's l1: 0.0943178	training's myFeval: 381.747	valid_1's l1: 0.109838	valid_1's myFeval: 469.342
[25500

[45000]	training's l1: 0.0880365	training's myFeval: 348.297	valid_1's l1: 0.108742	valid_1's myFeval: 462.3
[45300]	training's l1: 0.0879716	training's myFeval: 347.918	valid_1's l1: 0.108733	valid_1's myFeval: 462.207
[45600]	training's l1: 0.0879015	training's myFeval: 347.538	valid_1's l1: 0.108716	valid_1's myFeval: 462.117
[45900]	training's l1: 0.087832	training's myFeval: 347.139	valid_1's l1: 0.108704	valid_1's myFeval: 462.08
[46200]	training's l1: 0.0877539	training's myFeval: 346.788	valid_1's l1: 0.108693	valid_1's myFeval: 462.006
[46500]	training's l1: 0.0876805	training's myFeval: 346.436	valid_1's l1: 0.10868	valid_1's myFeval: 461.912
[46800]	training's l1: 0.0876072	training's myFeval: 346.058	valid_1's l1: 0.10867	valid_1's myFeval: 461.835
[47100]	training's l1: 0.0875185	training's myFeval: 345.65	valid_1's l1: 0.108663	valid_1's myFeval: 461.779
[47400]	training's l1: 0.0874579	training's myFeval: 345.34	valid_1's l1: 0.10866	valid_1's myFeval: 461.736
[47700]	tr

[67500]	training's l1: 0.0835374	training's myFeval: 326.347	valid_1's l1: 0.108216	valid_1's myFeval: 458.804
[67800]	training's l1: 0.0835001	training's myFeval: 326.136	valid_1's l1: 0.108211	valid_1's myFeval: 458.761
[68100]	training's l1: 0.0834615	training's myFeval: 325.906	valid_1's l1: 0.108209	valid_1's myFeval: 458.77
[68400]	training's l1: 0.083422	training's myFeval: 325.72	valid_1's l1: 0.108206	valid_1's myFeval: 458.742
[68700]	training's l1: 0.0833696	training's myFeval: 325.483	valid_1's l1: 0.108201	valid_1's myFeval: 458.711
[69000]	training's l1: 0.0833152	training's myFeval: 325.247	valid_1's l1: 0.10819	valid_1's myFeval: 458.665
[69300]	training's l1: 0.0832697	training's myFeval: 325.032	valid_1's l1: 0.108181	valid_1's myFeval: 458.629
[69600]	training's l1: 0.083226	training's myFeval: 324.799	valid_1's l1: 0.108181	valid_1's myFeval: 458.618
[69900]	training's l1: 0.0831761	training's myFeval: 324.604	valid_1's l1: 0.108176	valid_1's myFeval: 458.603
[70200

[19200]	training's l1: 0.0973532	training's myFeval: 396.208	valid_1's l1: 0.108318	valid_1's myFeval: 468.733
[19500]	training's l1: 0.0972054	training's myFeval: 395.484	valid_1's l1: 0.10828	valid_1's myFeval: 468.568
[19800]	training's l1: 0.0970566	training's myFeval: 394.618	valid_1's l1: 0.108248	valid_1's myFeval: 468.408
[20100]	training's l1: 0.0969063	training's myFeval: 393.795	valid_1's l1: 0.108208	valid_1's myFeval: 468.155
[20400]	training's l1: 0.0967485	training's myFeval: 393.019	valid_1's l1: 0.108168	valid_1's myFeval: 467.984
[20700]	training's l1: 0.0966003	training's myFeval: 392.199	valid_1's l1: 0.108126	valid_1's myFeval: 467.759
[21000]	training's l1: 0.0964602	training's myFeval: 391.429	valid_1's l1: 0.108089	valid_1's myFeval: 467.597
[21300]	training's l1: 0.09632	training's myFeval: 390.698	valid_1's l1: 0.108059	valid_1's myFeval: 467.4
[21600]	training's l1: 0.0961848	training's myFeval: 389.931	valid_1's l1: 0.108018	valid_1's myFeval: 467.198
[21900

[41700]	training's l1: 0.0890572	training's myFeval: 353.943	valid_1's l1: 0.106705	valid_1's myFeval: 459.939
[42000]	training's l1: 0.0889869	training's myFeval: 353.566	valid_1's l1: 0.106697	valid_1's myFeval: 459.869
[42300]	training's l1: 0.0889103	training's myFeval: 353.217	valid_1's l1: 0.106685	valid_1's myFeval: 459.8
[42600]	training's l1: 0.0888287	training's myFeval: 352.831	valid_1's l1: 0.106667	valid_1's myFeval: 459.712
[42900]	training's l1: 0.0887505	training's myFeval: 352.446	valid_1's l1: 0.106657	valid_1's myFeval: 459.671
[43200]	training's l1: 0.088672	training's myFeval: 352.098	valid_1's l1: 0.106652	valid_1's myFeval: 459.631
[43500]	training's l1: 0.0886039	training's myFeval: 351.812	valid_1's l1: 0.106639	valid_1's myFeval: 459.572
[43800]	training's l1: 0.0885291	training's myFeval: 351.503	valid_1's l1: 0.106638	valid_1's myFeval: 459.539
[44100]	training's l1: 0.0884643	training's myFeval: 351.239	valid_1's l1: 0.106632	valid_1's myFeval: 459.514
[444

[600]	training's l1: 0.147166	training's myFeval: 772.464	valid_1's l1: 0.149619	valid_1's myFeval: 765.835
[900]	training's l1: 0.134653	training's myFeval: 660.802	valid_1's l1: 0.137784	valid_1's myFeval: 667.959
[1200]	training's l1: 0.128558	training's myFeval: 611.258	valid_1's l1: 0.132148	valid_1's myFeval: 623.329
[1500]	training's l1: 0.124585	training's myFeval: 581.194	valid_1's l1: 0.128688	valid_1's myFeval: 596.475
[1800]	training's l1: 0.121834	training's myFeval: 559.764	valid_1's l1: 0.126357	valid_1's myFeval: 578.435
[2100]	training's l1: 0.119868	training's myFeval: 544.219	valid_1's l1: 0.124721	valid_1's myFeval: 565.978
[2400]	training's l1: 0.118245	training's myFeval: 531.3	valid_1's l1: 0.123339	valid_1's myFeval: 555.598
[2700]	training's l1: 0.117005	training's myFeval: 521.465	valid_1's l1: 0.122299	valid_1's myFeval: 547.645
[3000]	training's l1: 0.11591	training's myFeval: 513.714	valid_1's l1: 0.121416	valid_1's myFeval: 541.788
[3300]	training's l1: 0.

[23100]	training's l1: 0.0948174	training's myFeval: 383.953	valid_1's l1: 0.110945	valid_1's myFeval: 477.804
[23400]	training's l1: 0.0947059	training's myFeval: 383.209	valid_1's l1: 0.11091	valid_1's myFeval: 477.612
[23700]	training's l1: 0.0945732	training's myFeval: 382.56	valid_1's l1: 0.110881	valid_1's myFeval: 477.435
[24000]	training's l1: 0.0944663	training's myFeval: 381.88	valid_1's l1: 0.11085	valid_1's myFeval: 477.254
[24300]	training's l1: 0.0943535	training's myFeval: 381.305	valid_1's l1: 0.110822	valid_1's myFeval: 477.089
[24600]	training's l1: 0.0942227	training's myFeval: 380.56	valid_1's l1: 0.110792	valid_1's myFeval: 476.947
[24900]	training's l1: 0.0940984	training's myFeval: 379.865	valid_1's l1: 0.110771	valid_1's myFeval: 476.812
[25200]	training's l1: 0.0939617	training's myFeval: 379.101	valid_1's l1: 0.110742	valid_1's myFeval: 476.604
[25500]	training's l1: 0.0938395	training's myFeval: 378.498	valid_1's l1: 0.110716	valid_1's myFeval: 476.467
[25800

[45600]	training's l1: 0.0873326	training's myFeval: 345.944	valid_1's l1: 0.109803	valid_1's myFeval: 471.46
[45900]	training's l1: 0.0872587	training's myFeval: 345.583	valid_1's l1: 0.109798	valid_1's myFeval: 471.46
[46200]	training's l1: 0.0871839	training's myFeval: 345.248	valid_1's l1: 0.109794	valid_1's myFeval: 471.43
[46500]	training's l1: 0.0871136	training's myFeval: 344.912	valid_1's l1: 0.109793	valid_1's myFeval: 471.397
[46800]	training's l1: 0.0870349	training's myFeval: 344.53	valid_1's l1: 0.109794	valid_1's myFeval: 471.406
[47100]	training's l1: 0.0869894	training's myFeval: 344.172	valid_1's l1: 0.109787	valid_1's myFeval: 471.363
[47400]	training's l1: 0.0868961	training's myFeval: 343.663	valid_1's l1: 0.109784	valid_1's myFeval: 471.318
[47700]	training's l1: 0.0868353	training's myFeval: 343.388	valid_1's l1: 0.10978	valid_1's myFeval: 471.273
[48000]	training's l1: 0.0867531	training's myFeval: 343.021	valid_1's l1: 0.109772	valid_1's myFeval: 471.23
[48300]

[12300]	training's l1: 0.101888	training's myFeval: 423.432	valid_1's l1: 0.10969	valid_1's myFeval: 470.698
[12600]	training's l1: 0.101662	training's myFeval: 422.05	valid_1's l1: 0.109604	valid_1's myFeval: 470.16
[12900]	training's l1: 0.101462	training's myFeval: 420.855	valid_1's l1: 0.10953	valid_1's myFeval: 469.618
[13200]	training's l1: 0.101234	training's myFeval: 419.486	valid_1's l1: 0.109436	valid_1's myFeval: 469.091
[13500]	training's l1: 0.100992	training's myFeval: 418.112	valid_1's l1: 0.109349	valid_1's myFeval: 468.511
[13800]	training's l1: 0.100777	training's myFeval: 416.966	valid_1's l1: 0.109264	valid_1's myFeval: 468.014
[14100]	training's l1: 0.100518	training's myFeval: 415.624	valid_1's l1: 0.109182	valid_1's myFeval: 467.549
[14400]	training's l1: 0.100271	training's myFeval: 414.354	valid_1's l1: 0.109099	valid_1's myFeval: 467.074
[14700]	training's l1: 0.100067	training's myFeval: 413.155	valid_1's l1: 0.109024	valid_1's myFeval: 466.586
[15000]	traini

[34800]	training's l1: 0.0909152	training's myFeval: 362.524	valid_1's l1: 0.106783	valid_1's myFeval: 452.132
[35100]	training's l1: 0.0908244	training's myFeval: 362.085	valid_1's l1: 0.10677	valid_1's myFeval: 452.054
[35400]	training's l1: 0.0907077	training's myFeval: 361.565	valid_1's l1: 0.106759	valid_1's myFeval: 451.953
[35700]	training's l1: 0.0905938	training's myFeval: 361.064	valid_1's l1: 0.106741	valid_1's myFeval: 451.839
[36000]	training's l1: 0.0905024	training's myFeval: 360.581	valid_1's l1: 0.10672	valid_1's myFeval: 451.765
[36300]	training's l1: 0.0904256	training's myFeval: 360.197	valid_1's l1: 0.106704	valid_1's myFeval: 451.672
[36600]	training's l1: 0.090331	training's myFeval: 359.786	valid_1's l1: 0.106696	valid_1's myFeval: 451.605
[36900]	training's l1: 0.0902483	training's myFeval: 359.292	valid_1's l1: 0.106681	valid_1's myFeval: 451.493
[37200]	training's l1: 0.0901496	training's myFeval: 358.836	valid_1's l1: 0.106673	valid_1's myFeval: 451.421
[375

[600]	training's l1: 0.147444	training's myFeval: 773.312	valid_1's l1: 0.149646	valid_1's myFeval: 774.555
[900]	training's l1: 0.134893	training's myFeval: 661.384	valid_1's l1: 0.137194	valid_1's myFeval: 661.712
[1200]	training's l1: 0.129004	training's myFeval: 612.758	valid_1's l1: 0.131481	valid_1's myFeval: 614.648
[1500]	training's l1: 0.124919	training's myFeval: 583.252	valid_1's l1: 0.127667	valid_1's myFeval: 586.969
[1800]	training's l1: 0.122159	training's myFeval: 561.624	valid_1's l1: 0.125198	valid_1's myFeval: 567.147
[2100]	training's l1: 0.120051	training's myFeval: 545.407	valid_1's l1: 0.123355	valid_1's myFeval: 552.912
[2400]	training's l1: 0.118462	training's myFeval: 532.789	valid_1's l1: 0.122041	valid_1's myFeval: 542.682
[2700]	training's l1: 0.117185	training's myFeval: 522.566	valid_1's l1: 0.121017	valid_1's myFeval: 534.745
[3000]	training's l1: 0.116039	training's myFeval: 514.384	valid_1's l1: 0.120091	valid_1's myFeval: 528.537
[3300]	training's l1:

[23100]	training's l1: 0.0950952	training's myFeval: 384.787	valid_1's l1: 0.109701	valid_1's myFeval: 468.451
[23400]	training's l1: 0.0949471	training's myFeval: 384.122	valid_1's l1: 0.10967	valid_1's myFeval: 468.309
[23700]	training's l1: 0.0947935	training's myFeval: 383.408	valid_1's l1: 0.109642	valid_1's myFeval: 468.167
[24000]	training's l1: 0.094636	training's myFeval: 382.663	valid_1's l1: 0.109613	valid_1's myFeval: 468.022
[24300]	training's l1: 0.0944995	training's myFeval: 381.872	valid_1's l1: 0.109573	valid_1's myFeval: 467.847
[24600]	training's l1: 0.0943907	training's myFeval: 381.184	valid_1's l1: 0.109549	valid_1's myFeval: 467.707
[24900]	training's l1: 0.0942568	training's myFeval: 380.42	valid_1's l1: 0.109517	valid_1's myFeval: 467.503
[25200]	training's l1: 0.0941157	training's myFeval: 379.766	valid_1's l1: 0.109492	valid_1's myFeval: 467.343
[25500]	training's l1: 0.0939649	training's myFeval: 379.047	valid_1's l1: 0.109457	valid_1's myFeval: 467.152
[258

[45600]	training's l1: 0.0877468	training's myFeval: 347.861	valid_1's l1: 0.108434	valid_1's myFeval: 462.142
[45900]	training's l1: 0.0876784	training's myFeval: 347.51	valid_1's l1: 0.10842	valid_1's myFeval: 462.091
[46200]	training's l1: 0.0876175	training's myFeval: 347.25	valid_1's l1: 0.108411	valid_1's myFeval: 462.038
[46500]	training's l1: 0.0875487	training's myFeval: 346.933	valid_1's l1: 0.108406	valid_1's myFeval: 461.989
[46800]	training's l1: 0.0874837	training's myFeval: 346.638	valid_1's l1: 0.108396	valid_1's myFeval: 461.931
[47100]	training's l1: 0.0874089	training's myFeval: 346.249	valid_1's l1: 0.108382	valid_1's myFeval: 461.879
[47400]	training's l1: 0.0873322	training's myFeval: 345.927	valid_1's l1: 0.108369	valid_1's myFeval: 461.837
[47700]	training's l1: 0.0872561	training's myFeval: 345.588	valid_1's l1: 0.108358	valid_1's myFeval: 461.775
[48000]	training's l1: 0.0871748	training's myFeval: 345.183	valid_1's l1: 0.108352	valid_1's myFeval: 461.722
[483

[68100]	training's l1: 0.0833113	training's myFeval: 326.863	valid_1's l1: 0.107962	valid_1's myFeval: 459.555
[68400]	training's l1: 0.0832429	training's myFeval: 326.552	valid_1's l1: 0.107961	valid_1's myFeval: 459.506
[68700]	training's l1: 0.0831872	training's myFeval: 326.28	valid_1's l1: 0.107953	valid_1's myFeval: 459.448
[69000]	training's l1: 0.0831273	training's myFeval: 326.041	valid_1's l1: 0.107958	valid_1's myFeval: 459.487
Early stopping, best iteration is:
[68690]	training's l1: 0.0831898	training's myFeval: 326.297	valid_1's l1: 0.107952	valid_1's myFeval: 459.448
fold n°6
Training until validation scores don't improve for 600 rounds
[300]	training's l1: 0.20208	training's myFeval: 1215.45	valid_1's l1: 0.202529	valid_1's myFeval: 1220.17
[600]	training's l1: 0.147421	training's myFeval: 769.531	valid_1's l1: 0.149554	valid_1's myFeval: 790.427
[900]	training's l1: 0.135013	training's myFeval: 659.758	valid_1's l1: 0.137199	valid_1's myFeval: 672.628
[1200]	training's

[21000]	training's l1: 0.0963983	training's myFeval: 392.615	valid_1's l1: 0.10918	valid_1's myFeval: 465.537
[21300]	training's l1: 0.0962574	training's myFeval: 391.838	valid_1's l1: 0.109134	valid_1's myFeval: 465.248
[21600]	training's l1: 0.0961277	training's myFeval: 391.05	valid_1's l1: 0.109101	valid_1's myFeval: 465.032
[21900]	training's l1: 0.0959955	training's myFeval: 390.277	valid_1's l1: 0.109058	valid_1's myFeval: 464.743
[22200]	training's l1: 0.0958594	training's myFeval: 389.609	valid_1's l1: 0.109013	valid_1's myFeval: 464.501
[22500]	training's l1: 0.0956767	training's myFeval: 388.715	valid_1's l1: 0.108963	valid_1's myFeval: 464.239
[22800]	training's l1: 0.0955231	training's myFeval: 387.947	valid_1's l1: 0.108924	valid_1's myFeval: 464.035
[23100]	training's l1: 0.0953532	training's myFeval: 387.069	valid_1's l1: 0.108877	valid_1's myFeval: 463.749
[23400]	training's l1: 0.0952113	training's myFeval: 386.343	valid_1's l1: 0.108838	valid_1's myFeval: 463.49
[237

[43500]	training's l1: 0.0884778	training's myFeval: 351.86	valid_1's l1: 0.107473	valid_1's myFeval: 455.214
[43800]	training's l1: 0.0883931	training's myFeval: 351.442	valid_1's l1: 0.107464	valid_1's myFeval: 455.16
[44100]	training's l1: 0.0883248	training's myFeval: 351.072	valid_1's l1: 0.107453	valid_1's myFeval: 455.094
[44400]	training's l1: 0.0882666	training's myFeval: 350.758	valid_1's l1: 0.107437	valid_1's myFeval: 455.023
[44700]	training's l1: 0.0881976	training's myFeval: 350.395	valid_1's l1: 0.107422	valid_1's myFeval: 454.938
[45000]	training's l1: 0.0881329	training's myFeval: 350.047	valid_1's l1: 0.107408	valid_1's myFeval: 454.865
[45300]	training's l1: 0.0880681	training's myFeval: 349.683	valid_1's l1: 0.107405	valid_1's myFeval: 454.81
[45600]	training's l1: 0.0880105	training's myFeval: 349.427	valid_1's l1: 0.107393	valid_1's myFeval: 454.752
[45900]	training's l1: 0.0879552	training's myFeval: 349.12	valid_1's l1: 0.107382	valid_1's myFeval: 454.698
[4620

In [None]:
# Test-set output: save the fold-averaged LightGBM predictions next to their SaleID.
predictions = predictions_lgb  # alias, not a copy: the clipping below also changes predictions_lgb
predictions[predictions < 0] = 0  # floor negative predictions at zero
# NOTE(review): values are written as predicted (no expm1 is applied here) — confirm
# that whatever consumes this file expects that scale.
sub = pd.DataFrame()
# IDs come from Test_data, the frame X_test was built from, so rows and predictions stay aligned.
sub['SaleID'] = Test_data.SaleID
sub['price'] = predictions
sub.to_csv('submit/lgb_test.csv', index=False)

In [None]:
# Out-of-fold output: floor negative LightGBM OOF predictions at zero (in place)
# and save them next to the training SaleID.
np.putmask(oof_lgb, oof_lgb < 0, 0)
sub = pd.DataFrame({'SaleID': Train_data.SaleID, 'price': oof_lgb})
sub.to_csv('submit/lgb_train.csv', index=False)

In [None]:
# 10-fold cross-validation for CatBoost (same KFold settings as the LightGBM loop). Collects:
#   oof_cb               - out-of-fold prediction for every training row
#   predictions_cb       - test predictions averaged over the folds
#   predictions_train_cb - full-training-matrix predictions averaged over the folds
kfolder = KFold(n_splits=10, shuffle=True, random_state=2018)
oof_cb = np.zeros(len(X_data))
predictions_cb = np.zeros(len(X_test))
predictions_train_cb = np.zeros(len(X_data))
kfold = kfolder.split(X_data, Y_data)

# The hyper-parameters do not depend on the fold, so they are built once.
cb_params = {
    'n_estimators': 100000000,  # upper bound; early stopping ends training far sooner
    'loss_function': 'MAE',
    'eval_metric': 'MAE',
    'learning_rate': 0.01,
    'depth': 6,
    'use_best_model': True,
    'subsample': 0.6,
    'bootstrap_type': 'Bernoulli',
    'reg_lambda': 3,
    'one_hot_max_size': 2,
}

for fold_, (train_index, vali_index) in enumerate(kfold, start=1):
    print("fold n°{}".format(fold_))
    k_x_train = X_data[train_index]
    k_y_train = Y_data[train_index]
    k_x_vali = X_data[vali_index]
    k_y_vali = Y_data[vali_index]

    model_cb = CatBoostRegressor(**cb_params)
    # train the model
    model_cb.fit(k_x_train, k_y_train, eval_set=[(k_x_vali, k_y_vali)], verbose=300, early_stopping_rounds=600)

    # ntree_end is an exclusive upper bound, whereas best_iteration_ is the zero-based index
    # of the best tree: passing best_iteration_ directly would leave the best tree out, and a
    # value of 0 would be read as "use every tree". Hence the + 1.
    n_trees = model_cb.best_iteration_ + 1
    oof_cb[vali_index] = model_cb.predict(k_x_vali, ntree_end=n_trees)
    predictions_cb += model_cb.predict(X_test, ntree_end=n_trees) / kfolder.n_splits
    predictions_train_cb += model_cb.predict(X_data, ntree_end=n_trees) / kfolder.n_splits

# Both arrays go through expm1 before the MAE is taken, as in the LightGBM score.
print("catboost score: {:<8.8f}".format(mean_absolute_error(np.expm1(oof_cb), np.expm1(Y_data))))

In [None]:
# Test-set output: save the fold-averaged CatBoost predictions next to their SaleID.
predictions = predictions_cb  # alias, not a copy: the clipping below also changes predictions_cb
predictions[predictions < 0] = 0  # floor negative predictions at zero
# NOTE(review): values are written as predicted (no expm1 is applied here) — confirm
# that whatever consumes this file expects that scale.
sub = pd.DataFrame()
# IDs come from Test_data, the frame X_test was built from, so rows and predictions stay aligned.
sub['SaleID'] = Test_data.SaleID
sub['price'] = predictions
sub.to_csv('submit/cab_test.csv', index=False)

In [None]:
# Validation (out-of-fold) output: floor negative CatBoost OOF predictions at zero (in place)
# and save them next to the training SaleID.
np.putmask(oof_cb, oof_cb < 0, 0)
sub = pd.DataFrame({'SaleID': Train_data.SaleID, 'price': oof_cb})
sub.to_csv('submit/cab_train.csv', index=False)