In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os 
import gc
from tqdm import tqdm, tqdm_notebook
from scipy.stats import skew  # for some statistics
from scipy.special import boxcox1p
from scipy.stats import boxcox_normmax
from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold, cross_val_score, StratifiedKFold
from sklearn.metrics import mean_squared_error, f1_score, roc_auc_score
from mlxtend.regressor import StackingCVRegressor
import datetime
import time
import lightgbm as lgb
import xgboost as xgb
from xgboost import XGBRegressor

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings('ignore')

pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)
%matplotlib inline

In [2]:
data=pd.read_csv('../mid_data/data_rolling_holiday.csv')

In [13]:
data.head()

Unnamed: 0,adcode,bodyType,id,model,regMonth,regYear,popularity,carCommentVolum,newsReplyVolum,label,province_rank,date_block_num,class_id,total_sale_last1M,total_last12M_sum,total_last12M_mean,sale_last1M,sale_last1M_sum,sale_last2M_sum,sale_last3M_sum,sale_last6M_sum,sale_last12M_sum,sale_last2M_mean,sale_last3M_mean,sale_last6M_mean,sale_last12M_mean,sale_last2M_std,sale_last3M_std,sale_last6M_std,sale_last12M_std,sale_last2M_max,sale_last3M_max,sale_last6M_max,sale_last12M_max,spring_festival
0,310000,SUV,0,3c974920a76ac9c1,1,2016,1479.0,11.0,106.0,292.0,3,1,0,,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,1
1,310000,SUV,0,3c974920a76ac9c1,2,2016,2355.0,6.0,2255.0,134.0,3,2,0,548.416667,0.0,0.0,292.0,292.0,,,,,,,,,,,,,,,,,0
2,310000,SUV,0,3c974920a76ac9c1,3,2016,1794.0,13.0,861.0,323.0,3,3,0,191.833333,0.0,0.0,134.0,134.0,426.0,,,,213.0,,,,111.722871,,,,292.0,,,,-1
3,310000,SUV,0,3c974920a76ac9c1,4,2016,1260.0,16.0,105.0,266.0,3,4,0,412.55,0.0,0.0,323.0,323.0,457.0,749.0,,,228.5,249.666667,,,133.643182,101.362386,,,323.0,323.0,,,-2
4,310000,SUV,0,3c974920a76ac9c1,5,2016,1148.0,28.0,2107.0,271.0,3,5,0,370.583333,0.0,0.0,266.0,266.0,589.0,723.0,,,294.5,241.0,,,40.305087,96.94844,,,323.0,323.0,,,1000


# lightgbm

In [3]:
#cate_feat=['adcode','bodyType', 'model','province_rank','regMonth','regYear','class_id']#,'date_block_num']
cate_feat=['adcode','bodyType', 'model','province_rank','regMonth','regYear','class_id']#,'date_block_num']
#data['province_rank'].astype('int')
#print(1)
for i in cate_feat:
    data[i] = data[i].astype('category')#都转化为类别类型
'''
features=["adcode","bodyType","model","regMonth","regYear","popularity","carCommentVolum","newsReplyVolum",
          "label","province_rank","class_id","total_sale_last1M","total_last12M_sum","total_last12M_mean",
          "sale_last1M","sale_last1M_sum","sale_last2M_sum","sale_last3M_sum","sale_last6M_sum","sale_last12M_sum",
          "sale_last2M_mean","sale_last3M_mean","sale_last6M_mean","sale_last12M_mean","sale_last2M_std",
          "sale_last3M_std","sale_last6M_std","sale_last12M_std","sale_last2M_max","sale_last3M_max",
          "sale_last6M_max","sale_last12M_max","spring_festival","id"]
'''
features=["adcode","bodyType","model","regMonth","regYear","popularity",
          "label","province_rank","spring_festival","id","class_id",
          "total_sale_last1M","total_last12M_sum","total_last12M_mean",
          "sale_last1M","sale_last2M_mean","sale_last3M_mean","sale_last6M_mean","spring_festival",
          "sale_last2M_std","sale_last3M_std","sale_last6M_std","sale_last12M_std"]

In [4]:
X_train = data[data.date_block_num <= 20][features]
Y_train = data[data.date_block_num <= 20]['label']
X_valid = data[(data.date_block_num > 20) & (data.date_block_num <25)][features]
Y_valid = data[(data.date_block_num > 20) & (data.date_block_num <25)]['label']
X_train_set = data[data.date_block_num < 25][features]
Y_train_set = data[data.date_block_num < 25]['label']
X_test = data[data.date_block_num >= 25][features]
X_test=X_test.sort_index(by=['id'],ascending=True)

In [90]:
ts = time.time()

model1 = lgb.LGBMRegressor(objective='regression', metric='rmse',
                    num_leaves=4,learning_rate=0.01, n_estimators=5000,
                    max_bin=400, bagging_fraction=0.75,bagging_freq=5, 
                    bagging_seed=7,feature_fraction=0.2,feature_fraction_seed=7,
                    verbose=2
                    #min_data_in_leaf=2,
                    #min_sum_hessian_in_leaf=11
                    )
model1.fit(X_train, Y_train, eval_metric="rmse", 
          eval_set=[(X_train, Y_train), (X_valid, Y_valid)], 
          categorical_feature=cate_feat,
          verbose=True, early_stopping_rounds = 50)
time.time() - ts

[1]	training's rmse: 745.761	valid_1's rmse: 922.183
Training until validation scores don't improve for 50 rounds.
[2]	training's rmse: 739.426	valid_1's rmse: 914.398
[3]	training's rmse: 737.735	valid_1's rmse: 912.601
[4]	training's rmse: 737.506	valid_1's rmse: 912.216
[5]	training's rmse: 735.629	valid_1's rmse: 910.366
[6]	training's rmse: 729.432	valid_1's rmse: 902.749
[7]	training's rmse: 727.769	valid_1's rmse: 900.991
[8]	training's rmse: 726.375	valid_1's rmse: 899.188
[9]	training's rmse: 723.69	valid_1's rmse: 896.564
[10]	training's rmse: 719.829	valid_1's rmse: 892.66
[11]	training's rmse: 718.577	valid_1's rmse: 891.046
[12]	training's rmse: 718.36	valid_1's rmse: 890.681
[13]	training's rmse: 718.108	valid_1's rmse: 890.324
[14]	training's rmse: 714.337	valid_1's rmse: 886.508
[15]	training's rmse: 714.091	valid_1's rmse: 886.163
[16]	training's rmse: 710.34	valid_1's rmse: 882.312
[17]	training's rmse: 708.621	valid_1's rmse: 880.565
[18]	training's rmse: 708.166	val

[177]	training's rmse: 445.726	valid_1's rmse: 576.971
[178]	training's rmse: 445.319	valid_1's rmse: 576.394
[179]	training's rmse: 444.973	valid_1's rmse: 575.891
[180]	training's rmse: 441.654	valid_1's rmse: 571.538
[181]	training's rmse: 441.552	valid_1's rmse: 571.406
[182]	training's rmse: 439.768	valid_1's rmse: 569.563
[183]	training's rmse: 439.688	valid_1's rmse: 569.416
[184]	training's rmse: 439.245	valid_1's rmse: 568.99
[185]	training's rmse: 438.673	valid_1's rmse: 568.34
[186]	training's rmse: 438.528	valid_1's rmse: 568.104
[187]	training's rmse: 438.193	valid_1's rmse: 567.723
[188]	training's rmse: 437.779	valid_1's rmse: 567.042
[189]	training's rmse: 436.877	valid_1's rmse: 566.151
[190]	training's rmse: 433.5	valid_1's rmse: 561.881
[191]	training's rmse: 432.636	valid_1's rmse: 560.925
[192]	training's rmse: 430.843	valid_1's rmse: 559.061
[193]	training's rmse: 430.738	valid_1's rmse: 558.922
[194]	training's rmse: 427.464	valid_1's rmse: 554.799
[195]	training

[387]	training's rmse: 297.902	valid_1's rmse: 394.709
[388]	training's rmse: 297.826	valid_1's rmse: 394.602
[389]	training's rmse: 297.772	valid_1's rmse: 394.546
[390]	training's rmse: 297.698	valid_1's rmse: 394.442
[391]	training's rmse: 297.646	valid_1's rmse: 394.388
[392]	training's rmse: 297.546	valid_1's rmse: 394.254
[393]	training's rmse: 297.208	valid_1's rmse: 393.808
[394]	training's rmse: 297.165	valid_1's rmse: 393.854
[395]	training's rmse: 297.019	valid_1's rmse: 393.632
[396]	training's rmse: 295.234	valid_1's rmse: 391.138
[397]	training's rmse: 295.181	valid_1's rmse: 391.069
[398]	training's rmse: 294.307	valid_1's rmse: 390.093
[399]	training's rmse: 294.256	valid_1's rmse: 390.025
[400]	training's rmse: 293.397	valid_1's rmse: 389.062
[401]	training's rmse: 293.362	valid_1's rmse: 389.012
[402]	training's rmse: 291.629	valid_1's rmse: 386.618
[403]	training's rmse: 291.578	valid_1's rmse: 386.564
[404]	training's rmse: 290.777	valid_1's rmse: 385.657
[405]	trai

[632]	training's rmse: 226.964	valid_1's rmse: 300.549
[633]	training's rmse: 226.952	valid_1's rmse: 300.516
[634]	training's rmse: 226.08	valid_1's rmse: 299.161
[635]	training's rmse: 226.068	valid_1's rmse: 299.129
[636]	training's rmse: 225.262	valid_1's rmse: 297.95
[637]	training's rmse: 225.189	valid_1's rmse: 297.849
[638]	training's rmse: 225.155	valid_1's rmse: 297.792
[639]	training's rmse: 225.139	valid_1's rmse: 297.787
[640]	training's rmse: 225.106	valid_1's rmse: 297.731
[641]	training's rmse: 225.092	valid_1's rmse: 297.703
[642]	training's rmse: 224.246	valid_1's rmse: 296.469
[643]	training's rmse: 224.23	valid_1's rmse: 296.464
[644]	training's rmse: 223.397	valid_1's rmse: 295.247
[645]	training's rmse: 223.382	valid_1's rmse: 295.243
[646]	training's rmse: 222.569	valid_1's rmse: 294.049
[647]	training's rmse: 222.554	valid_1's rmse: 294.02
[648]	training's rmse: 221.753	valid_1's rmse: 292.774
[649]	training's rmse: 221.684	valid_1's rmse: 292.687
[650]	training

[876]	training's rmse: 189.784	valid_1's rmse: 250.028
[877]	training's rmse: 189.758	valid_1's rmse: 249.991
[878]	training's rmse: 189.733	valid_1's rmse: 249.973
[879]	training's rmse: 189.699	valid_1's rmse: 249.927
[880]	training's rmse: 189.502	valid_1's rmse: 249.739
[881]	training's rmse: 189.474	valid_1's rmse: 249.702
[882]	training's rmse: 189.282	valid_1's rmse: 249.512
[883]	training's rmse: 189.269	valid_1's rmse: 249.507
[884]	training's rmse: 188.827	valid_1's rmse: 248.756
[885]	training's rmse: 188.813	valid_1's rmse: 248.751
[886]	training's rmse: 188.793	valid_1's rmse: 248.764
[887]	training's rmse: 188.786	valid_1's rmse: 248.767
[888]	training's rmse: 188.579	valid_1's rmse: 248.516
[889]	training's rmse: 188.568	valid_1's rmse: 248.493
[890]	training's rmse: 188.553	valid_1's rmse: 248.51
[891]	training's rmse: 188.548	valid_1's rmse: 248.505
[892]	training's rmse: 188.528	valid_1's rmse: 248.493
[893]	training's rmse: 188.502	valid_1's rmse: 248.463
[894]	train

[1053]	training's rmse: 174.091	valid_1's rmse: 227.634
[1054]	training's rmse: 173.764	valid_1's rmse: 227.038
[1055]	training's rmse: 173.747	valid_1's rmse: 227.028
[1056]	training's rmse: 173.399	valid_1's rmse: 226.409
[1057]	training's rmse: 173.371	valid_1's rmse: 226.351
[1058]	training's rmse: 173.359	valid_1's rmse: 226.351
[1059]	training's rmse: 173.353	valid_1's rmse: 226.346
[1060]	training's rmse: 173.335	valid_1's rmse: 226.328
[1061]	training's rmse: 173.329	valid_1's rmse: 226.336
[1062]	training's rmse: 173.183	valid_1's rmse: 226.199
[1063]	training's rmse: 173.179	valid_1's rmse: 226.195
[1064]	training's rmse: 173.165	valid_1's rmse: 226.193
[1065]	training's rmse: 173.147	valid_1's rmse: 226.183
[1066]	training's rmse: 173.133	valid_1's rmse: 226.183
[1067]	training's rmse: 173.123	valid_1's rmse: 226.127
[1068]	training's rmse: 173.11	valid_1's rmse: 226.127
[1069]	training's rmse: 173.1	valid_1's rmse: 226.071
[1070]	training's rmse: 173.087	valid_1's rmse: 226

[1284]	training's rmse: 158.719	valid_1's rmse: 205.232
[1285]	training's rmse: 158.708	valid_1's rmse: 205.228
[1286]	training's rmse: 158.497	valid_1's rmse: 204.786
[1287]	training's rmse: 158.49	valid_1's rmse: 204.789
[1288]	training's rmse: 158.471	valid_1's rmse: 204.773
[1289]	training's rmse: 158.468	valid_1's rmse: 204.774
[1290]	training's rmse: 158.461	valid_1's rmse: 204.775
[1291]	training's rmse: 158.455	valid_1's rmse: 204.778
[1292]	training's rmse: 158.447	valid_1's rmse: 204.777
[1293]	training's rmse: 158.444	valid_1's rmse: 204.78
[1294]	training's rmse: 158.249	valid_1's rmse: 204.403
[1295]	training's rmse: 158.231	valid_1's rmse: 204.404
[1296]	training's rmse: 158.225	valid_1's rmse: 204.402
[1297]	training's rmse: 158.186	valid_1's rmse: 204.38
[1298]	training's rmse: 158.163	valid_1's rmse: 204.369
[1299]	training's rmse: 158.157	valid_1's rmse: 204.365
[1300]	training's rmse: 158.052	valid_1's rmse: 204.255
[1301]	training's rmse: 158.038	valid_1's rmse: 204

[1523]	training's rmse: 148.095	valid_1's rmse: 189.829
[1524]	training's rmse: 147.947	valid_1's rmse: 189.525
[1525]	training's rmse: 147.937	valid_1's rmse: 189.535
[1526]	training's rmse: 147.797	valid_1's rmse: 189.249
[1527]	training's rmse: 147.794	valid_1's rmse: 189.252
[1528]	training's rmse: 147.781	valid_1's rmse: 189.244
[1529]	training's rmse: 147.778	valid_1's rmse: 189.247
[1530]	training's rmse: 147.774	valid_1's rmse: 189.255
[1531]	training's rmse: 147.764	valid_1's rmse: 189.243
[1532]	training's rmse: 147.681	valid_1's rmse: 189.152
[1533]	training's rmse: 147.673	valid_1's rmse: 189.145
[1534]	training's rmse: 147.59	valid_1's rmse: 189.052
[1535]	training's rmse: 147.573	valid_1's rmse: 189.059
[1536]	training's rmse: 147.433	valid_1's rmse: 188.761
[1537]	training's rmse: 147.425	valid_1's rmse: 188.765
[1538]	training's rmse: 147.404	valid_1's rmse: 188.753
[1539]	training's rmse: 147.394	valid_1's rmse: 188.746
[1540]	training's rmse: 147.317	valid_1's rmse: 1

[1724]	training's rmse: 140.02	valid_1's rmse: 177.952
[1725]	training's rmse: 140.013	valid_1's rmse: 177.958
[1726]	training's rmse: 139.875	valid_1's rmse: 177.674
[1727]	training's rmse: 139.87	valid_1's rmse: 177.677
[1728]	training's rmse: 139.853	valid_1's rmse: 177.674
[1729]	training's rmse: 139.849	valid_1's rmse: 177.677
[1730]	training's rmse: 139.83	valid_1's rmse: 177.671
[1731]	training's rmse: 139.821	valid_1's rmse: 177.682
[1732]	training's rmse: 139.702	valid_1's rmse: 177.418
[1733]	training's rmse: 139.693	valid_1's rmse: 177.428
[1734]	training's rmse: 139.682	valid_1's rmse: 177.426
[1735]	training's rmse: 139.674	valid_1's rmse: 177.417
[1736]	training's rmse: 139.657	valid_1's rmse: 177.406
[1737]	training's rmse: 139.649	valid_1's rmse: 177.399
[1738]	training's rmse: 139.637	valid_1's rmse: 177.396
[1739]	training's rmse: 139.635	valid_1's rmse: 177.398
[1740]	training's rmse: 139.569	valid_1's rmse: 177.317
[1741]	training's rmse: 139.562	valid_1's rmse: 177

[1976]	training's rmse: 131.903	valid_1's rmse: 165.735
[1977]	training's rmse: 131.9	valid_1's rmse: 165.734
[1978]	training's rmse: 131.796	valid_1's rmse: 165.504
[1979]	training's rmse: 131.788	valid_1's rmse: 165.517
[1980]	training's rmse: 131.687	valid_1's rmse: 165.291
[1981]	training's rmse: 131.669	valid_1's rmse: 165.277
[1982]	training's rmse: 131.541	valid_1's rmse: 164.959
[1983]	training's rmse: 131.535	valid_1's rmse: 164.961
[1984]	training's rmse: 131.438	valid_1's rmse: 164.786
[1985]	training's rmse: 131.424	valid_1's rmse: 164.793
[1986]	training's rmse: 131.344	valid_1's rmse: 164.619
[1987]	training's rmse: 131.337	valid_1's rmse: 164.61
[1988]	training's rmse: 131.258	valid_1's rmse: 164.395
[1989]	training's rmse: 131.246	valid_1's rmse: 164.404
[1990]	training's rmse: 131.237	valid_1's rmse: 164.402
[1991]	training's rmse: 131.229	valid_1's rmse: 164.415
[1992]	training's rmse: 131.127	valid_1's rmse: 164.202
[1993]	training's rmse: 131.124	valid_1's rmse: 164

[2165]	training's rmse: 126.58	valid_1's rmse: 157.831
[2166]	training's rmse: 126.577	valid_1's rmse: 157.857
[2167]	training's rmse: 126.573	valid_1's rmse: 157.857
[2168]	training's rmse: 126.474	valid_1's rmse: 157.603
[2169]	training's rmse: 126.47	valid_1's rmse: 157.603
[2170]	training's rmse: 126.467	valid_1's rmse: 157.629
[2171]	training's rmse: 126.459	valid_1's rmse: 157.621
[2172]	training's rmse: 126.451	valid_1's rmse: 157.622
[2173]	training's rmse: 126.436	valid_1's rmse: 157.631
[2174]	training's rmse: 126.427	valid_1's rmse: 157.634
[2175]	training's rmse: 126.421	valid_1's rmse: 157.637
[2176]	training's rmse: 126.347	valid_1's rmse: 157.47
[2177]	training's rmse: 126.342	valid_1's rmse: 157.473
[2178]	training's rmse: 126.331	valid_1's rmse: 157.474
[2179]	training's rmse: 126.325	valid_1's rmse: 157.469
[2180]	training's rmse: 126.288	valid_1's rmse: 157.434
[2181]	training's rmse: 126.281	valid_1's rmse: 157.448
[2182]	training's rmse: 126.273	valid_1's rmse: 157

[2409]	training's rmse: 121.319	valid_1's rmse: 150.173
[2410]	training's rmse: 121.286	valid_1's rmse: 150.131
[2411]	training's rmse: 121.28	valid_1's rmse: 150.122
[2412]	training's rmse: 121.211	valid_1's rmse: 150
[2413]	training's rmse: 121.196	valid_1's rmse: 149.991
[2414]	training's rmse: 121.134	valid_1's rmse: 149.824
[2415]	training's rmse: 121.12	valid_1's rmse: 149.815
[2416]	training's rmse: 121.06	valid_1's rmse: 149.677
[2417]	training's rmse: 121.055	valid_1's rmse: 149.677
[2418]	training's rmse: 120.996	valid_1's rmse: 149.536
[2419]	training's rmse: 120.991	valid_1's rmse: 149.528
[2420]	training's rmse: 120.958	valid_1's rmse: 149.491
[2421]	training's rmse: 120.956	valid_1's rmse: 149.493
[2422]	training's rmse: 120.948	valid_1's rmse: 149.49
[2423]	training's rmse: 120.942	valid_1's rmse: 149.5
[2424]	training's rmse: 120.909	valid_1's rmse: 149.461
[2425]	training's rmse: 120.903	valid_1's rmse: 149.456
[2426]	training's rmse: 120.869	valid_1's rmse: 149.418
[2

[2653]	training's rmse: 116.974	valid_1's rmse: 144.057
[2654]	training's rmse: 116.965	valid_1's rmse: 144.058
[2655]	training's rmse: 116.962	valid_1's rmse: 144.059
[2656]	training's rmse: 116.96	valid_1's rmse: 144.07
[2657]	training's rmse: 116.957	valid_1's rmse: 144.073
[2658]	training's rmse: 116.908	valid_1's rmse: 143.951
[2659]	training's rmse: 116.903	valid_1's rmse: 143.953
[2660]	training's rmse: 116.894	valid_1's rmse: 143.947
[2661]	training's rmse: 116.886	valid_1's rmse: 143.955
[2662]	training's rmse: 116.883	valid_1's rmse: 143.986
[2663]	training's rmse: 116.878	valid_1's rmse: 143.989
[2664]	training's rmse: 116.852	valid_1's rmse: 143.972
[2665]	training's rmse: 116.849	valid_1's rmse: 143.975
[2666]	training's rmse: 116.819	valid_1's rmse: 143.938
[2667]	training's rmse: 116.812	valid_1's rmse: 143.947
[2668]	training's rmse: 116.764	valid_1's rmse: 143.828
[2669]	training's rmse: 116.756	valid_1's rmse: 143.837
[2670]	training's rmse: 116.753	valid_1's rmse: 14

[2844]	training's rmse: 114.148	valid_1's rmse: 140.17
[2845]	training's rmse: 114.146	valid_1's rmse: 140.172
[2846]	training's rmse: 114.136	valid_1's rmse: 140.179
[2847]	training's rmse: 114.13	valid_1's rmse: 140.181
[2848]	training's rmse: 114.126	valid_1's rmse: 140.18
[2849]	training's rmse: 114.121	valid_1's rmse: 140.185
[2850]	training's rmse: 114.071	valid_1's rmse: 140.066
[2851]	training's rmse: 114.068	valid_1's rmse: 140.065
[2852]	training's rmse: 114.02	valid_1's rmse: 139.931
[2853]	training's rmse: 114.017	valid_1's rmse: 139.931
[2854]	training's rmse: 114.015	valid_1's rmse: 139.96
[2855]	training's rmse: 114.01	valid_1's rmse: 139.971
[2856]	training's rmse: 113.969	valid_1's rmse: 139.876
[2857]	training's rmse: 113.961	valid_1's rmse: 139.886
[2858]	training's rmse: 113.938	valid_1's rmse: 139.875
[2859]	training's rmse: 113.933	valid_1's rmse: 139.887
[2860]	training's rmse: 113.911	valid_1's rmse: 139.875
[2861]	training's rmse: 113.905	valid_1's rmse: 139.89

[3084]	training's rmse: 110.627	valid_1's rmse: 134.919
[3085]	training's rmse: 110.621	valid_1's rmse: 134.927
[3086]	training's rmse: 110.619	valid_1's rmse: 134.928
[3087]	training's rmse: 110.615	valid_1's rmse: 134.923
[3088]	training's rmse: 110.61	valid_1's rmse: 134.915
[3089]	training's rmse: 110.606	valid_1's rmse: 134.922
[3090]	training's rmse: 110.565	valid_1's rmse: 134.812
[3091]	training's rmse: 110.561	valid_1's rmse: 134.806
[3092]	training's rmse: 110.539	valid_1's rmse: 134.796
[3093]	training's rmse: 110.533	valid_1's rmse: 134.813
[3094]	training's rmse: 110.527	valid_1's rmse: 134.811
[3095]	training's rmse: 110.522	valid_1's rmse: 134.805
[3096]	training's rmse: 110.52	valid_1's rmse: 134.819
[3097]	training's rmse: 110.513	valid_1's rmse: 134.824
[3098]	training's rmse: 110.507	valid_1's rmse: 134.817
[3099]	training's rmse: 110.505	valid_1's rmse: 134.82
[3100]	training's rmse: 110.502	valid_1's rmse: 134.819
[3101]	training's rmse: 110.498	valid_1's rmse: 134

[3299]	training's rmse: 108.328	valid_1's rmse: 132.173
[3300]	training's rmse: 108.326	valid_1's rmse: 132.168
[3301]	training's rmse: 108.323	valid_1's rmse: 132.177
[3302]	training's rmse: 108.276	valid_1's rmse: 132.047
[3303]	training's rmse: 108.273	valid_1's rmse: 132.043
[3304]	training's rmse: 108.271	valid_1's rmse: 132.05
[3305]	training's rmse: 108.267	valid_1's rmse: 132.051
[3306]	training's rmse: 108.215	valid_1's rmse: 131.939
[3307]	training's rmse: 108.213	valid_1's rmse: 131.936
[3308]	training's rmse: 108.162	valid_1's rmse: 131.826
[3309]	training's rmse: 108.155	valid_1's rmse: 131.818
[3310]	training's rmse: 108.118	valid_1's rmse: 131.733
[3311]	training's rmse: 108.116	valid_1's rmse: 131.729
[3312]	training's rmse: 108.111	valid_1's rmse: 131.725
[3313]	training's rmse: 108.108	valid_1's rmse: 131.729
[3314]	training's rmse: 108.106	valid_1's rmse: 131.733
[3315]	training's rmse: 108.104	valid_1's rmse: 131.728
[3316]	training's rmse: 108.085	valid_1's rmse: 1

[3541]	training's rmse: 105.527	valid_1's rmse: 128.174
[3542]	training's rmse: 105.478	valid_1's rmse: 128.112
[3543]	training's rmse: 105.473	valid_1's rmse: 128.113
[3544]	training's rmse: 105.471	valid_1's rmse: 128.123
[3545]	training's rmse: 105.469	valid_1's rmse: 128.122
[3546]	training's rmse: 105.467	valid_1's rmse: 128.128
[3547]	training's rmse: 105.465	valid_1's rmse: 128.127
[3548]	training's rmse: 105.435	valid_1's rmse: 128.054
[3549]	training's rmse: 105.433	valid_1's rmse: 128.05
[3550]	training's rmse: 105.431	valid_1's rmse: 128.056
[3551]	training's rmse: 105.428	valid_1's rmse: 128.061
[3552]	training's rmse: 105.423	valid_1's rmse: 128.055
[3553]	training's rmse: 105.421	valid_1's rmse: 128.05
[3554]	training's rmse: 105.402	valid_1's rmse: 128.024
[3555]	training's rmse: 105.395	valid_1's rmse: 128.021
[3556]	training's rmse: 105.352	valid_1's rmse: 127.935
[3557]	training's rmse: 105.35	valid_1's rmse: 127.932
[3558]	training's rmse: 105.348	valid_1's rmse: 127

[3796]	training's rmse: 103.072	valid_1's rmse: 124.586
[3797]	training's rmse: 103.07	valid_1's rmse: 124.585
[3798]	training's rmse: 103.055	valid_1's rmse: 124.578
[3799]	training's rmse: 103.053	valid_1's rmse: 124.577
[3800]	training's rmse: 103.048	valid_1's rmse: 124.581
[3801]	training's rmse: 103.043	valid_1's rmse: 124.591
[3802]	training's rmse: 102.996	valid_1's rmse: 124.539
[3803]	training's rmse: 102.993	valid_1's rmse: 124.54
[3804]	training's rmse: 102.992	valid_1's rmse: 124.552
[3805]	training's rmse: 102.987	valid_1's rmse: 124.561
[3806]	training's rmse: 102.981	valid_1's rmse: 124.561
[3807]	training's rmse: 102.979	valid_1's rmse: 124.564
[3808]	training's rmse: 102.948	valid_1's rmse: 124.479
[3809]	training's rmse: 102.946	valid_1's rmse: 124.478
[3810]	training's rmse: 102.932	valid_1's rmse: 124.453
[3811]	training's rmse: 102.93	valid_1's rmse: 124.455
[3812]	training's rmse: 102.926	valid_1's rmse: 124.456
[3813]	training's rmse: 102.92	valid_1's rmse: 124.

[3990]	training's rmse: 101.458	valid_1's rmse: 122.668
[3991]	training's rmse: 101.455	valid_1's rmse: 122.675
[3992]	training's rmse: 101.453	valid_1's rmse: 122.685
[3993]	training's rmse: 101.451	valid_1's rmse: 122.694
[3994]	training's rmse: 101.447	valid_1's rmse: 122.693
[3995]	training's rmse: 101.445	valid_1's rmse: 122.688
[3996]	training's rmse: 101.443	valid_1's rmse: 122.691
[3997]	training's rmse: 101.438	valid_1's rmse: 122.705
[3998]	training's rmse: 101.437	valid_1's rmse: 122.716
[3999]	training's rmse: 101.434	valid_1's rmse: 122.712
[4000]	training's rmse: 101.42	valid_1's rmse: 122.699
[4001]	training's rmse: 101.419	valid_1's rmse: 122.698
[4002]	training's rmse: 101.385	valid_1's rmse: 122.611
[4003]	training's rmse: 101.375	valid_1's rmse: 122.598
[4004]	training's rmse: 101.345	valid_1's rmse: 122.526
[4005]	training's rmse: 101.335	valid_1's rmse: 122.513
[4006]	training's rmse: 101.331	valid_1's rmse: 122.516
[4007]	training's rmse: 101.329	valid_1's rmse: 1

[4181]	training's rmse: 99.8296	valid_1's rmse: 120.477
[4182]	training's rmse: 99.8186	valid_1's rmse: 120.465
[4183]	training's rmse: 99.817	valid_1's rmse: 120.465
[4184]	training's rmse: 99.8119	valid_1's rmse: 120.463
[4185]	training's rmse: 99.8103	valid_1's rmse: 120.466
[4186]	training's rmse: 99.7848	valid_1's rmse: 120.428
[4187]	training's rmse: 99.782	valid_1's rmse: 120.437
[4188]	training's rmse: 99.771	valid_1's rmse: 120.437
[4189]	training's rmse: 99.7661	valid_1's rmse: 120.422
[4190]	training's rmse: 99.7294	valid_1's rmse: 120.369
[4191]	training's rmse: 99.7275	valid_1's rmse: 120.368
[4192]	training's rmse: 99.7256	valid_1's rmse: 120.367
[4193]	training's rmse: 99.7239	valid_1's rmse: 120.363
[4194]	training's rmse: 99.6983	valid_1's rmse: 120.303
[4195]	training's rmse: 99.6979	valid_1's rmse: 120.319
[4196]	training's rmse: 99.6673	valid_1's rmse: 120.256
[4197]	training's rmse: 99.666	valid_1's rmse: 120.252
[4198]	training's rmse: 99.6641	valid_1's rmse: 120.

[4429]	training's rmse: 97.9296	valid_1's rmse: 117.91
[4430]	training's rmse: 97.9012	valid_1's rmse: 117.832
[4431]	training's rmse: 97.8995	valid_1's rmse: 117.831
[4432]	training's rmse: 97.8781	valid_1's rmse: 117.76
[4433]	training's rmse: 97.8763	valid_1's rmse: 117.755
[4434]	training's rmse: 97.8553	valid_1's rmse: 117.685
[4435]	training's rmse: 97.8536	valid_1's rmse: 117.684
[4436]	training's rmse: 97.8425	valid_1's rmse: 117.675
[4437]	training's rmse: 97.8408	valid_1's rmse: 117.673
[4438]	training's rmse: 97.8129	valid_1's rmse: 117.611
[4439]	training's rmse: 97.8103	valid_1's rmse: 117.614
[4440]	training's rmse: 97.7997	valid_1's rmse: 117.605
[4441]	training's rmse: 97.7986	valid_1's rmse: 117.599
[4442]	training's rmse: 97.7961	valid_1's rmse: 117.596
[4443]	training's rmse: 97.7931	valid_1's rmse: 117.601
[4444]	training's rmse: 97.7891	valid_1's rmse: 117.595
[4445]	training's rmse: 97.7864	valid_1's rmse: 117.604
[4446]	training's rmse: 97.7509	valid_1's rmse: 11

[4578]	training's rmse: 96.7985	valid_1's rmse: 116.193
[4579]	training's rmse: 96.7946	valid_1's rmse: 116.198
[4580]	training's rmse: 96.7931	valid_1's rmse: 116.191
[4581]	training's rmse: 96.7903	valid_1's rmse: 116.196
[4582]	training's rmse: 96.7712	valid_1's rmse: 116.148
[4583]	training's rmse: 96.7684	valid_1's rmse: 116.154
[4584]	training's rmse: 96.7668	valid_1's rmse: 116.154
[4585]	training's rmse: 96.7641	valid_1's rmse: 116.159
[4586]	training's rmse: 96.7454	valid_1's rmse: 116.12
[4587]	training's rmse: 96.7426	valid_1's rmse: 116.12
[4588]	training's rmse: 96.7215	valid_1's rmse: 116.077
[4589]	training's rmse: 96.7187	valid_1's rmse: 116.078
[4590]	training's rmse: 96.701	valid_1's rmse: 116.033
[4591]	training's rmse: 96.698	valid_1's rmse: 116.046
[4592]	training's rmse: 96.6837	valid_1's rmse: 116.032
[4593]	training's rmse: 96.6794	valid_1's rmse: 116.045
[4594]	training's rmse: 96.6654	valid_1's rmse: 116.032
[4595]	training's rmse: 96.6648	valid_1's rmse: 116.

[4731]	training's rmse: 95.6978	valid_1's rmse: 114.852
[4732]	training's rmse: 95.6961	valid_1's rmse: 114.865
[4733]	training's rmse: 95.6947	valid_1's rmse: 114.867
[4734]	training's rmse: 95.6812	valid_1's rmse: 114.85
[4735]	training's rmse: 95.6789	valid_1's rmse: 114.859
[4736]	training's rmse: 95.6687	valid_1's rmse: 114.856
[4737]	training's rmse: 95.6659	valid_1's rmse: 114.863
[4738]	training's rmse: 95.6623	valid_1's rmse: 114.865
[4739]	training's rmse: 95.6611	valid_1's rmse: 114.863
[4740]	training's rmse: 95.6593	valid_1's rmse: 114.855
[4741]	training's rmse: 95.6574	valid_1's rmse: 114.857
[4742]	training's rmse: 95.631	valid_1's rmse: 114.8
[4743]	training's rmse: 95.6299	valid_1's rmse: 114.801
[4744]	training's rmse: 95.6197	valid_1's rmse: 114.787
[4745]	training's rmse: 95.6176	valid_1's rmse: 114.788
[4746]	training's rmse: 95.6156	valid_1's rmse: 114.78
[4747]	training's rmse: 95.6125	valid_1's rmse: 114.788
[4748]	training's rmse: 95.6105	valid_1's rmse: 114.7

[4909]	training's rmse: 94.7139	valid_1's rmse: 113.798
[4910]	training's rmse: 94.6905	valid_1's rmse: 113.719
[4911]	training's rmse: 94.6897	valid_1's rmse: 113.72
[4912]	training's rmse: 94.6801	valid_1's rmse: 113.704
[4913]	training's rmse: 94.6793	valid_1's rmse: 113.704
[4914]	training's rmse: 94.6778	valid_1's rmse: 113.703
[4915]	training's rmse: 94.6765	valid_1's rmse: 113.701
[4916]	training's rmse: 94.6512	valid_1's rmse: 113.635
[4917]	training's rmse: 94.6504	valid_1's rmse: 113.638
[4918]	training's rmse: 94.6492	valid_1's rmse: 113.645
[4919]	training's rmse: 94.6484	valid_1's rmse: 113.648
[4920]	training's rmse: 94.628	valid_1's rmse: 113.593
[4921]	training's rmse: 94.6227	valid_1's rmse: 113.576
[4922]	training's rmse: 94.6217	valid_1's rmse: 113.572
[4923]	training's rmse: 94.6197	valid_1's rmse: 113.576
[4924]	training's rmse: 94.617	valid_1's rmse: 113.574
[4925]	training's rmse: 94.6157	valid_1's rmse: 113.573
[4926]	training's rmse: 94.6141	valid_1's rmse: 113

5.58222508430481

In [42]:
model2 = lgb.LGBMRegressor(
    num_leaves=32, reg_alpha=0., reg_lambda=0.01, objective='mse', metric='rmse',
    max_depth=-1, learning_rate=0.05, min_child_samples=20,
    n_estimators=1000, subsample=0.7, colsample_bytree=0.7, subsample_freq=1,
)#定义LightGBM模型
model2.fit(X_train, Y_train, eval_set=[(X_valid, Y_valid)], early_stopping_rounds=100,
        eval_metric='rmse',
        # callbacks=[lgb.reset_parameter(learning_rate=lambda iter: max(0.005, 0.5 * (0.99 ** iter)))],
        categorical_feature=cate_feat,
        #sample_weight=data.loc[train_idx]['sample_weight'],
        verbose=2)

Training until validation scores don't improve for 100 rounds.
[2]	valid_0's rmse: 838.366
[4]	valid_0's rmse: 766.509
[6]	valid_0's rmse: 695.905
[8]	valid_0's rmse: 632.153
[10]	valid_0's rmse: 590.124
[12]	valid_0's rmse: 541.051
[14]	valid_0's rmse: 498.522
[16]	valid_0's rmse: 461.088
[18]	valid_0's rmse: 422.894
[20]	valid_0's rmse: 389.714
[22]	valid_0's rmse: 359.925
[24]	valid_0's rmse: 333.634
[26]	valid_0's rmse: 306.953
[28]	valid_0's rmse: 290.363
[30]	valid_0's rmse: 272.948
[32]	valid_0's rmse: 255.995
[34]	valid_0's rmse: 242.86
[36]	valid_0's rmse: 228.889
[38]	valid_0's rmse: 214.696
[40]	valid_0's rmse: 204.198
[42]	valid_0's rmse: 193.549
[44]	valid_0's rmse: 184.329
[46]	valid_0's rmse: 174.805
[48]	valid_0's rmse: 168.412
[50]	valid_0's rmse: 162.851
[52]	valid_0's rmse: 157.791
[54]	valid_0's rmse: 151.272
[56]	valid_0's rmse: 148.683
[58]	valid_0's rmse: 144.677
[60]	valid_0's rmse: 139.836
[62]	valid_0's rmse: 137.031
[64]	valid_0's rmse: 134.304
[66]	valid_0's

[566]	valid_0's rmse: 91.7126
[568]	valid_0's rmse: 91.6845
[570]	valid_0's rmse: 91.7498
[572]	valid_0's rmse: 91.5022
[574]	valid_0's rmse: 91.7266
[576]	valid_0's rmse: 91.5232
[578]	valid_0's rmse: 91.51
[580]	valid_0's rmse: 91.4097
[582]	valid_0's rmse: 91.2421
[584]	valid_0's rmse: 91.3284
[586]	valid_0's rmse: 91.465
[588]	valid_0's rmse: 91.3122
[590]	valid_0's rmse: 91.4094
[592]	valid_0's rmse: 91.3145
[594]	valid_0's rmse: 91.4616
[596]	valid_0's rmse: 91.3716
[598]	valid_0's rmse: 91.2918
[600]	valid_0's rmse: 91.4359
[602]	valid_0's rmse: 91.5975
[604]	valid_0's rmse: 91.6429
[606]	valid_0's rmse: 91.6815
[608]	valid_0's rmse: 91.6284
[610]	valid_0's rmse: 91.4106
[612]	valid_0's rmse: 91.4948
[614]	valid_0's rmse: 91.4242
[616]	valid_0's rmse: 91.4692
[618]	valid_0's rmse: 91.5762
[620]	valid_0's rmse: 91.6182
[622]	valid_0's rmse: 91.6736
[624]	valid_0's rmse: 91.5238
[626]	valid_0's rmse: 91.7935
[628]	valid_0's rmse: 92.0136
[630]	valid_0's rmse: 92.002
[632]	valid_0'

LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=0.7,
              importance_type='split', learning_rate=0.05, max_depth=-1,
              metric='rmse', min_child_samples=20, min_child_weight=0.001,
              min_split_gain=0.0, n_estimators=1000, n_jobs=-1, num_leaves=32,
              objective='mse', random_state=None, reg_alpha=0.0,
              reg_lambda=0.01, silent=True, subsample=0.7,
              subsample_for_bin=200000, subsample_freq=1)

In [91]:
submit_example=pd.read_csv('../ccf_car/submit_example.csv')
Y_test = model1.predict(X_test)
submission = pd.DataFrame({
    "id": submit_example['id'], 
    "forecastVolum": Y_test.round().astype(int)
})
submission['forecastVolum'] = submission['forecastVolum'].apply(lambda x: 0 if x < 0 else x)#<0的结果变为0
submission.to_csv('../rst/rst_fixNaN_rolling_holiday.csv', index=False)

In [5]:
def rmse(y, y_pred):
    return np.sqrt(mean_squared_error(y, y_pred))
def lgb_Regressor(train_x, train_y, val_x, val_y, train_X, y):
    lgb_model = lgb.LGBMRegressor(objective='regression',num_leaves=5,
                              learning_rate=0.05, n_estimators=360,
                              max_bin = 55, bagging_fraction = 0.8,
                              bagging_freq = 5, feature_fraction = 0.2319,
                              feature_fraction_seed=9, bagging_seed=9,
                              min_data_in_leaf =6, min_sum_hessian_in_leaf = 10)
    lgb_model.fit(train_x, train_y)
    pred_val = lgb_model.predict(val_x)
    score = rmse(val_y, pred_val)
    lgb_model.fit(train_X, y)
    
    return lgb_model, score, pred_val
print("LGBMRegressor开始训练...")
lgb_model,score, lgb_train_pred= lgb_Regressor(X_train, Y_train, X_valid, Y_valid, X_train_set, Y_train_set)
print(score)
lgb_pred = lgb_model.predict(X_test)

LGBMRegressor开始训练...
182.71559226057494


In [6]:
submit_example=pd.read_csv('../ccf_car/submit_example.csv')
Y_test = lgb_model.predict(X_test)
submission = pd.DataFrame({
    "id": submit_example['id'], 
    "forecastVolum": Y_test.round().astype(int)
})
submission['forecastVolum'] = submission['forecastVolum'].apply(lambda x: 0 if x < 0 else x)#<0的结果变为0
submission.to_csv('../rst/rst_fixNaN_rolling_holiday.csv', index=False)

In [10]:
submit_example=pd.read_csv('../ccf_car/submit_example.csv')
Y_test = lgb_model.predict(X_test)
submission = pd.DataFrame({
    "id": submit_example['id'], 
    "forecastVolum": Y_test
})
submission['forecastVolum'] = (submission['forecastVolum']*0.6).round().astype(int).apply(lambda x: 0 if x < 0 else x)#<0的结果变为0
submission.to_csv('../rst/rst_small.csv', index=False)

# 滑窗

In [None]:
X_train = data[data.date_block_num <= 20][features]
Y_train = data[data.date_block_num <= 20]['label']
X_valid = data[(data.date_block_num > 20) & (data.date_block_num <25)][features]
Y_valid = data[(data.date_block_num > 20) & (data.date_block_num <25)]['label']
X_train_set = data[data.date_block_num < 25][features]
Y_train_set = data[data.date_block_num < 25]['label']
X_test = data[data.date_block_num >= 25][features]
X_test=X_test.sort_index(by=['id'],ascending=True)