In [1]:
import gc
import pandas as pd
import numpy as np
import os
import xgboost as xgb
import lightgbm as lgb
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.externals import joblib
from imblearn.over_sampling import SMOTE 
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve 

In [2]:
# XGBoost classifier configuration. Arguments are grouped by concern; the
# values are exactly the ones used before.
clf2 = xgb.XGBClassifier(
    # --- learning task ---
    objective='binary:logistic',  # binary classification, logistic loss (probability output)
    eval_metric='auc',            # metric used for evaluation
    # --- boosting schedule ---
    n_estimators=300,             # number of boosted trees
    learning_rate=0.05,           # shrinkage applied to every tree's contribution
    # --- tree shape ---
    max_depth=6,                  # depth limit per tree; deeper trees overfit more easily
    min_child_weight=1,           # minimum sum of hessians required in a leaf; with
                                  # imbalanced 0/1 labels the per-row hessian can be small,
                                  # so 1 may already demand many rows per leaf. Smaller
                                  # values make overfitting more likely.
    gamma=0,                      # minimum loss reduction needed to split a leaf further;
                                  # larger is more conservative (0.1-0.2 are typical)
    max_delta_step=0,             # cap on each tree's weight update step (0 = no cap)
    # --- row / column sampling ---
    subsample=1,                  # fraction of training rows sampled per tree
    colsample_bytree=1,           # fraction of columns sampled per tree
    # --- regularisation and class balance ---
    reg_lambda=3,                 # L2 penalty on leaf weights; larger resists overfitting
    # reg_alpha=0,                # L1 penalty on leaf weights (left disabled)
    scale_pos_weight=1,           # weight ratio between positive and negative classes
    # --- runtime ---
    silent=0,                     # 0 prints progress messages, 1 suppresses them
    nthread=-1,                   # number of CPU threads; -1 uses as many as available
    seed=2018,                    # random seed
)
# scikit-learn gradient boosting classifier; same settings as before,
# regrouped by concern.
clf3 = GradientBoostingClassifier(
    # boosting schedule
    n_estimators=30,      # number of boosting stages
    learning_rate=0.1,    # shrinkage applied to each stage
    # tree shape
    max_depth=6,
    max_leaf_nodes=50,
    # sampling
    subsample=0.8,        # fraction of rows used to fit each stage
    max_features=0.9,     # fraction of features considered at each split
    # runtime
    random_state=2018,
    verbose=1,            # print progress while fitting
)

In [2]:
def sampling(Dataframe, frac, random_state):
    """Draw a random subset of rows from ``Dataframe`` without replacement.

    Parameters
    ----------
    Dataframe : object exposing a pandas-style ``.sample`` method
        Frame to draw rows from.
    frac : float
        Fraction of rows to return.
    random_state : int or numpy RandomState
        Seed forwarded to ``.sample`` so the draw is reproducible.

    Returns
    -------
    The sampled rows, as returned by ``Dataframe.sample``.
    """
    # n, replace and weights are left at the pandas defaults
    # (None, False, None); rows are sampled along axis 0.
    return Dataframe.sample(frac=frac, random_state=random_state, axis=0)

In [3]:
#################### Single LightGBM model (GBDT) ###########################
def LGBtraining(X_train, y_train, X_val, y_val, random_state, cols, col_int):
    """Fit a LightGBM GBDT binary classifier with early stopping on AUC.

    Parameters
    ----------
    X_train, y_train
        Training features and labels passed to ``fit``.
    X_val, y_val
        Validation features and labels used as the single evaluation set.
    random_state
        Seed forwarded to ``LGBMClassifier``.
    cols
        Feature names, forwarded as ``feature_name``.
    col_int
        Forwarded as ``categorical_feature``.
        NOTE(review): presumably the int64-typed column names - confirm
        against the caller.

    Returns
    -------
    The fitted ``lgb.LGBMClassifier``.

    NOTE(review): ``early_stopping_rounds`` as a ``fit`` keyword is only
    accepted by older LightGBM releases; newer ones expect
    ``callbacks=[lgb.early_stopping(20)]`` - confirm the installed version.
    """
    # Disabled alternatives kept for reference:
    #   max_depth=-1, max_bin=50, min_split_gain=0.3, reg_alpha=0.7, reg_lambda=0.4
    params = dict(
        boosting_type='gbdt',
        num_leaves=50,
        n_estimators=300,
        objective='binary',
        learning_rate=0.05,
        feature_fraction=0.9,
        bagging_fraction=0.8,
        random_state=random_state,
        bagging_freq=5,
        verbose=0,
        n_jobs=-1,
    )
    model = lgb.LGBMClassifier(**params)

    # Track AUC on the validation set and stop once it has not improved
    # for 20 consecutive rounds.
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_val, y_val)],
        eval_metric='auc',
        early_stopping_rounds=20,
        feature_name=cols,
        categorical_feature=col_int,
    )
    return model

In [4]:
#################### Single LightGBM model (random-forest mode) ###########################
def LGB_RF_training(X_train, y_train, X_val, y_val, random_state, cols, col_int):
    """Fit a LightGBM classifier in ``boosting_type='rf'`` mode with early stopping on AUC.

    Parameters
    ----------
    X_train, y_train
        Training features and labels passed to ``fit``.
    X_val, y_val
        Validation features and labels used as the single evaluation set.
    random_state
        Seed forwarded to ``LGBMClassifier``.
    cols
        Feature names, forwarded as ``feature_name``.
    col_int
        Forwarded as ``categorical_feature``.
        NOTE(review): presumably the int64-typed column names - confirm
        against the caller.

    Returns
    -------
    The fitted ``lgb.LGBMClassifier``.

    NOTE(review): ``early_stopping_rounds`` as a ``fit`` keyword is only
    accepted by older LightGBM releases; newer ones expect
    ``callbacks=[lgb.early_stopping(20)]`` - confirm the installed version.
    """
    # Disabled alternatives kept for reference:
    #   max_depth=-1, min_split_gain=0.3, reg_alpha=0.7, reg_lambda=0.4
    params = dict(
        boosting_type='rf',
        num_leaves=255,
        n_estimators=200,
        max_bin=2000,
        objective='binary',
        learning_rate=0.05,
        feature_fraction=0.4,
        bagging_fraction=0.5,
        random_state=random_state,
        min_data_in_leaf=10,
        bagging_freq=5,
        verbose=0,
        n_jobs=-1,
    )
    model = lgb.LGBMClassifier(**params)

    # Track AUC on the validation set and stop once it has not improved
    # for 20 consecutive rounds.
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_val, y_val)],
        eval_metric='auc',
        early_stopping_rounds=20,
        feature_name=cols,
        categorical_feature=col_int,
    )
    return model

In [5]:
#################### Training set #######################
# Directory holding the input CSV files, relative to the working directory.
DATA_PATH = "datasets"
csv_path = os.path.join(DATA_PATH, "atec_anti_fraud_train.csv")
# Read the whole training CSV into memory as `data`.
data = pd.read_csv(csv_path)

In [6]:
#################### Training set preview #######################
# Raise the column display limit so every column is shown when a frame is
# displayed.
pd.set_option('display.max_columns', 2000)
# Preview only the first rows instead of asking the notebook to render the
# entire frame.
data.head(10)

Unnamed: 0,id,label,date,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,f48,f49,f50,f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64,f65,f66,f67,f68,f69,f70,f71,f72,f73,f74,f75,f76,f77,f78,f79,f80,f81,f82,f83,f84,f85,f86,f87,f88,f89,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99,f100,f101,f102,f103,f104,f105,f106,f107,f108,f109,f110,f111,f112,f113,f114,f115,f116,f117,f118,f119,f120,f121,f122,f123,f124,f125,f126,f127,f128,f129,f130,f131,f132,f133,f134,f135,f136,f137,f138,f139,f140,f141,f142,f143,f144,f145,f146,f147,f148,f149,f150,f151,f152,f153,f154,f155,f156,f157,f158,f159,f160,f161,f162,f163,f164,f165,f166,f167,f168,f169,f170,f171,f172,f173,f174,f175,f176,f177,f178,f179,f180,f181,f182,f183,f184,f185,f186,f187,f188,f189,f190,f191,f192,f193,f194,f195,f196,f197,f198,f199,f200,f201,f202,f203,f204,f205,f206,f207,f208,f209,f210,f211,f212,f213,f214,f215,f216,f217,f218,f219,f220,f221,f222,f223,f224,f225,f226,f227,f228,f229,f230,f231,f232,f233,f234,f235,f236,f237,f238,f239,f240,f241,f242,f243,f244,f245,f246,f247,f248,f249,f250,f251,f252,f253,f254,f255,f256,f257,f258,f259,f260,f261,f262,f263,f264,f265,f266,f267,f268,f269,f270,f271,f272,f273,f274,f275,f276,f277,f278,f279,f280,f281,f282,f283,f284,f285,f286,f287,f288,f289,f290,f291,f292,f293,f294,f295,f296,f297
0,f10eb20f31cf7063ee8bdbd1272214e4d7e0193c8dbce4...,0,20171103,0,0,0,0,100807.0,0,5,1,1,1,1,2,0,2,0,2,2,2,1,31.0,61.0,142.0,245.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,,,,,,,,,,,,,1.0,0.0,1.0,0.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,3.0,96.0,38.0,301.0,302.0,302.0,302.0,301.0,302.0,40.0,79.0,40.0,79.0,2.0,3.0,4.0,3.0,41.0,80.0,96.5,721.1,3252.0,154.5,394.5,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,4.0,8.0,15.0,30.0,4.0,14.0,34.0,70.0,142.0,3.0,15.0,41.0,79.0,191.0,192.0,190.0,190.0,190.0,190.0,191.0,192.0,191.0,190.0,190.0,190.0,32.0,33.0,25.0,25.0,24.0,25.0,31.0,79.0,42.0,42.0,42.0,68.0,32.0,32.0,28.0,27.0,28.0,27.0,32.0,235.0,235.0,126.0,126.0,127.0,126.0,235.0,192.0,191.0,190.0,189.0,189.0,301.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,3.0,3.0,4.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,4.0,3.0,4.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,3.0,3.0,3.0,14.0,41.0,80.0,167.0,2.0,2.0,4.0,3.0,3.0,4.0,6.0,6.0,2.0,2.0,4.0,3.0,2.0,2.0,3.0,4.0,3.0,3.0,4.0,1.0,3.0,3.0,4.0,2.0,2.0,2.0,2.0,8.0,2.0,2.0,3.0,3.0,3.0,2.0,5.0,5.0,10.0,16.0,2.0,2.0,4.0,5.0,7.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,27.0,297.0,302.0,384.0,770.0,20.0,99.0,102.0,124.0,140.0,301.0,312.0,328.0,85.0,302.0,201.0,203.0,203.0,61.0,201.0
1,d861929b67938d06538b910b9f6b85f5eb62b6ad7361ba...,0,20170917,0,1,1,1,100805.0,1,5,2,1,1,0,2,1,2,2,1,1,2,0,14.0,13.0,48.0,48.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,,,,,,,,,,,,,0.0,0.0,0.0,1.0,0.0,1.0,1.0,2.0,4.0,4.0,5.0,1.0,1.0,4.0,3.0,3.0,79.0,12.0,302.0,32.0,301.0,302.0,302.0,302.0,23.0,26.0,23.0,26.0,2.0,2.0,3.0,4.0,23.0,26.0,5.0,717.4,733.3,231.8,472.8,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,4.0,8.0,11.0,12.0,4.0,12.0,23.0,26.0,27.0,5.0,12.0,24.0,27.0,122.0,121.0,16.0,121.0,121.0,122.0,121.0,122.0,121.0,16.0,121.0,122.0,32.0,31.0,13.0,27.0,26.0,27.0,32.0,79.0,40.0,39.0,39.0,63.0,31.0,31.0,15.0,28.0,29.0,29.0,31.0,231.0,232.0,17.0,99.0,99.0,98.0,232.0,121.0,122.0,17.0,121.0,121.0,302.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,4.0,5.0,12.0,24.0,26.0,27.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,13.0,158.0,301.0,335.0,398.0,4.0,54.0,109.0,112.0,113.0,302.0,324.0,391.0,13.0,302.0,160.0,160.0,161.0,8.0,160.0
2,1270cb8a85eedd57672b2c6297fa5633e36773a2c3a351...,0,20171022,0,0,1,0,100102.0,0,6,1,0,0,0,2,1,2,1,1,2,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,3.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,15.0,18.0,37.0,102.0,139.0,206.0,253.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,7.0,13.0,2.0,1.0,2.0,4.0,6.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,
3,9fa009724ee7ff9d688ae321304fbc78f608cdabbfdd2b...,0,20171029,0,0,0,1,100807.0,1,4,1,2,2,0,0,0,2,0,1,1,1,1,32.0,55.0,152.0,241.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,0.0,0.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,6.0,5.0,1.0,2.0,3.0,6.0,6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,3.0,3.0,3.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,3.0,5.0,11.0,19.0,23.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,3.0,11.0,16.0,3.0,12.0,16.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,30.0,105.0,301.0,437.0,763.0,7.0,40.0,115.0,138.0,143.0,302.0,322.0,341.0,57.0,251.0,175.0,176.0,176.0,49.0,150.0
4,1da482485d7e8bcefae7e9d0d1167cec3ac111cfa71d8b...,0,20171002,1,1,0,1,100805.0,1,5,1,2,0,0,0,1,1,2,1,1,2,0,28.0,28.0,103.0,104.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,,,,,,,,,,,,,1.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,89.0,32.0,301.0,243.0,301.0,301.0,301.0,301.0,15.0,14.0,15.0,15.0,2.0,2.0,3.0,3.0,18.0,27.0,51.0,7952.6,15752.6,86.8,7686.8,2.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,3.0,5.0,6.0,8.0,14.0,3.0,12.0,19.0,27.0,34.0,3.0,11.0,16.0,15.0,217.0,218.0,176.0,216.0,216.0,215.0,217.0,218.0,217.0,177.0,216.0,216.0,35.0,34.0,28.0,30.0,29.0,30.0,32.0,81.0,48.0,48.0,48.0,65.0,31.0,32.0,29.0,29.0,28.0,29.0,30.0,241.0,241.0,127.0,143.0,144.0,144.0,241.0,218.0,218.0,177.0,216.0,215.0,302.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,10.0,15.0,15.0,16.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,3.0,3.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,3.0,3.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,3.0,15.0,16.0,2.0,15.0,16.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,21.0,156.0,302.0,369.0,505.0,11.0,49.0,99.0,113.0,117.0,302.0,301.0,301.0,74.0,302.0,182.0,181.0,182.0,51.0,181.0
5,81b481d377dc060f6c3249d086d28b0fb4c3781a72927d...,0,20171012,0,0,1,1,100802.0,0,6,2,1,2,2,1,1,2,1,2,2,1,1,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,,,,,,,,,,,,,1.0,0.0,1.0,1.0,0.0,0.0,3.0,3.0,3.0,3.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,18.0,16.0,16.0,17.0,16.0,16.0,15.0,21.0,14.0,21.0,15.0,16.0,15.0,16.0,17.0,24.0,789.2,1527.4,2038.4,789.2,790.2,1.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,3.0,5.0,7.0,12.0,13.0,15.0,21.0,28.0,18.0,18.0,19.0,25.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,17.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,19.0,18.0,19.0,18.0,21.0,26.0,34.0,1.0,1.0,1.0,2.0,14.0,15.0,14.0,15.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,7.0,7.0,8.0,8.0,7.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,6.0,6.0,5.0,5.0,6.0,2.0,1.0,2.0,2.0,2.0
6,cda7677920fc9bad4bb727ed8a87f59e3400572f75fd46...,0,20170927,0,1,1,0,100805.0,0,6,2,1,1,1,2,2,1,2,0,2,2,2,24.0,24.0,89.0,89.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,0.0,1.0,,,,,,,,,,,201.0,40.0,302.0,302.0,302.0,302.0,301.0,301.0,44.0,88.0,44.0,88.0,7.0,8.0,7.0,18.0,45.0,92.0,28.0,3383.6,10657.4,399.0,493.9,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,3.0,7.0,16.0,21.0,17.0,19.0,45.0,92.0,117.0,17.0,18.0,45.0,89.0,227.0,227.0,225.0,225.0,225.0,225.0,228.0,227.0,227.0,224.0,224.0,225.0,34.0,35.0,31.0,31.0,32.0,32.0,34.0,179.0,41.0,40.0,41.0,91.0,31.0,32.0,27.0,28.0,28.0,28.0,31.0,229.0,229.0,142.0,141.0,142.0,141.0,228.0,227.0,226.0,224.0,225.0,225.0,302.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,3.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,7.0,7.0,17.0,18.0,45.0,90.0,117.0,2.0,1.0,1.0,2.0,7.0,7.0,7.0,8.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,7.0,8.0,8.0,8.0,8.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,0.0,2.0,57.0,250.0,301.0,319.0,352.0,23.0,118.0,134.0,142.0,143.0,301.0,301.0,301.0,150.0,301.0,239.0,239.0,239.0,116.0,240.0
7,19d100ff146a203f0e558fd60534b8ece650be4ecb4564...,0,20171027,1,0,0,1,100802.0,0,6,2,0,1,0,2,2,1,2,2,2,2,0,31.0,53.0,152.0,233.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,,,,,,,,,,,,,0.0,0.0,1.0,0.0,1.0,0.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,3.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,3.0,9.0,16.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,5.0,7.0,1.0,2.0,2.0,3.0,5.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,73.0,181.0,302.0,381.0,630.0,29.0,77.0,112.0,130.0,134.0,301.0,314.0,321.0,150.0,302.0,189.0,188.0,189.0,92.0,188.0
8,49ff13e491c3e4c680cdf39e9b3b85560a4bb80994ac63...,0,20170926,1,1,1,1,100802.0,0,7,2,1,1,1,1,2,1,1,1,1,2,1,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,,,,,,,,,,,,,0.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,18.0,1.0,1.0,2.0,2.0,5.0,12.0,23.0,13.0,23.0,1.0,1.0,1.0,6.0,14.0,24.0,11.0,65692.4,81568.8,53411.0,65410.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,3.0,7.0,13.0,15.0,5.0,8.0,13.0,23.0,28.0,5.0,7.0,13.0,23.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,13.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,3.0,3.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,5.0,8.0,13.0,24.0,29.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,0.0,1.0,0.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0
9,ee3adc3a1b88a30297f86cf77c0fcd1d5c17653f1a9828...,0,20171011,1,1,1,0,100804.0,0,2,1,1,0,1,2,2,2,2,1,1,2,1,32.0,38.0,113.0,133.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,9.0,4.0,5.0,,,,,,,,,,,,,8.0,8.0,3.0,4.0,1.0,1.0,1.0,3.0,6.0,10.0,24.0,1.0,3.0,5.0,8.0,17.0,93.0,36.0,302.0,301.0,302.0,302.0,302.0,302.0,26.0,49.0,26.0,49.0,2.0,2.0,1.0,10.0,26.0,51.0,15.0,5625.3,6581.7,1753.2,3420.7,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,4.0,7.0,14.0,28.0,10.0,18.0,26.0,48.0,95.0,11.0,18.0,26.0,49.0,193.0,193.0,190.0,190.0,191.0,191.0,193.0,193.0,192.0,190.0,191.0,191.0,33.0,35.0,29.0,29.0,29.0,28.0,31.0,84.0,42.0,43.0,43.0,64.0,32.0,32.0,28.0,28.0,27.0,28.0,32.0,240.0,241.0,133.0,134.0,134.0,134.0,237.0,192.0,193.0,191.0,191.0,190.0,302.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,4.0,4.0,4.0,4.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,10.0,19.0,27.0,49.0,98.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,1.0,41.0,125.0,301.0,431.0,1081.0,17.0,44.0,91.0,109.0,124.0,302.0,356.0,613.0,82.0,216.0,155.0,156.0,155.0,52.0,116.0


In [7]:
#################### Test set #######################
# Overwrites csv_path; DATA_PATH is defined in the training-set loading cell.
csv_path = os.path.join(DATA_PATH, "atec_anti_fraud_test_b.csv")
# Read the whole test CSV into memory as `data_test`.
data_test = pd.read_csv(csv_path)

In [8]:
# Preview only the first rows of the test frame rather than rendering all of it.
data_test.head(10)

Unnamed: 0,id,date,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,f48,f49,f50,f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64,f65,f66,f67,f68,f69,f70,f71,f72,f73,f74,f75,f76,f77,f78,f79,f80,f81,f82,f83,f84,f85,f86,f87,f88,f89,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99,f100,f101,f102,f103,f104,f105,f106,f107,f108,f109,f110,f111,f112,f113,f114,f115,f116,f117,f118,f119,f120,f121,f122,f123,f124,f125,f126,f127,f128,f129,f130,f131,f132,f133,f134,f135,f136,f137,f138,f139,f140,f141,f142,f143,f144,f145,f146,f147,f148,f149,f150,f151,f152,f153,f154,f155,f156,f157,f158,f159,f160,f161,f162,f163,f164,f165,f166,f167,f168,f169,f170,f171,f172,f173,f174,f175,f176,f177,f178,f179,f180,f181,f182,f183,f184,f185,f186,f187,f188,f189,f190,f191,f192,f193,f194,f195,f196,f197,f198,f199,f200,f201,f202,f203,f204,f205,f206,f207,f208,f209,f210,f211,f212,f213,f214,f215,f216,f217,f218,f219,f220,f221,f222,f223,f224,f225,f226,f227,f228,f229,f230,f231,f232,f233,f234,f235,f236,f237,f238,f239,f240,f241,f242,f243,f244,f245,f246,f247,f248,f249,f250,f251,f252,f253,f254,f255,f256,f257,f258,f259,f260,f261,f262,f263,f264,f265,f266,f267,f268,f269,f270,f271,f272,f273,f274,f275,f276,f277,f278,f279,f280,f281,f282,f283,f284,f285,f286,f287,f288,f289,f290,f291,f292,f293,f294,f295,f296,f297
0,661ae9a03c6c74feab2555c9987e140ae3f5421bd8e7a0...,20180305,0,0,1,1,100810.0,0,4,2,0,1,0,2,1,1,1,0,2,2,0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,,,,,,,,,,,,,0.0,1.0,0.0,1.0,1.0,1.0,2.0,1.0,2.0,3.0,2.0,2.0,2.0,1.0,3.0,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,1.0,3.0,3.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,3.0,2.0,2.0,3.0,3.0,2.0,3.0,2.0,3.0,3.0,4.0,3.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,3.0,5.0,5.0,10.0,24.0,35.0,46.0,1.0,2.0,2.0,4.0,3.0,4.0,11.0,11.0,2.0,1.0,2.0,2.0,2.0,1.0,3.0,4.0,2.0,2.0,3.0,2.0,1.0,3.0,4.0,2.0,2.0,8.0,7.0,7.0,2.0,1.0,2.0,2.0,3.0,2.0,3.0,7.0,10.0,15.0,2.0,1.0,3.0,4.0,9.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,78.0,302.0,302.0,324.0,353.0,44.0,150.0,149.0,156.0,155.0,301.0,301.0,302.0,192.0,301.0,218.0,219.0,219.0,153.0,218.0
1,ada98d4358e72a27cc5e92f691a87a4fd62a7cda0387e2...,20180303,0,0,1,1,100810.0,1,6,1,0,2,1,1,0,2,2,0,2,2,0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,,0.0,1.0,,,,,,,,,,,1.0,2.0,27.0,4.0,6.0,6.0,9.0,24.0,51.0,78.0,51.0,78.0,4.0,6.0,7.0,9.0,55.0,82.0,686.5,14090.8,20128.8,1392.5,4811.3,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,4.0,8.0,15.0,22.0,8.0,23.0,56.0,83.0,109.0,9.0,23.0,52.0,80.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,25.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,4.0,6.0,9.0,23.0,53.0,80.0,107.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,0.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,7.0,23.0,25.0,5.0,6.0,2.0,1.0,1.0,2.0,2.0
2,e33675a962e5bf44d05a2b01903a4beb88a0c6385c05c6...,20180226,0,0,1,1,100811.0,1,2,1,0,1,0,1,1,1,1,2,2,1,1,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,,,,,,,,,,,,,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,3.0,2.0,3.0,2.0,3.0,1.0,2.0,1.0,3.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,5.0,21.0,33.0,39.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,3.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,0.0,0.0,1.0,2.0,35.0,90.0,301.0,317.0,340.0,28.0,52.0,144.0,147.0,150.0,301.0,302.0,302.0,130.0,292.0,225.0,225.0,225.0,104.0,218.0
3,4c5da176de7172dbd1ad0aa7edf9866548ec720d6c7318...,20180207,1,0,1,0,100811.0,0,5,1,2,1,1,1,2,2,0,0,1,1,1,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,,,,,,,,,,,,,1.0,1.0,0.0,1.0,1.0,0.0,2.0,3.0,3.0,3.0,3.0,2.0,1.0,2.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,3.0,2.0,3.0,3.0,3.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,3.0,2.0,2.0,2.0,3.0,3.0,1.0,1.0,1.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,216.0,301.0,302.0,301.0,302.0,121.0,179.0,179.0,179.0,179.0,301.0,302.0,301.0,256.0,302.0,262.0,261.0,261.0,221.0,261.0
4,fa39e3491900d49b862d30b5dbcd0b1c30bb4ff0d96396...,20180226,0,0,1,1,100811.0,1,7,1,1,2,1,0,1,1,0,1,1,2,0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,9.0,10.0,9.0,12.0,14.0,16.0,17.0,2.0,2.0,1.0,1.0,9.0,9.0,9.0,10.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,9.0,9.0,9.0,9.0,9.0,1.0,1.0,1.0,2.0,1.0,8.0,10.0,9.0,10.0,10.0,2.0,1.0,1.0,2.0,3.0,1.0,1.0,2.0,2.0,2.0,8.0,10.0,10.0,13.0,16.0,10.0,14.0,17.0,1.0,2.0,1.0,8.0,9.0,1.0,2.0,2.0,2.0,1.0,2.0,59.0,222.0,302.0,313.0,351.0,26.0,95.0,115.0,122.0,126.0,301.0,302.0,301.0,108.0,301.0,210.0,209.0,209.0,80.0,210.0
5,ae0f1ecb27e098bbf672529a50237dff27abd16b349569...,20180221,1,1,0,0,100109.0,1,7,2,0,1,0,1,2,1,1,1,1,1,1,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,,,,,2.0,2.0,1.0,5.0,5.0,2.0,1.0,1.0,7.0,7.0,0.0,0.0,,,,,0.0,0.0,,,,,,,,,,,1.0,2.0,6.0,1.0,1.0,2.0,1.0,2.0,0.0,5.0,1.0,6.0,1.0,1.0,2.0,2.0,1.0,6.0,129.7,129.7,519.3,130.7,129.7,1.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,6.0,6.0,1.0,1.0,1.0,6.0,6.0,1.0,1.0,1.0,7.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,7.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,6.0,7.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,3.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,0.0,2.0,1.0,2.0,1.0,2.0,3.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0
6,7a9e5e415412a92928f91608c39750e3a33eb123eb0ef8...,20180302,2,0,1,1,,4,1,1,1,1,2,0,1,2,2,0,2,2,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,2.0,1.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,2.0,1.0,1.0,78.0,234.0,297.0,301.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,79.0,233.0,297.0,302.0,301.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,62.0,63.0,2.0,1.0,1.0,2.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,51fb55256e959c36989151e8136a8a68002bb75887eb75...,20180222,0,1,0,0,100808.0,0,2,1,1,1,0,2,1,1,1,1,2,1,1,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,,,,,,,,,,,,,0.0,0.0,1.0,1.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,3.0,4.0,4.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,3.0,20.0,22.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,3.0,2.0,2.0,2.0,1.0,3.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,2.0,35.0,78.0,231.0,341.0,404.0,15.0,37.0,86.0,116.0,118.0,301.0,302.0,302.0,83.0,170.0,221.0,221.0,221.0,63.0,129.0
8,aa7664463d11ae5573ae6d89195373fbe033c354710142...,20180219,0,0,1,1,100811.0,1,2,2,0,0,1,2,1,0,0,1,1,0,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,,0.0,1.0,0.0,1.0,0.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,3.0,2.0,3.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,3.0,6.0,7.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,4.0,2.0,1.0,3.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,23.0,78.0,194.0,322.0,331.0,14.0,32.0,77.0,130.0,130.0,302.0,302.0,307.0,62.0,140.0,201.0,202.0,201.0,43.0,89.0
9,b794af5790f98806ff8ad8ee268caa606dffcfbcb753bd...,20180304,1,1,0,0,100811.0,0,5,2,1,1,1,0,0,2,0,1,0,0,2,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,2.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,7.0,7.0,7.0,6.0,7.0,8.0,7.0,8.0,7.0,11.0,657.2,656.2,894.1,657.2,656.2,2.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,3.0,9.0,7.0,8.0,7.0,12.0,18.0,7.0,7.0,7.0,7.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,8.0,8.0,7.0,8.0,7.0,7.0,7.0,1.0,2.0,1.0,1.0,4.0,4.0,3.0,4.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,4.0,3.0,3.0,3.0,3.0,1.0,2.0,1.0,2.0,1.0,3.0,3.0,3.0,4.0,3.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,7.0,8.0,7.0,7.0,8.0,7.0,8.0,8.0,1.0,1.0,1.0,7.0,7.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0


In [9]:
# Candidate feature columns: everything after the first three columns of `data`.
cols = list(data.columns)[3:]

# Partition the feature columns by storage dtype. Columns of any other dtype
# end up in neither list.
col_int = [name for name in cols if data[name].dtype == "int64"]
col_float = [name for name in cols if data[name].dtype == "float64"]

print(col_int)
print(col_float)

['f1', 'f2', 'f3', 'f4', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19']
['f5', 'f20', 'f21', 'f22', 'f23', 'f24', 'f25', 'f26', 'f27', 'f28', 'f29', 'f30', 'f31', 'f32', 'f33', 'f34', 'f35', 'f36', 'f37', 'f38', 'f39', 'f40', 'f41', 'f42', 'f43', 'f44', 'f45', 'f46', 'f47', 'f48', 'f49', 'f50', 'f51', 'f52', 'f53', 'f54', 'f55', 'f56', 'f57', 'f58', 'f59', 'f60', 'f61', 'f62', 'f63', 'f64', 'f65', 'f66', 'f67', 'f68', 'f69', 'f70', 'f71', 'f72', 'f73', 'f74', 'f75', 'f76', 'f77', 'f78', 'f79', 'f80', 'f81', 'f82', 'f83', 'f84', 'f85', 'f86', 'f87', 'f88', 'f89', 'f90', 'f91', 'f92', 'f93', 'f94', 'f95', 'f96', 'f97', 'f98', 'f99', 'f100', 'f101', 'f102', 'f103', 'f104', 'f105', 'f106', 'f107', 'f108', 'f109', 'f110', 'f111', 'f112', 'f113', 'f114', 'f115', 'f116', 'f117', 'f118', 'f119', 'f120', 'f121', 'f122', 'f123', 'f124', 'f125', 'f126', 'f127', 'f128', 'f129', 'f130', 'f131', 'f132', 'f133', 'f134', 'f135', 'f136', 'f137', 'f138', 'f

In [10]:
# data.sort_values("date",inplace=True)
# data.interpolate(method="nearest", inplace=True)
# data.sort_index(inplace=True)

# data_test.sort_values("date",inplace=True)
# data_test.interpolate(method="nearest", inplace=True) 
# data_test.sort_index(inplace=True)

In [11]:
################## Features with ~90% missing values (per the original note) #############################
missing_90 = ['f36', 'f37', 'f38', 'f39', 'f40', 'f41', 'f42', 'f43', 'f44', 'f45', 'f46', 'f47']

# Drop the sparse columns from both frames. errors='ignore' together with
# rebinding (instead of inplace=True) keeps this cell safe to re-run: a second
# execution is a no-op rather than a KeyError on already-removed columns.
data = data.drop(missing_90, axis=1, errors='ignore')
data_test = data_test.drop(missing_90, axis=1, errors='ignore')

# Keep the feature-name list in sync. Filtering (rather than list.remove)
# does not raise if a name has already been removed.
cols = [name for name in cols if name not in missing_90]
print(len(cols))

285


In [11]:
##############训练集onehot编码#####################
# data_onehot = []
# for col in col_int:
#     dummies = pd.get_dummies(data[col], prefix=col)
#     data_onehot.append(dummies)
    
# for i in data_onehot:
#     data = pd.concat([data, i], axis=1)
    
# data.drop(col_int, axis=1, inplace=True)

In [10]:
##############测试集onehot编码#####################
# data_test_onehot = []
# for col in col_int:
#     dummies = pd.get_dummies(data_test[col], prefix=col)
#     data_test_onehot.append(dummies)

# for i in data_test_onehot:
#     data_test = pd.concat([data_test, i], axis=1)
    
# data_test.drop(col_int, axis=1, inplace=True)

In [12]:
##################缺失值填充########################
# data.fillna(0, inplace=True)
# data_test.fillna(0, inplace=True)

In [12]:
# Re-encode the target in one pass:
#   raw -1 -> 0
#   raw  1 -> 0
#   raw  0 -> 1
# This is the net effect of the previous four sequential overwrites, which
# routed values through a temporary sentinel (2) and only worked when run in
# that exact order. Values outside {-1, 0, 1} are left untouched.
# NOTE(review): presumably -1 means "unlabeled" and 1 means "fraud" in the raw
# data, so after this cell 0 is the fraud/unlabeled class and 1 the normal
# class - confirm that downstream scoring expects this inverted encoding.
# NOTE: the cell is not idempotent. Running it a second time swaps 0 and 1
# back again; reload `data` before re-running.
label_remap = {-1: 0, 1: 0, 0: 1}
data['label'] = data['label'].replace(label_remap)
data['label'].value_counts()

1    977884
0     16847
Name: label, dtype: int64

In [13]:
# Time-based hold-out with a 20171021 cutoff: rows dated on or before the
# cutoff go to training, later rows to validation.
# NOTE(review): both names are reassigned by the following cell, which uses a
# 20171031 cutoff, so this split is overwritten when the notebook runs top to
# bottom.
on_train = data.loc[data['date'] <= 20171021]
on_val = data.loc[data['date'] > 20171021]

In [14]:
################## Dataset split (online) ########################
# Time-based hold-out: rows dated up to 20171031 train, later rows validate.
on_train = data.loc[data['date'] <= 20171031]
on_val = data.loc[data['date'] > 20171031]
print(on_train.shape, on_val.shape)

# Five overlapping training subsets; each one leaves out a different date
# window of the training period:
#   fold 1 omits dates <= 20170915
#   fold 2 omits 20170915 < date <= 20170927
#   fold 3 omits 20170927 < date <= 20171009
#   fold 4 omits 20171009 < date <= 20171020
#   fold 5 omits dates > 20171020
# Optional 50% down-sampling of each fold is disabled; it was
#   on_trainK = sampling(on_trainK, 0.5, 2017 + K)   for K = 1..5
train_date = on_train['date']
on_train1 = on_train.loc[train_date > 20170915]
on_train2 = on_train.loc[(train_date <= 20170915) | (train_date > 20170927)]
on_train3 = on_train.loc[(train_date <= 20170927) | (train_date > 20171009)]
on_train4 = on_train.loc[(train_date <= 20171009) | (train_date > 20171020)]
on_train5 = on_train.loc[train_date <= 20171020]

for fold in (on_train1, on_train2, on_train3, on_train4, on_train5):
    print(fold.shape)

(911606, 288) (83125, 288)
(731486, 288)
(723597, 288)
(729032, 288)
(728626, 288)
(733683, 288)


In [18]:
# Class balance of the validation split.
# NOTE(review): the stored output below totals 245,041 rows, which does not match
# the (83125, 288) on_val shape printed by the split cell above — it appears to
# come from a run with a different split; re-run to confirm.
on_val['label'].value_counts()

1    240836
0      4205
Name: label, dtype: int64

In [15]:
################### Model matrices (online split) ########################
# Convert the split frames into NumPy arrays for the training helpers.
# `DataFrame.as_matrix()` / `Series.as_matrix()` were deprecated in pandas 0.23
# and removed in pandas 1.0; `.values` returns the same ndarray on both old and
# new pandas versions.

# X = data.loc[:,'f5':].values
# y = data.loc[:,'label'].values
# print(X.shape, y.shape)

# Features are every column from 'f1' through the last column (label-based
# slices include both end points); the target is the 'label' column.
X_train = on_train.loc[:, 'f1':].values
y_train = on_train.loc[:, 'label'].values
print(X_train.shape, y_train.shape)

X_val = on_val.loc[:, 'f1':].values
y_val = on_val.loc[:, 'label'].values
print(X_val.shape, y_val.shape)

X_test = data_test.loc[:, 'f1':].values
print(X_test.shape)

# Per-fold matrices (disabled).
# NOTE(review): the LGBtraining cells further down use X_train1..X_train5 and
# y_train1..y_train5, which are only built here; also note this disabled code
# slices from 'f5' while the active matrices above slice from 'f1'.

# X_train1 = on_train1.loc[:,'f5':].values
# y_train1 = on_train1.loc[:,'label'].values
# print(X_train1.shape, y_train1.shape)

# X_train2 = on_train2.loc[:,'f5':].values
# y_train2 = on_train2.loc[:,'label'].values
# print(X_train2.shape, y_train2.shape)

# X_train3 = on_train3.loc[:,'f5':].values
# y_train3 = on_train3.loc[:,'label'].values
# print(X_train3.shape, y_train3.shape)

# X_train4 = on_train4.loc[:,'f5':].values
# y_train4 = on_train4.loc[:,'label'].values
# print(X_train4.shape, y_train4.shape)

# X_train5 = on_train5.loc[:,'f5':].values
# y_train5 = on_train5.loc[:,'label'].values
# print(X_train5.shape, y_train5.shape)

# X_val = on_val.loc[:,'f5':].values
# y_val = on_val.loc[:,'label'].values
# print(X_val.shape, y_val.shape)

# X_test = data_test.loc[:,'f5':].values
# print(X_test.shape)

(911606, 285) (911606,)
(83125, 285) (83125,)
(500538, 285)


In [14]:
# Peek at the first 100 validation labels as a quick sanity check of the encoding.
y_val[:100]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)

In [48]:
# Invert the validation labels in place: entries equal to 1 become 0 and every
# other entry becomes 1.
# NOTE(review): this mutates y_val and is not idempotent — each execution flips
# the labels again, and the execution count (In [48]) shows it was run out of
# order relative to the training cells. Confirm the intended orientation before
# reusing y_val.
y_val[:] = (y_val != 1).astype(y_val.dtype)

In [16]:
# Drop the names of the intermediate split frames (the fold subsets plus the
# train / validation frames) and run a garbage-collection pass so their memory
# can be reclaimed.
del (
    on_train1, on_train2, on_train3, on_train4, on_train5,
    on_train, on_val,
)
gc.collect()

62

In [17]:
# Fit the single model with LGB_RF_training (defined outside this section),
# validating on (X_val, y_val) and passing the feature-name lists cols / col_int.
# The literal 20 is presumably the early-stopping patience — the log below reports
# "don't improve for 20 rounds" — confirm against the helper's signature.
clf = LGB_RF_training(X_train, y_train, X_val, y_val, 20, cols, col_int)



[1]	valid_0's auc: 0.962318
Training until validation scores don't improve for 20 rounds.
[2]	valid_0's auc: 0.968259
[3]	valid_0's auc: 0.969045
[4]	valid_0's auc: 0.970025
[5]	valid_0's auc: 0.971137
[6]	valid_0's auc: 0.970738
[7]	valid_0's auc: 0.970754
[8]	valid_0's auc: 0.971079
[9]	valid_0's auc: 0.971166
[10]	valid_0's auc: 0.971265
[11]	valid_0's auc: 0.971498
[12]	valid_0's auc: 0.971148
[13]	valid_0's auc: 0.971252
[14]	valid_0's auc: 0.971052
[15]	valid_0's auc: 0.971919
[16]	valid_0's auc: 0.972034
[17]	valid_0's auc: 0.972173
[18]	valid_0's auc: 0.97208
[19]	valid_0's auc: 0.971973
[20]	valid_0's auc: 0.972084
[21]	valid_0's auc: 0.972024
[22]	valid_0's auc: 0.972195
[23]	valid_0's auc: 0.97227
[24]	valid_0's auc: 0.972071
[25]	valid_0's auc: 0.972143
[26]	valid_0's auc: 0.972185
[27]	valid_0's auc: 0.972008
[28]	valid_0's auc: 0.97218
[29]	valid_0's auc: 0.972013
[30]	valid_0's auc: 0.971971
[31]	valid_0's auc: 0.97211
[32]	valid_0's auc: 0.972037
[33]	valid_0's auc: 0.9

In [22]:
# Test-set scores from the single model: take column 0 of predict_proba.
# Presumably column 0 is the probability of label 0, i.e. the class that the
# label re-encoding cell mapped the original -1 / 1 labels to — confirm via
# clf.classes_.
y_pred = clf.predict_proba(X_test)[:,0]

In [52]:
# Fold model 1, fitted with LGBtraining (defined outside this section); the last
# argument 2018 is presumably the random seed — confirm against the helper.
# NOTE(review): X_train1 / y_train1 are only built in the commented-out part of
# the matrix cell, so this cell fails with NameError on a fresh Run All.
clf1 = LGBtraining(X_train1, y_train1, X_val, y_val, 2018)

[1]	valid_0's auc: 0.947726	valid_1's auc: 0.940847
Training until validation scores don't improve for 10 rounds.
[2]	valid_0's auc: 0.95035	valid_1's auc: 0.94357
[3]	valid_0's auc: 0.950783	valid_1's auc: 0.944661
[4]	valid_0's auc: 0.95048	valid_1's auc: 0.944726
[5]	valid_0's auc: 0.951345	valid_1's auc: 0.945308
[6]	valid_0's auc: 0.952694	valid_1's auc: 0.946472
[7]	valid_0's auc: 0.95301	valid_1's auc: 0.946668
[8]	valid_0's auc: 0.953141	valid_1's auc: 0.946834
[9]	valid_0's auc: 0.953482	valid_1's auc: 0.947104
[10]	valid_0's auc: 0.957984	valid_1's auc: 0.954423
[11]	valid_0's auc: 0.958015	valid_1's auc: 0.954695
[12]	valid_0's auc: 0.96588	valid_1's auc: 0.962054
[13]	valid_0's auc: 0.965975	valid_1's auc: 0.962231
[14]	valid_0's auc: 0.966686	valid_1's auc: 0.963956
[15]	valid_0's auc: 0.968879	valid_1's auc: 0.965841
[16]	valid_0's auc: 0.969894	valid_1's auc: 0.966627
[17]	valid_0's auc: 0.970166	valid_1's auc: 0.966814
[18]	valid_0's auc: 0.9703	valid_1's auc: 0.967113


[155]	valid_0's auc: 0.980083	valid_1's auc: 0.99256
[156]	valid_0's auc: 0.980078	valid_1's auc: 0.992638
[157]	valid_0's auc: 0.980084	valid_1's auc: 0.992715
[158]	valid_0's auc: 0.980081	valid_1's auc: 0.992757
[159]	valid_0's auc: 0.98011	valid_1's auc: 0.992806
[160]	valid_0's auc: 0.980105	valid_1's auc: 0.992835
[161]	valid_0's auc: 0.980138	valid_1's auc: 0.992876
[162]	valid_0's auc: 0.98014	valid_1's auc: 0.992961
[163]	valid_0's auc: 0.980172	valid_1's auc: 0.993009
[164]	valid_0's auc: 0.9802	valid_1's auc: 0.993044
[165]	valid_0's auc: 0.980176	valid_1's auc: 0.993081
[166]	valid_0's auc: 0.980169	valid_1's auc: 0.993138
[167]	valid_0's auc: 0.98017	valid_1's auc: 0.993176
[168]	valid_0's auc: 0.980167	valid_1's auc: 0.993215
[169]	valid_0's auc: 0.980163	valid_1's auc: 0.993266
[170]	valid_0's auc: 0.980145	valid_1's auc: 0.993306
[171]	valid_0's auc: 0.980205	valid_1's auc: 0.993343
[172]	valid_0's auc: 0.980207	valid_1's auc: 0.99339
[173]	valid_0's auc: 0.980206	valid

In [53]:
# Fold model 2 (last argument 2019, presumably the random seed — confirm).
# NOTE(review): this rebinds `clf2`, which the top of the notebook defined as an
# xgb.XGBClassifier; X_train2 / y_train2 exist only in commented-out code.
clf2 = LGBtraining(X_train2, y_train2, X_val, y_val, 2019)

[1]	valid_0's auc: 0.940011	valid_1's auc: 0.919284
Training until validation scores don't improve for 10 rounds.
[2]	valid_0's auc: 0.94532	valid_1's auc: 0.93511
[3]	valid_0's auc: 0.958017	valid_1's auc: 0.948128
[4]	valid_0's auc: 0.957502	valid_1's auc: 0.948607
[5]	valid_0's auc: 0.960869	valid_1's auc: 0.955774
[6]	valid_0's auc: 0.961139	valid_1's auc: 0.95621
[7]	valid_0's auc: 0.961409	valid_1's auc: 0.957031
[8]	valid_0's auc: 0.961138	valid_1's auc: 0.957759
[9]	valid_0's auc: 0.961588	valid_1's auc: 0.957692
[10]	valid_0's auc: 0.961518	valid_1's auc: 0.957678
[11]	valid_0's auc: 0.961642	valid_1's auc: 0.957968
[12]	valid_0's auc: 0.961792	valid_1's auc: 0.958099
[13]	valid_0's auc: 0.962145	valid_1's auc: 0.958691
[14]	valid_0's auc: 0.962595	valid_1's auc: 0.959452
[15]	valid_0's auc: 0.962846	valid_1's auc: 0.959659
[16]	valid_0's auc: 0.962859	valid_1's auc: 0.959807
[17]	valid_0's auc: 0.967552	valid_1's auc: 0.963679
[18]	valid_0's auc: 0.967301	valid_1's auc: 0.964

[156]	valid_0's auc: 0.980248	valid_1's auc: 0.992048
[157]	valid_0's auc: 0.980348	valid_1's auc: 0.992105
[158]	valid_0's auc: 0.980402	valid_1's auc: 0.992189
[159]	valid_0's auc: 0.980384	valid_1's auc: 0.992306
[160]	valid_0's auc: 0.980373	valid_1's auc: 0.992361
[161]	valid_0's auc: 0.980385	valid_1's auc: 0.992449
[162]	valid_0's auc: 0.980371	valid_1's auc: 0.992482
[163]	valid_0's auc: 0.980369	valid_1's auc: 0.99252
[164]	valid_0's auc: 0.98035	valid_1's auc: 0.992548
[165]	valid_0's auc: 0.980338	valid_1's auc: 0.992607
[166]	valid_0's auc: 0.980336	valid_1's auc: 0.992671
[167]	valid_0's auc: 0.980326	valid_1's auc: 0.992711
[168]	valid_0's auc: 0.980307	valid_1's auc: 0.992805
Early stopping, best iteration is:
[158]	valid_0's auc: 0.980402	valid_1's auc: 0.992189


In [54]:
# Fold model 3 (last argument 2020, presumably the random seed — confirm).
# NOTE(review): this rebinds `clf3`, which the top of the notebook defined as a
# GradientBoostingClassifier; X_train3 / y_train3 exist only in commented-out code.
clf3 = LGBtraining(X_train3, y_train3, X_val, y_val, 2020)

[1]	valid_0's auc: 0.933077	valid_1's auc: 0.935711
Training until validation scores don't improve for 10 rounds.
[2]	valid_0's auc: 0.959539	valid_1's auc: 0.947222
[3]	valid_0's auc: 0.961038	valid_1's auc: 0.949926
[4]	valid_0's auc: 0.962046	valid_1's auc: 0.951228
[5]	valid_0's auc: 0.96455	valid_1's auc: 0.952968
[6]	valid_0's auc: 0.964635	valid_1's auc: 0.953044
[7]	valid_0's auc: 0.965167	valid_1's auc: 0.953513
[8]	valid_0's auc: 0.965271	valid_1's auc: 0.953555
[9]	valid_0's auc: 0.965209	valid_1's auc: 0.953806
[10]	valid_0's auc: 0.965515	valid_1's auc: 0.954131
[11]	valid_0's auc: 0.965459	valid_1's auc: 0.954311
[12]	valid_0's auc: 0.965761	valid_1's auc: 0.954674
[13]	valid_0's auc: 0.965984	valid_1's auc: 0.954806
[14]	valid_0's auc: 0.966779	valid_1's auc: 0.957642
[15]	valid_0's auc: 0.967236	valid_1's auc: 0.958093
[16]	valid_0's auc: 0.967328	valid_1's auc: 0.958189
[17]	valid_0's auc: 0.967518	valid_1's auc: 0.958361
[18]	valid_0's auc: 0.968145	valid_1's auc: 0.9

[155]	valid_0's auc: 0.97997	valid_1's auc: 0.991839
[156]	valid_0's auc: 0.979998	valid_1's auc: 0.991895
[157]	valid_0's auc: 0.980046	valid_1's auc: 0.991933
[158]	valid_0's auc: 0.980028	valid_1's auc: 0.99201
[159]	valid_0's auc: 0.980055	valid_1's auc: 0.992045
[160]	valid_0's auc: 0.980046	valid_1's auc: 0.992093
[161]	valid_0's auc: 0.980049	valid_1's auc: 0.992139
[162]	valid_0's auc: 0.980121	valid_1's auc: 0.992238
[163]	valid_0's auc: 0.980159	valid_1's auc: 0.9923
[164]	valid_0's auc: 0.980177	valid_1's auc: 0.992344
[165]	valid_0's auc: 0.980167	valid_1's auc: 0.992438
[166]	valid_0's auc: 0.980179	valid_1's auc: 0.992492
[167]	valid_0's auc: 0.980157	valid_1's auc: 0.992532
[168]	valid_0's auc: 0.980163	valid_1's auc: 0.992564
[169]	valid_0's auc: 0.980435	valid_1's auc: 0.992673
[170]	valid_0's auc: 0.980433	valid_1's auc: 0.992719
[171]	valid_0's auc: 0.980474	valid_1's auc: 0.992789
[172]	valid_0's auc: 0.980478	valid_1's auc: 0.992853
[173]	valid_0's auc: 0.980482	va

In [55]:
# Fold model 4 (last argument 2021, presumably the random seed — confirm).
# NOTE(review): X_train4 / y_train4 exist only in commented-out code, so this
# cell fails with NameError on a fresh Run All.
clf4 = LGBtraining(X_train4, y_train4, X_val, y_val, 2021)

[1]	valid_0's auc: 0.937454	valid_1's auc: 0.924429
Training until validation scores don't improve for 10 rounds.
[2]	valid_0's auc: 0.949138	valid_1's auc: 0.945107
[3]	valid_0's auc: 0.955137	valid_1's auc: 0.954543
[4]	valid_0's auc: 0.955821	valid_1's auc: 0.955198
[5]	valid_0's auc: 0.957751	valid_1's auc: 0.954888
[6]	valid_0's auc: 0.958354	valid_1's auc: 0.955997
[7]	valid_0's auc: 0.958252	valid_1's auc: 0.956747
[8]	valid_0's auc: 0.958396	valid_1's auc: 0.956757
[9]	valid_0's auc: 0.958678	valid_1's auc: 0.957041
[10]	valid_0's auc: 0.963115	valid_1's auc: 0.960585
[11]	valid_0's auc: 0.96317	valid_1's auc: 0.96063
[12]	valid_0's auc: 0.963061	valid_1's auc: 0.960513
[13]	valid_0's auc: 0.962971	valid_1's auc: 0.960635
[14]	valid_0's auc: 0.962807	valid_1's auc: 0.960562
[15]	valid_0's auc: 0.96303	valid_1's auc: 0.960624
[16]	valid_0's auc: 0.964767	valid_1's auc: 0.961132
[17]	valid_0's auc: 0.964794	valid_1's auc: 0.961583
[18]	valid_0's auc: 0.964671	valid_1's auc: 0.961

[155]	valid_0's auc: 0.979436	valid_1's auc: 0.990835
[156]	valid_0's auc: 0.979429	valid_1's auc: 0.990884
[157]	valid_0's auc: 0.979491	valid_1's auc: 0.990923
[158]	valid_0's auc: 0.979497	valid_1's auc: 0.990968
[159]	valid_0's auc: 0.979496	valid_1's auc: 0.99101
[160]	valid_0's auc: 0.979493	valid_1's auc: 0.991117
[161]	valid_0's auc: 0.979477	valid_1's auc: 0.991183
[162]	valid_0's auc: 0.979502	valid_1's auc: 0.991269
[163]	valid_0's auc: 0.979513	valid_1's auc: 0.991311
[164]	valid_0's auc: 0.97949	valid_1's auc: 0.991372
[165]	valid_0's auc: 0.979488	valid_1's auc: 0.991443
[166]	valid_0's auc: 0.979491	valid_1's auc: 0.991493
[167]	valid_0's auc: 0.979486	valid_1's auc: 0.991564
[168]	valid_0's auc: 0.979504	valid_1's auc: 0.991606
[169]	valid_0's auc: 0.979501	valid_1's auc: 0.991708
[170]	valid_0's auc: 0.97954	valid_1's auc: 0.991765
[171]	valid_0's auc: 0.979593	valid_1's auc: 0.991811
[172]	valid_0's auc: 0.979578	valid_1's auc: 0.991976
[173]	valid_0's auc: 0.979547	v

In [56]:
# Fold model 5 (last argument 2022, presumably the random seed — confirm).
# NOTE(review): X_train5 / y_train5 exist only in commented-out code, so this
# cell fails with NameError on a fresh Run All.
clf5 = LGBtraining(X_train5, y_train5, X_val, y_val, 2022)

[1]	valid_0's auc: 0.943953	valid_1's auc: 0.923342
Training until validation scores don't improve for 10 rounds.
[2]	valid_0's auc: 0.949739	valid_1's auc: 0.938449
[3]	valid_0's auc: 0.957047	valid_1's auc: 0.944686
[4]	valid_0's auc: 0.957871	valid_1's auc: 0.9446
[5]	valid_0's auc: 0.957366	valid_1's auc: 0.944874
[6]	valid_0's auc: 0.958579	valid_1's auc: 0.945494
[7]	valid_0's auc: 0.958546	valid_1's auc: 0.945709
[8]	valid_0's auc: 0.963469	valid_1's auc: 0.952577
[9]	valid_0's auc: 0.964686	valid_1's auc: 0.955289
[10]	valid_0's auc: 0.965531	valid_1's auc: 0.961831
[11]	valid_0's auc: 0.965298	valid_1's auc: 0.962032
[12]	valid_0's auc: 0.96575	valid_1's auc: 0.962673
[13]	valid_0's auc: 0.965568	valid_1's auc: 0.963181
[14]	valid_0's auc: 0.965897	valid_1's auc: 0.963356
[15]	valid_0's auc: 0.965992	valid_1's auc: 0.963591
[16]	valid_0's auc: 0.966056	valid_1's auc: 0.963894
[17]	valid_0's auc: 0.966632	valid_1's auc: 0.964033
[18]	valid_0's auc: 0.966832	valid_1's auc: 0.964

In [None]:
# clf = VotingClassifier(
#     estimators=[('lgb', clf1), ('xgb', clf2), ('gbdt', clf3)],
#     voting='soft',
#     weights=None,
#     n_jobs=1
# )
# clf.fit(X, y)

In [63]:
# Score the test set with each of the five fold models, keeping column 0 of
# predict_proba from every model (evaluated in order clf1 .. clf5).
y_pred1, y_pred2, y_pred3, y_pred4, y_pred5 = (
    fold_model.predict_proba(X_test)[:, 0]
    for fold_model in (clf1, clf2, clf3, clf4, clf5)
)
# print(score(y_val, y_pred))
# print(score(y_val, y_pred2))
# print(score(y_val, y_pred3))
# print(score(y_val, y_pred4))
# print(score(y_val, y_pred5))

In [64]:
# Equal-weight blend of the five fold predictions (each contributes 0.2).
blend_weight = 0.2
y_pred = (
    y_pred1 * blend_weight
    + y_pred2 * blend_weight
    + y_pred3 * blend_weight
    + y_pred4 * blend_weight
    + y_pred5 * blend_weight
)

In [23]:
# Assemble the submission: one row per test id with its predicted score, written
# without the index column.
# `.values` replaces `Series.as_matrix()`, which was deprecated in pandas 0.23
# and removed in pandas 1.0.
result = pd.DataFrame({'id': data_test['id'].values, 'score': y_pred})
result.to_csv("result28.csv", index=False)

In [45]:
def score(y, pred):
    """Weighted true-positive rate at three low false-positive-rate levels.

    Builds the ROC curve of `pred` against `y` (positive class is label 1) and
    combines the TPR read at the first curve point whose FPR is >= 0.001,
    >= 0.005 and >= 0.01 with weights 0.4, 0.3 and 0.3 respectively.

    Parameters
    ----------
    y : array-like
        True labels.
    pred : array-like
        Predicted scores for the positive class.

    Returns
    -------
    float
        0.4 * TPR@0.001 + 0.3 * TPR@0.005 + 0.3 * TPR@0.01.
    """
    fpr, tpr, _ = roc_curve(y, pred, pos_label=1)

    def tpr_at(min_fpr):
        # Index of the first ROC point whose FPR reaches `min_fpr`.
        first_idx = np.where(fpr >= min_fpr)[0][0]
        return tpr[first_idx]

    return 0.4 * tpr_at(0.001) + 0.3 * tpr_at(0.005) + 0.3 * tpr_at(0.01)