In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_absolute_error
import time
import seaborn as sns

from sklearn.feature_extraction.text import CountVectorizer
from scipy import sparse
from scipy.sparse import csr_matrix
from tqdm import tqdm
from sklearn.preprocessing import OneHotEncoder,LabelEncoder

#特征重要性
def display_importances(feature_importance_df_):
    cols = feature_importance_df_[["feature", "importance"]].groupby("feature").mean().sort_values(by="importance", ascending=False)[:40].index
    best_features = feature_importance_df_.loc[feature_importance_df_.feature.isin(cols)]
    plt.figure(figsize=(8, 10))
    sns.barplot(x="importance", y="feature", data=best_features.sort_values(by="importance", ascending=False))
    plt.title('LightGBM Features (avg over folds)')
    plt.tight_layout()
    plt.show()
 
#聚合求count
def feature_count(data, features=[]):
    if len(set(features)) != len(features):
        print('equal feature !!!!')
        return data
    new_feature = 'count'
    for i in features:
        new_feature += '_' + i.replace('add_', '')
    try:
        del data[new_feature]
    except:
        pass
    temp = data.groupby(features).size().reset_index().rename(columns={0: new_feature})
    data = data.merge(temp, 'left', on=features)
    new_num_features.append(new_feature)
    return data    



In [2]:
train = pd.read_csv('../input/train_dataset.csv')
test = pd.read_csv('../input/test_dataset.csv')

#删除id
test_id = test['用户编码'].copy()

train.drop("用户编码", axis = 1, inplace = True)
test.drop("用户编码", axis = 1, inplace = True)

label = train['信用分'].copy()
train.drop(['信用分'], axis=1, inplace=True)


#特征工程
data = pd.concat([train, test])

#原始的类别和数值特征
ori_cat_features = ['用户实名制是否通过核实','是否大学生客户','是否黑名单客户','是否4G不健康客户','缴费用户当前是否欠费缴费','是否经常逛商场的人','当月是否逛过福州仓山万达',
                    '当月是否到过福州山姆会员店','当月是否看电影','当月是否景点游览','当月是否体育场馆消费']
ori_num_features = ['用户年龄','用户网龄（月）','用户最近一次缴费距今时长（月）','缴费用户最近一次缴费金额（元）','用户近6个月平均消费值（元）','用户账单当月总费用（元）',
                    '用户当月账户余额（元）','用户话费敏感度','当月通话交往圈人数','近三个月月均商场出现次数','当月网购类应用使用次数','当月物流快递类应用使用次数',
                    '当月金融理财类应用使用总次数','当月视频播放类应用使用次数','当月飞机类应用使用次数','当月火车类应用使用次数','当月旅游资讯类应用使用次数']
ori_col = data.columns.tolist()
                    
#对年龄异常值取众数填充
data.loc[data['用户年龄']==0, '用户年龄'] = data['用户年龄'].mode()

#年龄特征
data['网龄/年龄'] = data['用户网龄（月）'] / data['用户年龄']
data['网龄年龄差'] = data['用户年龄'] - data['用户网龄（月）']/12

#对金额相关特征做组合
data['缴费金额是否能覆盖当月账单'] = data['缴费用户最近一次缴费金额（元）'] - data['用户账单当月总费用（元）']
data['最近一次交费是否超过平均消费额'] = data['缴费用户最近一次缴费金额（元）'] - data['用户近6个月平均消费值（元）']
data['当月账单是否超过平均消费额'] = data['用户账单当月总费用（元）'] - data['用户近6个月平均消费值（元）']
data['缴费习惯'] = data['缴费用户最近一次缴费金额（元）'] / (data['用户近6个月平均消费值（元）'] + 0.001)
data['通话人均花费'] = data['用户账单当月总费用（元）'] / (data['当月通话交往圈人数']+1)
data['近半年账单'] = data['用户近6个月平均消费值（元）']*6 + data['用户账单当月总费用（元）']
data['最近账单稳定性'] = data['用户账单当月总费用（元）'] / (data['用户近6个月平均消费值（元）'] + 0.001)
data['费用/余额'] = data['用户账单当月总费用（元）'] / (data['缴费用户最近一次缴费金额（元）'] + 0.001)
data['账户余额利用率'] = data['用户账单当月总费用（元）'] / (data['用户当月账户余额（元）'] + 0.001)

#对次数特征做组合
data['交通类应用使用次数'] = data['当月飞机类应用使用次数'] + data['当月火车类应用使用次数']

new_num_features = [i for i in data.columns.tolist() if i not in ori_col]

#充值金额是整数，和小数，应该对应不同的充值途径
def top_up_amount_method(s):
    
    if(s == 0):
        return 0
    elif(s % 10 == 0):
        return 1
    elif((s / 0.998) % 10 ==0):
        return 2
    else:
        return 3
data['充值方式1'] = data['缴费用户最近一次缴费金额（元）'].apply(top_up_amount_method)

def real_top_up_amount(s):
    if((s / 0.998) % 10 ==0):
        return s/0.998
    else:
        return s
data["充值方式2"] = data['缴费用户最近一次缴费金额（元）'].apply(real_top_up_amount)

#对类别特征进行组合，是否可以得出更好的结果
data['是否去过高档商场'] = data['当月是否逛过福州仓山万达'] + data['当月是否到过福州山姆会员店']
data['是否去过高档商场'] = data['是否去过高档商场'].map(lambda x:1 if x>=1 else 0)
data['是否_商场_电影'] = data['是否去过高档商场'] * data['当月是否看电影']
data['是否_商场_旅游'] = data['是否去过高档商场'] * data['当月是否景点游览']
data['是否_商场_体育馆'] = data['是否去过高档商场'] * data['当月是否体育场馆消费']
data['是否_电影_体育馆'] = data['当月是否看电影'] * data['当月是否体育场馆消费']
data['是否_电影_旅游'] = data['当月是否看电影'] * data['当月是否景点游览']
data['是否_旅游_体育馆'] = data['当月是否景点游览'] * data['当月是否体育场馆消费']
data['是否_商场_旅游_体育馆'] = data['是否去过高档商场'] * data['当月是否景点游览'] * data['当月是否体育场馆消费']
data['是否_商场_电影_体育馆'] = data['是否去过高档商场'] * data['当月是否看电影'] * data['当月是否体育场馆消费']
data['是否_商场_电影_旅游'] = data['是否去过高档商场'] * data['当月是否看电影'] * data['当月是否景点游览']
data['是否_体育馆_电影_旅游'] = data['当月是否体育场馆消费'] * data['当月是否看电影'] * data['当月是否景点游览']
data['是否_商场_体育馆_电影_旅游'] = data['是否去过高档商场'] * data['当月是否体育场馆消费'] * data['当月是否看电影'] * data['当月是否景点游览']

new_cat_features = [i for i in data.columns.tolist() if i not in ori_col and i not in new_num_features]

#对一些特征分段
discretize_features=['交通类应用使用次数','当月物流快递类应用使用次数','当月飞机类应用使用次数','当月火车类应用使用次数','当月旅游资讯类应用使用次数']
def map_discretize(x):
        if x==0:
            return 0
        elif x<=5:
            return 1
        elif x<=15:
            return 2
        elif x<=50:
            return 3
        elif x<=100:
            return 4
        else:
            return 5
        
for col in discretize_features:
    data[col]=data[col].map(lambda x:map_discretize(x))

#离散化
transform_value_feature=['用户年龄','用户网龄（月）','当月通话交往圈人数','近三个月月均商场出现次数','当月网购类应用使用次数','当月物流快递类应用使用次数'
                            ,'当月金融理财类应用使用总次数','当月视频播放类应用使用次数','当月飞机类应用使用次数','当月火车类应用使用次数','当月旅游资讯类应用使用次数']
user_fea=['缴费用户最近一次缴费金额（元）','用户近6个月平均消费值（元）','用户账单当月总费用（元）','用户当月账户余额（元）']
log_features=['当月网购类应用使用次数','当月金融理财类应用使用总次数','当月物流快递类应用使用次数','当月视频播放类应用使用次数']

for col in transform_value_feature+log_features:
    #取出最高99.9%值
    ulimit=np.percentile(data[col].values,99.9)
    #取出最低0.1%值
    llimit=np.percentile(data[col].values,0.1)
    data.loc[data[col]>ulimit,col]=ulimit
    data.loc[data[col]<llimit,col]=llimit
    
for col in user_fea+transform_value_feature+log_features:
    data[col]=data[col].map(lambda x:np.log1p(x))
    
#聚合特征
# data = feature_count(data, ['用户年龄'])
# data = feature_count(data, ['用户网龄（月）'])
# data = feature_count(data, ['用户最近一次缴费距今时长（月）'])
# data = feature_count(data, ['缴费用户最近一次缴费金额（元）'])
# data = feature_count(data, ['用户近6个月平均消费值（元）'])
# data = feature_count(data, ['用户账单当月总费用（元）'])
# data = feature_count(data, ['用户话费敏感度'])
# data = feature_count(data, ['当月通话交往圈人数'])
# data = feature_count(data, ['近三个月月均商场出现次数'])
# data = feature_count(data, ['最近一次交费是否超过平均消费额'])
# data = feature_count(data, ['当月账单是否超过平均消费额'])

# data = feature_count(data, ['用户话费敏感度','用户年龄'])
# data = feature_count(data, ['用户话费敏感度','用户网龄（月）'])
# data = feature_count(data, ['用户话费敏感度','用户最近一次缴费距今时长（月）'])
# data = feature_count(data, ['用户话费敏感度','缴费用户最近一次缴费金额（元）'])
# data = feature_count(data, ['用户话费敏感度','用户近6个月平均消费值（元）'])
# data = feature_count(data, ['用户话费敏感度','用户账单当月总费用（元）'])
# data = feature_count(data, ['用户话费敏感度','当月通话交往圈人数'])
# data = feature_count(data, ['用户话费敏感度','近三个月月均商场出现次数'])
# data = feature_count(data, ['用户话费敏感度','最近一次交费是否超过平均消费额'])
# data = feature_count(data, ['用户话费敏感度','当月账单是否超过平均消费额'])

# #聚合其他列的特征
# sparse_feature = ['用户年龄','用户网龄（月）','用户最近一次缴费距今时长（月）','用户话费敏感度']
# dense_feature = ['缴费用户最近一次缴费金额（元）','用户近6个月平均消费值（元）','用户账单当月总费用（元）',
#                     '用户当月账户余额（元）']

# def get_new_columns(name,aggs):
#     l=[]
#     for k in aggs.keys():
#         for agg in aggs[k]:
#             if str(type(agg))=="<class 'function'>":
#                 l.append(name + '_' + k + '_' + 'other')
#             else:
#                 l.append(name + '_' + k + '_' + agg)
#     return l
# for d in tqdm(sparse_feature):
#     aggs={}
#     for s in sparse_feature:
#         aggs[s]=['count','nunique']
#     for den in dense_feature:
#         aggs[den]=['mean','max','min','std']
#     aggs.pop(d)
#     temp=data.groupby(d).agg(aggs).reset_index()
#     temp.columns=[d]+get_new_columns(d,aggs)
#     new_num_features.append(get_new_columns(d,aggs))
#     data=pd.merge(data,temp,on=d,how='left')

#记录特征
cat_features = new_cat_features + ori_cat_features
num_features = new_num_features + ori_num_features

for i in cat_features:
    data[i] = data[i].astype('category')
for i in num_features:
    data[i] = data[i].astype('float')

# #类别特征做one-hot    
# for feature in cat_features:
#     try:
#         data[feature] = LabelEncoder().fit_transform(data[feature].apply(int))
#     except:
#         data[feature] = LabelEncoder().fit_transform(data[feature])    
 

train = data[:train.shape[0]]
test = data[train.shape[0]:]    


# train_x=train[num_features]
# test_x=test[num_features]
# enc = OneHotEncoder()
# for feature in cat_features:
#     enc.fit(data[feature].values.reshape(-1, 1))
#     train_a= enc.transform(train[feature].values.reshape(-1, 1))
#     test_a = enc.transform(test[feature].values.reshape(-1, 1))
#     train= sparse.hstack((train_x, train_a), 'csr')
#     test = sparse.hstack((test_x, test_a), 'csr')
    
    
# #CountVectorizer()特征,在本题中没有合适的量
# vector_feature = []
# # for i in vector_feature:
# #     data[i] = data[i].astype('str')
# #     train[i] = train[i].astype('str')
# #     test[i] = test[i].astype('str')

    

# cv=CountVectorizer()
# for feature in vector_feature:
#     cv.fit(data[feature])
#     train_a = cv.transform(train[feature])
#     test_a = cv.transform(test[feature])
#     train = sparse.hstack((train_x, train_a), 'csr')
#     test = sparse.hstack((test_x, test_a), 'csr')


#开始训练
# kf = StratifiedKFold(n_splits=5, random_state=2019, shuffle=False)
# training_time = 0 
# feature_importance_df = pd.DataFrame()
# best_score = []
# sub_list = []

# clf = lgb.LGBMRegressor(
#           boosting_type='gbdt', num_leaves=31, reg_alpha=2.2, reg_lambda=1.5,
#           max_depth=-1, n_estimators=2000,
#           subsample=0.8, colsample_bytree=0.7, subsample_freq=1,
#           learning_rate=0.03, random_state=2019, n_jobs=-1)

# for i, (train_index, val_index) in enumerate(kf.split(train, label)):
#      t0 = time.time()
#      X_train, y_train = train.loc[train_index,:], label[train_index]
#      X_val, y_val     = train.loc[val_index,:],   label[val_index]
#      #X_train, y_train = train[train_index], label[train_index]
#      #X_val,    y_val   = train[val_index],   label[val_index]
#      #clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)],\
#       #        eval_metric='mae', early_stopping_rounds=200, verbose=200, categorical_feature=cat_features)
#      clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)],\
#               eval_metric='mae', early_stopping_rounds=200, verbose=200)
#      pred_val = clf.predict(X_val, num_iteration=clf.best_iteration_)
#      vali_mae = mean_absolute_error(y_val, np.round(pred_val))
#      best_score.append(1/(1+vali_mae))
#      pred_test = clf.predict(test,num_iteration=clf.best_iteration_)
     
#      fold_importance_df = pd.DataFrame()
#      fold_importance_df["feature"] = list(X_train.columns)
#      fold_importance_df["importance"] = clf.feature_importances_
#      fold_importance_df["fold"] = i + 1
#      feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
     
#      sub_list.append(pred_test)
#      t = (time.time() - t0) / 60
#      training_time += t
     
#      print("This round cost time:{:.2f} minutes, lgb scor:{:.8f},\n".format(t, 1/(1+vali_mae)))
        
# pred_test = np.mean(np.array(sub_list), axis=0)
# print(best_score, '\n', np.mean(best_score), np.std(best_score))
# print("Total training time cost:{:.2f} minutes".format(training_time))  


In [3]:
for i in cat_features:
    train[i] = train[i].astype('int')
    test[i]  = test[i].astype('int')

In [4]:
import seaborn as sns
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold,GridSearchCV
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor

In [5]:
X_train,X_test,y_train,y_test = train_test_split(train,label,test_size=0.25,random_state=33)

In [6]:
clf1 = lgb.LGBMRegressor(boosting_type='gbdt',num_leaves=31,max_depth=-1,
                n_estimators=2000,subsample=0.8,
                subsample_freq=1,colsample_bytree=0.7,
                random_state=2019,n_jobs=-1)

clf2 = lgb.LGBMRegressor(boosting_type='gbdt',num_leaves=31,max_depth=-1,
                n_estimators=2000,subsample=0.8,
                subsample_freq=1,colsample_bytree=0.7,
                random_state=2018,n_jobs=-1)

clf3 = XGBRegressor(n_estimators=2000,silent=True,
            objective='reg:linear',booster='gbtree',n_jobs=-1,gamma=0,
            subsample=0.8,colsample_bytree=0.7,colsample_bylevel=1,
            scale_pos_weight=1,base_score=0.5,random_state=2017)

clf4 = XGBRegressor(max_depth=4,learning_rate=0.03,n_estimators=2000,silent=True,
            objective='reg:linear',booster='gbtree',n_jobs=-1,gamma=0,
            subsample=0.8,colsample_bytree=0.7,colsample_bylevel=1,
            scale_pos_weight=1,base_score=0.5,random_state=2016)

In [7]:
kf = StratifiedKFold(n_splits=10,random_state=2015,shuffle=False)
best_score = []
sub_list = []

param_test ={
   'reg_alpha' :[0.015,0.03,0.05],
   'reg_lambda':[0.8,1.0,1.2]
}

In [8]:
# grid search 寻找最优超参数
grid_search = GridSearchCV(estimator=clf3,param_grid=param_test,verbose=1,cv=5)

In [9]:
grid_search.fit(X_train,y_train,eval_metric='mae')

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed: 31.2min finished


GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.7, gamma=0, importance_type='gain',
       learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=2000, n_jobs=-1,
       nthread=None, objective='reg:linear', random_state=2017,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=0.8),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'reg_alpha': [0.015, 0.03, 0.05], 'reg_lambda': [0.8, 1.0, 1.2]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=1)

In [10]:
print(grid_search.score(X_test,y_test))
print(grid_search.best_params_)

0.7909546015882323
{'reg_alpha': 0.03, 'reg_lambda': 1.0}


In [11]:
for i,(train_index,val_index) in enumerate(kf.split(train,label)):
        X_train = train.loc[train_index,:]
        y_train = label[train_index]
        X_val = train.loc[val_index,:]
        y_val = label[val_index]
        
        clf1.fit(X_train,y_train,eval_set=[(X_train,y_train),(X_val,y_val)],
                 eval_metric='mae',early_stopping_rounds=110,verbose=200)
        pred_val1 = clf1.predict(X_val,num_iteration=clf1.best_iteration_)
        val1_mae = mean_absolute_error(y_val,np.round(pred_val1))
        pred_test1 = clf1.predict(test,num_iteration = clf1.best_iteration_)
        
        clf2.fit(X_train,y_train,eval_set=[(X_train,y_train),(X_val,y_val)],
                 eval_metric='rmse',early_stopping_rounds=110,verbose=200)
        pred_val2 = clf2.predict(X_val,num_iteration = clf2.best_iteration_)
        val2_mae = mean_absolute_error(y_val,np.round(pred_val2))
        pred_test2 = clf2.predict(test,num_iteration = clf2.best_iteration_)
        
        clf3.fit(X_train,y_train,eval_set=[(X_train,y_train),(X_val,y_val)],
                 eval_metric='mae',early_stopping_rounds=100,verbose=200)
        pred_val3 = clf3.predict(X_val,ntree_limit=clf3.best_ntree_limit)
        val3_mae = mean_absolute_error(y_val,np.round(pred_val3))
        pred_test3 = clf3.predict(test,ntree_limit=clf3.best_ntree_limit)
        
        clf4.fit(X_train,y_train,eval_set=[(X_train,y_train),(X_val,y_val)],
                 eval_metric='rmse',early_stopping_rounds=100,verbose=200)
        pred_val4 = clf4.predict(X_val,ntree_limit=clf4.best_ntree_limit)
        val4_mae = mean_absolute_error(y_val,np.round(pred_val4))
        pred_test4 = clf4.predict(test,ntree_limit=clf4.best_ntree_limit)
        
        
        pred_val = np.round(pred_val1*0.25 + pred_val2*0.25 + pred_val3*0.25 + pred_val4*0.25)
        vali_mae = mean_absolute_error(y_val,pred_val)
        best_score.append(1/(1+vali_mae))
        
        pred_test = np.round(pred_test1*0.25 + pred_test2*0.25 + pred_test3*0.25 + pred_test4*0.25)
        sub_list.append(pred_test)
        
        print('Round:{:.1f},clf1 score:{:.7f},clf2 score:{:.7f},clf3 score:{:.7f},clf4 score:{:.7f},fusion score:{:.7f}\n'.
             format(i+1,1/(1+val1_mae),1/(1+val2_mae),1/(1+val3_mae),1/(1+val4_mae),1/(1+vali_mae)))

Training until validation scores don't improve for 110 rounds.
[200]	training's l1: 13.2923	training's l2: 291.574	valid_1's l1: 15.2065	valid_1's l2: 392.576
[400]	training's l1: 12.216	training's l2: 245.473	valid_1's l1: 15.1972	valid_1's l2: 392.254
Early stopping, best iteration is:
[298]	training's l1: 12.7325	training's l2: 267.036	valid_1's l1: 15.1719	valid_1's l2: 390.895
Training until validation scores don't improve for 110 rounds.
[200]	training's l2: 292.096	training's rmse: 17.0908	valid_1's l2: 393.72	valid_1's rmse: 19.8424
[400]	training's l2: 246.412	training's rmse: 15.6975	valid_1's l2: 394.809	valid_1's rmse: 19.8698
Early stopping, best iteration is:
[292]	training's l2: 268.8	training's rmse: 16.3951	valid_1's l2: 393.149	valid_1's rmse: 19.828
[0]	validation_0-mae:555.956	validation_1-mae:554.664
Multiple eval metrics have been passed: 'validation_1-mae' will be used for early stopping.

Will train until validation_1-mae hasn't improved in 100 rounds.
[200]	val

In [12]:
pred_test = np.mean(np.array(sub_list[2:]),axis=0)

In [13]:
test_data_sub1 = pd.DataFrame()
test_data_sub1['id'] = test_id
test_data_sub1['score'] =  pred_test
test_data_sub1.columns = ['id','score']
test_data_sub1['score'] = test_data_sub1['score'].apply(lambda x: int(np.round(x)))
test_data_sub1[['id','score']].to_csv('lgb_xgb_stacking_mae_mse.csv', index=False)

In [14]:
importance_features = []
for i in range(0,55):
    importance_features.append((clf1.feature_importances_[i],X_train.columns[i]))

IndexError: index 54 is out of bounds for axis 0 with size 54

In [15]:
importance_features.sort()

In [16]:
importance_features

[(0, '当月是否逛过福州仓山万达'),
 (0, '是否_商场_电影_体育馆'),
 (0, '是否大学生客户'),
 (1, '当月是否到过福州山姆会员店'),
 (1, '是否_商场_体育馆'),
 (1, '是否_商场_旅游'),
 (1, '是否_商场_旅游_体育馆'),
 (1, '是否_商场_电影'),
 (1, '是否黑名单客户'),
 (1, '用户最近一次缴费距今时长（月）'),
 (2, '是否_商场_体育馆_电影_旅游'),
 (2, '是否_商场_电影_旅游'),
 (2, '用户实名制是否通过核实'),
 (3, '当月飞机类应用使用次数'),
 (3, '是否_体育馆_电影_旅游'),
 (4, '当月物流快递类应用使用次数'),
 (4, '是否去过高档商场'),
 (8, '当月是否看电影'),
 (9, '当月火车类应用使用次数'),
 (9, '是否_电影_体育馆'),
 (9, '是否_电影_旅游'),
 (10, '充值方式2'),
 (10, '是否_旅游_体育馆'),
 (11, '是否经常逛商场的人'),
 (12, '交通类应用使用次数'),
 (14, '当月是否体育场馆消费'),
 (21, '充值方式1'),
 (22, '当月是否景点游览'),
 (27, '是否4G不健康客户'),
 (29, '缴费用户当前是否欠费缴费'),
 (40, '缴费用户最近一次缴费金额（元）'),
 (53, '当月旅游资讯类应用使用次数'),
 (78, '用户话费敏感度'),
 (79, '缴费习惯'),
 (93, '缴费金额是否能覆盖当月账单'),
 (97, '用户当月账户余额（元）'),
 (110, '最近一次交费是否超过平均消费额'),
 (113, '近三个月月均商场出现次数'),
 (131, '费用/余额'),
 (143, '用户账单当月总费用（元）'),
 (147, '当月金融理财类应用使用总次数'),
 (151, '账户余额利用率'),
 (158, '当月网购类应用使用次数'),
 (179, '通话人均花费'),
 (184, '当月视频播放类应用使用次数'),
 (189, '当月账单是否超过平均消费额'),
 (191, '网龄/年龄'),
 (198, '近半年账单'),
 (208