# Step2.1 首先执行下面这个：定义函数

In [1]:
########################################################
# Author:                   
# Date: Nov.8, 2018         
# Version: v3            
# Notice:      
# 1. FeatureExtract
# 2. directories structure
#   code/
#   model/
#   feature/
#   answer/
#   train/
#   A/
#   MAKE SURE ALL OF THESE DIRECTORIES EXIST!!!
# 3. 特征均存储在../feature目录
# 
# 4. 最终的特征汇总在features_all_train_test_20181106.csv
#    包含所有train和A的数据
#########################################################


import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures

# Directory prefixes that every input/output path below is built from.
TRAIN_PREFIX = "../train/"
TEST_PREFIX = "../B/"
FEATURE_PREFIX = "../feature/"

# Text encoding used when feature files are written.
FILE_ENCODER = "utf-8"

# Training-set inputs.
BANK_DETAIL_TRAIN_FILE = "{}bank_detail_train.csv".format(TRAIN_PREFIX)
BILL_DETAIL_TRAIN_FILE = "{}bill_detail_train.csv".format(TRAIN_PREFIX)
BROWSE_HISTORY_TRAIN_FILE = "{}browse_history_train.csv".format(TRAIN_PREFIX)
LOANTIME_TRAIN_FILE = "{}loantime_train.csv".format(TRAIN_PREFIX)
OVERDUE_TRAIN_FILE = "{}overdue_train.csv".format(TRAIN_PREFIX)
USERINFO_TRAIN_FILE = "{}userinfo_train.csv".format(TRAIN_PREFIX)

# Test-set inputs.
BANK_DETAIL_TEST_FILE = "{}bank_detail_B.csv".format(TEST_PREFIX)
BILL_DETAIL_TEST_FILE = "{}bill_detail_B.csv".format(TEST_PREFIX)
BROWSE_HISTORY_TEST_FILE = "{}browse_history_B.csv".format(TEST_PREFIX)
LOANTIME_TEST_FILE = "{}loantime_B.csv".format(TEST_PREFIX)
USERINFO_TEST_FILE = "{}userinfo_B.csv".format(TEST_PREFIX)
# Kept under the feature directory because the A directory has no write permission.
USERID_TO_PREDICT_FILE = "{}to_predict_userid.csv".format(FEATURE_PREFIX)

    



In [2]:
# (column-name suffix, pandas aggregation) pairs shared by every named statistic
# family. '最大值' (maximum) must map to 'max' and '最小值' (minimum) to 'min'.
_BANK_STAT_SUFFIXES = (
    ('笔数', 'count'),
    ('总计', 'sum'),
    ('最大值', 'max'),
    ('最小值', 'min'),
    ('平均值', 'mean'),
    ('中位数', 'median'),
    ('方差', 'var'),
    ('标准差', 'std'),
)


def _bank_user_stats(frame, value_col, named_funcs):
    """Aggregate ``value_col`` per user and return one row per user.

    ``named_funcs`` is a sequence of ``(output_column, pandas_function_name)``
    pairs. The result has the '用户标识' key as a regular column followed by the
    output columns in the given order.
    """
    funcs = [func for _, func in named_funcs]
    renames = {func: name for name, func in named_funcs}
    # Aggregating with a list of function names works on old and new pandas
    # alike; the dict-renaming form of SeriesGroupBy.agg was removed in 1.0.
    stats = frame.groupby('用户标识')[value_col].agg(funcs)
    return stats.rename(columns=renames).reset_index()


def _bank_period_features(feature, records, prefix):
    """Left-merge every statistic computed from ``records`` into ``feature``.

    ``records`` holds the bank-detail rows of one period and ``prefix`` is put
    in front of every generated column name. Returns the widened frame.
    """
    is_income = records['交易类型'] == 0
    income = records[is_income]                                   # income rows
    expense = records[records['交易类型'] == 1]                    # expense rows
    salary = records[records['工资收入标记'] == 1]                 # salary income rows
    other_income = records[is_income & (records['工资收入标记'] == 0)]  # non-salary income rows

    keys = ['用户标识', '流水时间']
    income_by_day = income.loc[:, keys + ['交易金额']].rename(columns={'交易金额': '收入'})
    expense_by_day = expense.loc[:, keys + ['交易金额']].rename(columns={'交易金额': '支出'})
    # Left join on (user, day): each income row is paired with every expense row
    # of the same user and day, so an income row repeats once per matching
    # expense; incomes without a same-day expense keep an expense of 0.
    income_expense = pd.merge(income_by_day, expense_by_day, how='left', on=keys)
    income_expense = income_expense.fillna(0)
    income_expense['收入支出差'] = income_expense['收入'] - income_expense['支出']

    # Statistics over all transaction amounts of the period.
    overall = [(prefix + '交易金额_' + func, func)
               for func in ('sum', 'count', 'max', 'min', 'median', 'mean', 'std', 'var')]
    feature = pd.merge(feature, _bank_user_stats(records, '交易金额', overall),
                       how='left', on='用户标识')

    families = (
        (prefix + '用户收入', income, '交易金额'),
        (prefix + '用户支出', expense, '交易金额'),
        (prefix + '用户工资收入', salary, '交易金额'),
        (prefix + '用户非工资收入', other_income, '交易金额'),
        (prefix + '收入支出差', income_expense, '收入支出差'),
    )
    for stem, frame, value_col in families:
        named = [(stem + suffix, func) for suffix, func in _BANK_STAT_SUFFIXES]
        feature = pd.merge(feature, _bank_user_stats(frame, value_col, named),
                           how='left', on='用户标识')

    # Derived features: income/expense gaps and their product with salary income.
    count_gap = prefix + '用户收入支出笔数差值'
    total_gap = prefix + '用户收入支出总计差值'
    feature[count_gap] = feature[prefix + '用户收入笔数'] - feature[prefix + '用户支出笔数']
    feature[total_gap] = feature[prefix + '用户收入总计'] - feature[prefix + '用户支出总计']
    feature[prefix + '工资收入笔数乘以差值'] = feature[prefix + '用户工资收入笔数'] * feature[count_gap]
    feature[prefix + '工资收入总计乘以差值'] = feature[prefix + '用户工资收入总计'] * feature[total_gap]
    return feature


def constructBankDetailFeature(loantime_file, bank_detail_file, output_file):
    """Build per-user bank-statement features and write them to ``output_file``.

    The loan-time CSV is read as two columns, '用户标识' (user id) and '放款时间'
    (loan time). The bank-detail CSV columns ``new_user_id``, ``flag1`` ..
    ``flag4`` are renamed to user id, transaction time, transaction type,
    transaction amount and salary flag. Both time columns are floor-divided by
    86400 (presumably second-resolution timestamps turned into day numbers --
    confirm against the data description).

    The same statistics are computed for three row sets: transactions on or
    before the loan day ('放款前'), after it ('放款后'), and all of them ('整体').
    Every user of the loan-time file gets one output row; statistics without
    matching transactions are left as NaN.

    Parameters
    ----------
    loantime_file : path of the loan-time CSV.
    bank_detail_file : path of the bank-detail CSV.
    output_file : path of the feature CSV to write (encoded with FILE_ENCODER).

    Returns
    -------
    None. The shape of the feature table is printed and the table is written
    to ``output_file`` without the index.
    """
    seconds_per_day = 86400

    loantime = pd.read_csv(loantime_file, header=0, names=['用户标识', '放款时间'])
    loantime['放款时间'] = loantime['放款时间'] // seconds_per_day

    bank_detail = pd.read_csv(bank_detail_file).rename(
        columns={"new_user_id": "用户标识", "flag1": "流水时间", "flag2": "交易类型",
                 "flag3": "交易金额", "flag4": "工资收入标记"}).drop_duplicates()
    bank_detail['流水时间'] = bank_detail['流水时间'] // seconds_per_day
    # Attach each user's loan day so transactions can be split around it.
    bank_detail = pd.merge(bank_detail, loantime, how='inner', on="用户标识")

    on_or_before_loan = bank_detail['流水时间'] <= bank_detail['放款时间']
    periods = (
        ('放款前', bank_detail[on_or_before_loan]),
        ('放款后', bank_detail[bank_detail['流水时间'] > bank_detail['放款时间']]),
        ('整体', bank_detail),
    )

    feature = loantime
    for prefix, records in periods:
        feature = _bank_period_features(feature, records, prefix)

    print(feature.shape)
    feature.to_csv(output_file, index=False, encoding=FILE_ENCODER)
    
    

In [3]:
# Build the bank-statement feature tables for the training and test data sets.
# Output paths are derived from FEATURE_PREFIX so the feature directory is
# configured in one place only.
constructBankDetailFeature(LOANTIME_TRAIN_FILE, BANK_DETAIL_TRAIN_FILE,
                           FEATURE_PREFIX + "bank_detail_train20181106.csv")
constructBankDetailFeature(LOANTIME_TEST_FILE, BANK_DETAIL_TEST_FILE,
                           FEATURE_PREFIX + "bank_detail_test20181106.csv")

FileNotFoundError: File b'../train/loantime_train.csv' does not exist

In [None]:
def _bank_monthly_count_total(frame, value_col, stem):
    """Count and sum ``value_col`` per (user, month bucket).

    Returns the two key columns plus ``stem + '笔数'`` (count) and
    ``stem + '总计'`` (sum).
    """
    # Aggregating with a list of function names works on old and new pandas
    # alike; the dict-renaming form of SeriesGroupBy.agg was removed in 1.0.
    stats = frame.groupby(['用户标识', '流水时间'])[value_col].agg(['count', 'sum'])
    return stats.rename(columns={'count': stem + '笔数', 'sum': stem + '总计'}).reset_index()


def _bank_monthly_period_features(feature, records, prefix):
    """Left-merge per-user statistics of monthly aggregates into ``feature``.

    ``records`` holds the bank-detail rows of one period. They are first
    aggregated per (user, month bucket); every monthly column is then
    summarised per user with min/max/mean/median/var/std. Output columns are
    named ``prefix + '用户每月' + <quantity> + <statistic>``.
    """
    stem = prefix + '用户每月'
    keys = ['用户标识', '流水时间']

    is_income = records['交易类型'] == 0
    income = records[is_income]                                   # income rows
    expense = records[records['交易类型'] == 1]                    # expense rows
    salary = records[records['工资收入标记'] == 1]                 # salary income rows
    other_income = records[is_income & (records['工资收入标记'] == 0)]  # non-salary income rows

    income_by_month = income.loc[:, keys + ['交易金额']].rename(columns={'交易金额': '收入'})
    expense_by_month = expense.loc[:, keys + ['交易金额']].rename(columns={'交易金额': '支出'})
    # Inner join on (user, month): every income row is paired with every
    # expense row of the same user and month bucket.
    income_expense = pd.merge(income_by_month, expense_by_month, how='inner', on=keys)
    income_expense['收入支出差'] = income_expense['收入'] - income_expense['支出']

    # One row per (user, month bucket) that has any transaction in the period.
    monthly = records.loc[:, keys].drop_duplicates()
    sources = (
        (income, '交易金额', stem + '收入'),
        (expense, '交易金额', stem + '支出'),
        (salary, '交易金额', stem + '工资收入'),
        (other_income, '交易金额', stem + '非工资收入'),
        (income_expense, '收入支出差', stem + '收入支出差'),
    )
    for frame, value_col, name in sources:
        monthly = pd.merge(monthly, _bank_monthly_count_total(frame, value_col, name),
                           how='left', on=keys)

    # Months lacking one side stay NaN (no fill), so the gaps below are NaN too.
    count_gap = stem + '收入支出笔数差值'
    total_gap = stem + '收入支出总计差值'
    monthly[count_gap] = monthly[stem + '收入笔数'] - monthly[stem + '支出笔数']
    monthly[total_gap] = monthly[stem + '收入总计'] - monthly[stem + '支出总计']
    monthly[stem + '工资收入笔数乘以差值'] = monthly[stem + '工资收入笔数'] * monthly[count_gap]
    monthly[stem + '工资收入总计乘以差值'] = monthly[stem + '工资收入总计'] * monthly[total_gap]

    # Summarise every monthly column per user; the column order of `monthly`
    # fixes the order of the output columns.
    value_cols = [col for col in monthly.columns if col not in keys]
    per_user = monthly.groupby('用户标识')[value_cols].agg(
        ['min', 'max', 'mean', 'median', 'var', 'std'])
    per_user.columns = [col + stat for col, stat in per_user.columns]
    return pd.merge(feature, per_user.reset_index(), how='left', on='用户标识')


def constructBankDetailFeature2(loantime_file, bank_detail_file, output_file):
    """Build per-user statistics of monthly bank-statement aggregates.

    Input handling matches the daily variant, except that both time columns are
    floor-divided by 86400 * 30 (presumably second-resolution timestamps turned
    into 30-day bucket numbers -- confirm against the data description).
    Transactions are split into those in a bucket on or before the loan bucket
    ('放款前') and those in a later bucket ('放款后').

    Parameters
    ----------
    loantime_file : path of the loan-time CSV (user id, loan time).
    bank_detail_file : path of the bank-detail CSV (new_user_id, flag1..flag4).
    output_file : path of the feature CSV to write (encoded with FILE_ENCODER).

    Returns
    -------
    None. The shape of the feature table is printed and the table is written
    to ``output_file`` without the index.
    """
    seconds_per_month = 86400 * 30

    loantime = pd.read_csv(loantime_file, header=0, names=['用户标识', '放款时间'])
    loantime['放款时间'] = loantime['放款时间'] // seconds_per_month

    bank_detail = pd.read_csv(bank_detail_file).rename(
        columns={"new_user_id": "用户标识", "flag1": "流水时间", "flag2": "交易类型",
                 "flag3": "交易金额", "flag4": "工资收入标记"}).drop_duplicates()
    bank_detail['流水时间'] = bank_detail['流水时间'] // seconds_per_month
    # Attach each user's loan bucket so transactions can be split around it.
    bank_detail = pd.merge(bank_detail, loantime, how='inner', on="用户标识")

    on_or_before_loan = bank_detail['流水时间'] <= bank_detail['放款时间']
    periods = (
        ('放款前', bank_detail[on_or_before_loan]),
        ('放款后', bank_detail[bank_detail['流水时间'] > bank_detail['放款时间']]),
    )

    feature = loantime
    for prefix, records in periods:
        feature = _bank_monthly_period_features(feature, records, prefix)

    print(feature.shape)
    feature.to_csv(output_file, index=False, encoding=FILE_ENCODER)

In [None]:
#constructBankDetailFeature2(LOANTIME_TRAIN_FILE, BANK_DETAIL_TRAIN_FILE, 
#                           FEATURE_PREFIX+"bank_detail2_train20181106.csv")
#constructBankDetailFeature2(LOANTIME_TEST_FILE, BANK_DETAIL_TEST_FILE, 
#                           FEATURE_PREFIX+"bank_detail2_test20181106.csv")