In [1]:
import numpy as np
import pandas as pd
import os
import datetime
import sys
import re
import json
import time
import zipfile
import cpca
from sklearn import preprocessing
# import warnings
# warnings.simplefilter(action='ignore', category=FutureWarning)

In [345]:
def walk_zip_files(path,pattern_zip='.*.zip',pattern_csv='.*trx.*.csv',**kw):
    '''Load every matching CSV member of every matching zip archive under ``path``.

    Parameters
    ----------
    path : str
        Root directory; it is walked recursively with ``os.walk``.
    pattern_zip : str
        Regular expression applied with ``re.search`` to each file name to
        decide whether the file is opened as a zip archive.
    pattern_csv : str
        Regular expression applied with ``re.search`` to each archive member
        name to decide whether the member is parsed.
    **kw
        Forwarded unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame or None
        All parsed members stacked row-wise with a fresh index, or ``None``
        when nothing was parsed.

    Notes
    -----
    A member that cannot be parsed is reported on stdout and skipped, so one
    bad file no longer aborts the walk or re-appends the previous frame.
    '''
    frames = []

    for dirpath, _dirnames, filenames in os.walk(path):
        for file in filenames:
            if not re.search(pattern_zip, file):
                continue
            fullpath = os.path.join(dirpath, file)

            with zipfile.ZipFile(fullpath, "r") as z:
                for member in z.namelist():
                    if not re.search(pattern_csv, member):
                        continue
                    try:
                        # The context manager closes the member handle even on failure.
                        with z.open(member) as f:
                            frames.append(pd.read_csv(f, **kw))
                    except Exception:
                        print('Wrong file full path is :',fullpath)

    if not frames:
        return None
    # One concat at the end instead of re-copying the accumulated frame per member.
    return pd.concat(frames, axis=0, ignore_index=True)


def walk_files(path,pattern='.*loan.*csv',**kw):
    '''Read and stack every CSV under ``path`` whose file name matches ``pattern``.

    Parameters
    ----------
    path : str
        Root directory; it is walked recursively with ``os.walk``.
    pattern : str
        Regular expression applied with ``re.search`` to each file name.
    **kw
        Forwarded unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame or None
        The matching files stacked row-wise with a fresh index, or ``None``
        when no file matched.

    Notes
    -----
    The path of each file that is read is printed. Files that do not match
    ``pattern`` are skipped silently; the old "column count error" message was
    a leftover of a removed column-count check and fired for every
    non-matching file.
    '''
    frames = []
    for dirpath, _dirnames, filenames in os.walk(path):
        for file in filenames:
            if not re.search(pattern, file):
                continue
            fullpath = os.path.join(dirpath, file)
            print(fullpath)
            frames.append(pd.read_csv(fullpath, **kw))

    if not frames:
        return None
    # One concat at the end instead of re-copying the accumulated frame per file.
    return pd.concat(frames, axis=0, ignore_index=True)

def convert_time(time):
    '''Parse ``time`` into a ``datetime.datetime`` using a fixed list of formats.

    The value is converted with ``str()`` and tried against the formats below
    in order; the first one that parses wins.

    Parameters
    ----------
    time : object
        Value whose string form is to be parsed.

    Returns
    -------
    datetime.datetime or None
        The parsed timestamp, or ``None`` (after printing a message) when no
        format matches.
    '''
    text = str(time)
    # The order matters and is kept from the original nested try/except chain.
    formats = (
        "%Y-%m-%d",
        "%Y-%m-%d %H:%M:%S",
        "%Y%m%d%H%M%S",
        "%Y%m%d",
        "%d%m%Y%H%M%S",
    )
    for fmt in formats:
        try:
            return datetime.datetime.strptime(text, fmt)
        except ValueError:
            # Only "does not match this format" is swallowed; anything else propagates.
            continue
    print("Wrong date format: %s" % time)
    return None

def convert_time_col(df,suffix=''):
    '''Parse every time-like column of ``df`` with ``convert_time``.

    A column counts as time-like when its lower-cased name contains ``time``,
    ``date`` or ``ddl``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to convert. It is modified in place.
    suffix : str
        Appended to the column name to form the output column. With the
        default empty string the source column is overwritten.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.
    '''
    columns = df.columns
    # Plain alternation without a capture group: identical matches, but
    # str.contains no longer emits its "pattern has match groups" UserWarning.
    time_cols = columns[columns.str.lower().str.contains(pat='time|date|ddl')]
    for time_col in time_cols:
        print('coverting :'+time_col)
        df[str(time_col)+suffix] = df[time_col].apply(convert_time)
    return df

In [None]:
def unique(x):
    '''Return how many distinct (hashable) elements the iterable ``x`` holds.'''
    distinct = set()
    distinct.update(x)
    return len(distinct)

def extract_bank_file(loan_file,repay_file,prod_no,start_day='20180101',end_day='20200101'):
    '''Load the bank loan / repayment ledgers for one platform product.

    Parameters
    ----------
    loan_file : str
        Path of the loan CSV (columns used here: LOANDATE, LOANAMOUNT, PRODID).
    repay_file : str
        Path of the repayment CSV (columns used here: REPAYDATE, CAPITAL, PRODID).
    prod_no : str
        Platform product number, matched against PRODID
        [zs:LN0026, lz2:LN0031, pk-jp:LN0004, pk-jd:LN0007, qfxd:LN0013].
    start_day : str
        First day of the reporting window, inclusive.
    end_day : str
        Last day of the reporting window, inclusive.

    Returns
    -------
    (loan, repay) : tuple of pandas.DataFrame
        The filtered ledgers, each with an added ``year_month`` column
        ('YYYY-MM') and with their time-like columns parsed by
        ``convert_time_col``.

    Notes
    -----
    The window test compares the dates as strings against ``start_day`` /
    ``end_day``, so it assumes the files store dates in the same YYYYMMDD
    form as the bounds - TODO confirm against the source files.
    '''
    loan = pd.read_csv(loan_file)
    repay = pd.read_csv(repay_file)

    # Pre-processing: dates as strings for the window test, amounts to 2 decimals.
    loan['LOANDATE'] = loan['LOANDATE'].astype(str)
    loan['LOANAMOUNT'] = loan['LOANAMOUNT'].round(2)
    repay['REPAYDATE'] = repay['REPAYDATE'].astype(str)
    repay['CAPITAL'] = repay['CAPITAL'].round(2)

    loan_keep = (
        (loan['PRODID'] == prod_no)
        & (loan['LOANDATE'] >= start_day)
        & (loan['LOANDATE'] <= end_day)
    )
    repay_keep = (
        (repay['PRODID'] == prod_no)
        & (repay['REPAYDATE'] >= start_day)
        & (repay['REPAYDATE'] <= end_day)
    )
    # Explicit copies: the columns added below must not be written to a slice
    # of the frame that was read from disk (SettingWithCopyWarning).
    loan = loan[loan_keep].copy()
    repay = repay[repay_keep].copy()

    loan['year_month'] = loan['LOANDATE'].apply(convert_time).astype(str).str[:7]
    repay['year_month'] = repay['REPAYDATE'].apply(convert_time).astype(str).str[:7]

    loan = convert_time_col(loan)
    repay = convert_time_col(repay)
    return loan,repay


##### ---------------四 start---------------
# '''bank_count_month 计算每月银行放款、还款情况'''
def bank_count_month(loan,repay):
    '''Summarise bank lending and repayment per ``year_month``.

    For each ledger a pivot table is built with the summed amount, the number
    of distinct IDNO values and the number of DUENO rows per month, plus a
    '合计' margin row. The columns are then renamed to their Chinese report
    labels and an average amount per transaction is added.

    Returns
    -------
    (bank_loan_count, bank_repay_count) : tuple of pandas.DataFrame
    '''
    def _monthly_summary(frame, amount_col, labels):
        # Same pivot for both ledgers; only the amount column and labels differ.
        summary = pd.pivot_table(
            frame,
            index='year_month',
            values=[amount_col,'IDNO','DUENO'],
            aggfunc={amount_col:sum,'IDNO':unique,'DUENO':len},
            margins=True,
            margins_name='合计',
        )
        return summary.reset_index().rename(columns=labels)

    loan_labels = {'year_month':'月份','LOANAMOUNT':'贷款金额','IDNO':'贷款企业数','DUENO':'贷款次数'}
    bank_loan_count = _monthly_summary(loan, 'LOANAMOUNT', loan_labels)
    bank_loan_count['次均贷款金额'] = bank_loan_count['贷款金额']/bank_loan_count['贷款次数']

    repay_labels = {'year_month':'月份','CAPITAL':'还款金额','IDNO':'还款企业数','DUENO':'还款次数'}
    bank_repay_count = _monthly_summary(repay, 'CAPITAL', repay_labels)
    bank_repay_count['次均还款金额'] = bank_repay_count['还款金额']/bank_repay_count['还款次数']

    return bank_loan_count,bank_repay_count

# '''bank_account_period 根据银行放还款数据来算企业账期分布'''
def bank_account_period(loan,repay,as_of=None):
    '''Join loans to their repayments and derive holding-period / overdue figures.

    Parameters
    ----------
    loan : pandas.DataFrame
        Loan ledger with DUENO, IDNO, LOANDATE and DUEDATE. The date columns
        are subtracted from each other, so they are expected to hold
        datetimes already - presumably via convert_time_col; confirm at the
        call site.
    repay : pandas.DataFrame
        Repayment ledger with DUENO, REPAYDATE, CAPITAL, REPAYSOURCE,
        NORMALINTEREST and PENALTYINTEREST.
    as_of : datetime.datetime, optional
        Reference date used to age loans that have no repayment. Defaults to
        the notebook-level ``today`` variable, which must then be defined.

    Returns
    -------
    loan_repay : pandas.DataFrame
        ``loan`` left-joined to ``repay`` on DUENO, with ``repay_loan_days``,
        ``overdue_days``, ``due_loan_days`` and ``repay_loan_days_bins``.
    account_period_count : pandas.DataFrame
        Distinct IDNO, distinct DUENO and summed CAPITAL per holding-period bin.
    overdue_detail : pandas.DataFrame
        Rows of ``loan_repay`` with ``overdue_days > 0``.
    compensation_detail : pandas.DataFrame
        Rows of ``loan_repay`` with ``REPAYSOURCE == 3`` (labelled
        "compensation details" by the original author).

    Notes
    -----
    Side effect kept from the original: ``due_month`` is added to ``loan`` and
    ``repay_month`` to ``repay`` in place.
    '''
    if as_of is None:
        # Backward-compatible fallback to the notebook-level reporting date.
        as_of = today

    loan['due_month'] = loan['DUEDATE'].astype(str).str[:7]
    repay['repay_month'] = repay['REPAYDATE'].astype(str).str[:7]
    loan_repay = loan.merge(repay[['DUENO','REPAYDATE','CAPITAL','REPAYSOURCE',
                                   'NORMALINTEREST','PENALTYINTEREST','repay_month']],
                                 on=['DUENO'],how='left')

    def days(x):
        # Values without a ``days`` attribute (e.g. None) map to None.
        return getattr(x, 'days', None)

    loan_repay['repay_loan_days'] = (loan_repay['REPAYDATE'] - loan_repay['LOANDATE']).apply(days)
    loan_repay['overdue_days'] = (loan_repay['REPAYDATE']-loan_repay['DUEDATE']).apply(days)
    # Loans with no repayment row are aged against the reference date instead.
    loan_repay['overdue_days'] = loan_repay['overdue_days'].fillna((as_of-loan_repay['DUEDATE']).apply(days)-1)
    loan_repay['due_loan_days'] = (loan_repay['DUEDATE'] - loan_repay['LOANDATE']).apply(days)

    ## Holding-period bins: progressively wider steps, open-ended last bin
    days_bins = list(np.arange(0,60,5))
    days_bins.extend(np.arange(60,120,10))
    days_bins.extend(np.arange(120,180,15))
    days_bins.extend(np.arange(180,360,30))
    days_bins.extend([360,540,720,np.inf])
    days_bins = sorted(set(days_bins))

    # right=False gives left-closed intervals [a, b)
    loan_repay['repay_loan_days_bins'] = pd.cut(loan_repay['repay_loan_days'],bins=days_bins,right=False)
    account_period_count = loan_repay.groupby('repay_loan_days_bins').agg(
                                    {'IDNO':unique,'DUENO':unique,'CAPITAL':sum}).reset_index()
    account_period_count = account_period_count.rename(columns={'IDNO':'企业数',
                                                                'DUENO':'借据数',
                                                                'CAPITAL':'还款金额',
                                                                'repay_loan_days_bins':'账期（天）'})

    overdue_detail = loan_repay[loan_repay['overdue_days']>0]
    compensation_detail = loan_repay[loan_repay['REPAYSOURCE']==3] ## compensation details

    return loan_repay,account_period_count,overdue_detail,compensation_detail


# '''bank_balance_month 银行每月贷款、还款、贷款余额'''  
def bank_balance_month(loan_count,repay_count):
    '''Combine the monthly loan and repayment summaries and add the loan balance.

    Parameters
    ----------
    loan_count, repay_count : pandas.DataFrame
        The two frames returned by ``bank_count_month``; both carry a '月份'
        column and, respectively, '贷款金额' and '还款金额'.

    Returns
    -------
    pandas.DataFrame
        Outer join of both inputs on '月份', sorted by '月份', missing values
        filled with 0, plus '贷款余额' = running sum of (loans - repayments).

    Notes
    -----
    ``bank_count_month`` appends a '合计' (total) row. That row is left out of
    the running sum and receives total loans minus total repayments instead;
    previously it was accumulated on top of the months it already summarises,
    which double-counted the closing balance.
    '''
    loan_repay_count = loan_count.merge(repay_count,on='月份',how='outer').sort_values('月份').fillna(0)
    net_flow = loan_repay_count['贷款金额'] - loan_repay_count['还款金额']
    is_total = loan_repay_count['月份'] == '合计'
    # The total row contributes 0 to the running sum so it cannot inflate any row.
    loan_repay_count['贷款余额'] = net_flow.mask(is_total, 0).cumsum()
    loan_repay_count.loc[is_total, '贷款余额'] = net_flow[is_total]
    return loan_repay_count

# '''bank_interest_month 银行每月利息'''
def bank_interest_month(bank_repay):
    '''Sum NORMALINTEREST per repayment month and add its running total.

    The ``year_month`` column of ``bank_repay`` is (re)computed in place from
    REPAYDATE before aggregating.

    Returns
    -------
    pandas.DataFrame
        Columns '月份', '利息收入' and '累计利息收入'.
    '''
    repay_months = bank_repay['REPAYDATE'].apply(convert_time).astype(str).str[:7]
    bank_repay['year_month'] = repay_months

    monthly = pd.pivot_table(bank_repay, index='year_month', aggfunc={'NORMALINTEREST': sum})
    monthly = monthly.reset_index().rename(
        columns={'year_month':'月份','NORMALINTEREST':'利息收入'})
    monthly['累计利息收入'] = monthly['利息收入'].cumsum()
    return monthly

##### ---------------六 end---------------



def bank_datasets(loan,repay):
    '''Run every bank-side summary and collect the resulting frames in one dict.

    The helpers are called in a fixed order because some of them add columns
    to ``loan`` / ``repay`` in place.

    Returns
    -------
    dict
        Maps a dataset name to its DataFrame; see the inline comments.
    '''
    # Monthly statistics
    monthly_loans, monthly_repays = bank_count_month(loan, repay)
    # Holding period per loan note, overdue and compensation details
    joined, period_counts, overdue_rows, compensation_rows = bank_account_period(loan, repay)
    # Monthly loan balance
    balance_by_month = bank_balance_month(monthly_loans, monthly_repays)
    # Monthly interest
    interest_by_month = bank_interest_month(repay)

    return {
        'bank_loan_count': monthly_loans,
        'bank_repay_count': monthly_repays,
        'bank_loan_repay': joined,
        'account_period_count': period_counts,      # holding period
        'overdue_detail': overdue_rows,             # overdue
        'compensation_detail': compensation_rows,   # compensation
        'loan_balance_count': balance_by_month,     # loans, repayments, balance
        'interest_income': interest_by_month,       # interest
    }

In [901]:
def order_analysis(order, groupby_col="company_id", cn_name=u"超市ID",period=365,as_of=None):
    '''Per-customer trading activity over the last ``period`` days.

    Parameters
    ----------
    order : pandas.DataFrame
        Orders with ``groupby_col``, ``order_buy_time`` (datetimes),
        ``order_status`` and ``order_receive_amt``. The caller's frame is not
        modified.
    groupby_col : str
        Column identifying the customer.
    cn_name : str
        Label given to the customer column in the result.
    period : int
        Look-back window in days, counted back from ``as_of``.
    as_of : datetime.datetime, optional
        Reference date. Defaults to the notebook-level ``today`` variable,
        which must then be defined.

    Returns
    -------
    pandas.DataFrame
        One row per customer: first / latest order time, days since the first
        order (inclusive), order count and amount, and both scaled to 30 days.
    '''
    if as_of is None:
        # Backward-compatible fallback to the notebook-level reporting date.
        as_of = today

    # Keep only orders inside the window; copy so the casts below do not
    # write to a slice of the caller's frame.
    time_thresh = as_of-datetime.timedelta(period)
    order = order[order['order_buy_time'] >= time_thresh].copy()

    # Earliest / latest trade time
    tmp = order.groupby([groupby_col])["order_buy_time"].agg(["min","max"])
    result = tmp.reset_index()
    result.columns = [cn_name,u"最早交易时间",u"最近交易时间"]
    result[u"交易存续天数"] = (as_of - result[u"最早交易时间"]).dt.days + 1

    order["order_status"] = order["order_status"].astype(str)
    order["order_receive_amt"] = order["order_receive_amt"].astype(float)
    # All orders are counted. A filter on the "completed" status code was
    # disabled by the original author because the code depends on the data source.
    order_finish = order

    # Trade count / amount, scaled to a 30-day month
    tmp = order_finish.groupby([groupby_col])["order_receive_amt"].agg(["count","sum"])
    tmp.columns=[u"交易次数",u"交易金额"]
    result = pd.merge(result, tmp, left_on=[cn_name], right_index=True, how="left")
    result[u"月交易密度"] = result[u"交易次数"] / result[u"交易存续天数"] * 30
    result[u"月交易金额"] = result[u"交易金额"] / result[u"交易存续天数"] * 30
    return result


def loan_analysis(loan, groupby_col="IDNO",period=365,as_of=None):
    '''Annualised loan count and amount per borrower over the last ``period`` days.

    Parameters
    ----------
    loan : pandas.DataFrame
        Loan ledger with ``groupby_col``, LOANDATE (datetimes) and LOANAMOUNT.
    groupby_col : str
        Column identifying the borrower.
    period : int
        Look-back window in days, counted back from ``as_of``.
    as_of : datetime.datetime, optional
        Reference date. Defaults to the notebook-level ``today`` variable,
        which must then be defined.

    Returns
    -------
    pandas.DataFrame
        ``groupby_col`` plus '年贷款次数' and '年贷款金额': the in-window count
        and sum, scaled from ``period`` days to 365 days.
    '''
    if as_of is None:
        # Backward-compatible fallback to the notebook-level reporting date.
        as_of = today

    time_thresh = as_of-datetime.timedelta(period)
    recent = loan[loan['LOANDATE'] >= time_thresh]

    tmp = recent.groupby([groupby_col])["LOANAMOUNT"].agg(["count","sum"])
    tmp.columns=[u"年贷款次数",u"年贷款金额"]
    # Scale the window totals to a 365-day year.
    tmp = tmp/period*365
    return tmp.reset_index()

In [6]:
##  '''diff_days_2col 可用统计订单交易间隔、物流间隔的'''
def diff_days_2col(zhangdan,groupby_col,days_col='repay_loan_days',prefix='账期'):
    '''Per-group mean / std / max / min of an already computed day-count column.

    Usable for order-interval or logistics-interval statistics. The four
    result columns get their Chinese labels with ``prefix`` prepended, and
    every value is rounded to two decimals.
    '''
    labels = [u"间隔时间均值（天）", u"间隔时间标准差（天）", u"最大间隔时间（天）",u"最小间隔时间（天）"]
    stats = zhangdan.groupby(groupby_col)[days_col].agg(["mean","std","max",'min'])
    stats.columns = labels
    stats = stats.add_prefix(prefix)
    return stats.apply(lambda col: col.round(2))

def diff_days(zhangdan,groupby_col,time_col,prefix='交易'):
    '''Per-group statistics of the gap, in days, between consecutive timestamps.

    Rows are ordered by ``time_col``; within each ``groupby_col`` group the
    difference to the previous row is converted to days (two decimals) and
    stored in a new ``diff`` column of ``zhangdan`` (in place). The mean, std,
    max and min of that column are returned per group, labelled in Chinese
    with ``prefix`` prepended and rounded to two decimals.
    '''
    def _as_days(gap):
        # Elements without ``total_seconds`` (e.g. None) become None.
        try:
            seconds = gap.total_seconds()
        except AttributeError:
            return None
        return round(seconds/3600/24, 2)

    ordered = zhangdan.sort_values(by=[time_col])
    gaps = ordered.groupby(groupby_col)[time_col].diff()
    # Assignment aligns on the index, so the sort order does not leak into ``zhangdan``.
    zhangdan["diff"] = gaps.apply(_as_days)

    # Mean / standard deviation / longest / shortest interval per group
    stats = zhangdan.groupby(groupby_col)["diff"].agg(["mean","std","max",'min'])
    stats.columns = [u"间隔时间均值（天）", u"间隔时间标准差（天）", u"最长间隔时间（天）",u"最短间隔时间（天）"]
    stats = stats.add_prefix(prefix)
    return stats.apply(lambda col: col.round(2))


## 数据准备与清洗

#### 1. 行方借贷

In [940]:
# loan_file = '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/9月商户分析数据/as_bbt_loan.csv'
# repay_file = '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/9月商户分析数据/as_bbt_repay.csv'

# loan_file2 = '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/10月商户分析数据/10月放款数据.xlsx'
# repay_file2 = '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/10月商户分析数据/10月还款数据.xlsx'

# l1 = pd.read_csv(loan_file)
# r1 = pd.read_csv(repay_file)
# l2 = pd.read_excel(loan_file2)
# r2 = pd.read_excel(repay_file2)

# l = pd.concat([l1,l2])
# r = pd.concat([r1,r2])

# l.to_csv('/Users/candi/Documents/项目资料/供应链金融资料/月报/data/10月商户分析数据/as_bbt_loan.csv',index=False)
# r.to_csv('/Users/candi/Documents/项目资料/供应链金融资料/月报/data/10月商户分析数据/as_bbt_repay.csv',index=False)

In [941]:
loan_file = '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/10月商户分析数据/as_bbt_loan.csv'
repay_file = '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/10月商户分析数据/as_bbt_repay.csv'
# Bank loan / repayment ledgers for product LN0026, 2018-01-01 .. 2019-10-31.
# extract_bank_file already runs convert_time_col on both frames before it
# returns, so the second conversion pass that used to follow here was dropped:
# it only re-parsed the same columns and printed every column name twice.
bank_loan,bank_repay = extract_bank_file(loan_file,repay_file,'LN0026',start_day='20180101',end_day='20191031')



coverting :LOANDATE
coverting :DUEDATE
coverting :CREATETIME
coverting :UPDATETIME
coverting :REPAYDATE
coverting :CREATETIME
coverting :UPDATETIME
coverting :LOANDATE
coverting :DUEDATE
coverting :CREATETIME
coverting :UPDATETIME
coverting :REPAYDATE
coverting :CREATETIME
coverting :UPDATETIME


In [None]:
# NOTE(review): this re-runs the extraction with an earlier cut-off (2019-08-31)
# and overwrites the bank_loan / bank_repay frames loaded with the 2019-10-31
# cut-off. The cell has no execution count; confirm whether it should be part
# of a top-to-bottom run at all.
bank_loan,bank_repay = extract_bank_file(loan_file,repay_file,'LN0026',start_day='20180101',end_day='20190831')

#### 2. 授信白名单

In [377]:
## Credit whitelist. Valid entries have STATUS == 0 (and SIGNSTATUS == 0);
## an effective limit CREDITLimit == 0 may indicate a changed credit line.
## Per PUSERID only the row with the highest CREDITLimit is kept.
white_list = (
    pd.read_csv('/Users/candi/Documents/项目资料/供应链金融资料/月报/data/7月商户分析数据/中商7月白名单.csv')
    .loc[lambda wl: (wl['STATUS']==0)&(wl['SIGNSTATUS']==0)]
    .sort_values('CREDITLimit')
    .drop_duplicates('PUSERID',keep='last')
    .pipe(convert_time_col)
)

coverting :AUDITTIME
coverting :UPDATETIME




In [730]:
white_list_add = pd.read_csv('/Users/candi/Documents/项目资料/供应链金融资料/月报/data/8月商户分析数据/8月中商新增白名单.csv')

In [843]:
white_list_add

Unnamed: 0,PRODID,MERCHANTNO,BIDNO,BIDTYPE,BIDNAME,PBIDNO,STATUS,MODELLIMIT,CREDITLimit,PUSERID,SIGNSTATUS,AUDITTIME,APPLYCOUNT,remark,UPDATETIME
0,1,99202,342221197410062525,2,烟酒专卖,266748,0,200000.0,200000.0,266748,0,2019-08-08 15:25:46,1,,2019-03-07 18:07:34
1,1,99202,342625196504063097,2,野孩子（有货梯）,553525,0,200000.0,200000.0,553525,0,2019-08-13 15:17:12,1,,2019-03-28 12:00:43


In [735]:
# Append the newly whitelisted merchants.
# - white_list had its time columns parsed when it was loaded, white_list_add
#   did not; convert a copy first so AUDITTIME / UPDATETIME do not end up as a
#   mix of datetimes and raw strings (which breaks later date comparisons).
# - De-duplicate on PUSERID (newest row wins) so that re-running this cell
#   does not append the same merchants again.
white_list = pd.concat(
    [white_list, convert_time_col(white_list_add.copy())]
).drop_duplicates('PUSERID', keep='last')


In [844]:
white_list

Unnamed: 0,PRODID,MERCHANTNO,BIDNO,BIDTYPE,BIDNAME,PBIDNO,STATUS,MODELLIMIT,CREDITLimit,PUSERID,SIGNSTATUS,AUDITTIME,APPLYCOUNT,remark,UPDATETIME
1406,1,99202,370923197311240618,2,牛奶批发,522147,0,200000.0,0.0,522147,0,2018-12-29 17:37:38,1,,2019-03-07 18:07:34
634,1,99202,410923197802233036,2,汇宜副食超市,284967,0,150000.0,0.0,284967,0,2018-12-29 17:37:38,1,,2019-03-07 18:07:34
1355,1,99202,350428198811255539,2,世纪华联,509353,0,150000.0,0.0,509353,0,2018-12-29 17:37:38,1,,2019-03-07 18:07:34
397,1,99202,342622196501150954,2,欣文便利店,209956,0,150000.0,0.0,209956,0,2018-12-28 15:16:16,2,,2019-03-07 18:07:34
1149,1,99202,130721197003122224,2,鲜花水果超市（不需要客服审核）,46879,0,200000.0,0.0,46879,0,2018-12-28 15:16:16,1,,2019-03-07 18:07:34
1528,1,99202,332522199507158590,2,世纪金龙桃洼村,576406,0,50000.0,50000.0,576406,0,2019-05-28 16:02:59,1,,2019-04-12 14:45:36
1407,1,99202,35042819731218601X,2,世纪超市君宜路,522293,0,50000.0,50000.0,522293,0,2019-04-18 16:20:53,1,,2019-04-12 14:42:51
958,1,99202,413024197312153252,2,烟酒店,423756,0,50000.0,50000.0,423756,0,2019-05-23 15:13:20,1,,2019-03-07 18:07:34
1519,1,99202,410185198706092085,2,真良家便民超市,574084,0,50000.0,50000.0,574084,0,2019-04-28 16:55:53,1,,2019-04-12 14:45:18
1526,1,99202,622627198109262212,2,特盛源生鲜超市,576259,0,50000.0,50000.0,576259,0,2019-04-29 16:27:17,1,,2019-04-12 14:45:34


In [845]:
white_list.PBIDNO.unique().size,white_list.shape

(1619, (1619, 15))

## need to check

In [373]:
## Check whether the credit records of the following 3 customers are wrong: they have loan records but a credit limit of 0
## NOTE(review): zgc_customer_id is only assigned in a later cell (bank-borrower section), so this cell fails on a fresh top-to-bottom run.
white_list[(white_list['BIDNO'].isin(zgc_customer_id['IDNO']))&(white_list['CREDITLimit']==0)]

Unnamed: 0,PRODID,MERCHANTNO,BIDNO,BIDTYPE,BIDNAME,PBIDNO,STATUS,MODELLIMIT,CREDITLimit,PUSERID,SIGNSTATUS,AUDITTIME,APPLYCOUNT,remark,UPDATETIME
397,1,99202,342622196501150954,2,欣文便利店,209956,0,150000,0,209956,0,2018-12-28 15:16:16,2,,2019-03-07 18:07:34
1149,1,99202,130721197003122224,2,鲜花水果超市（不需要客服审核）,46879,0,200000,0,46879,0,2018-12-28 15:16:16,1,,2019-03-07 18:07:34
634,1,99202,410923197802233036,2,汇宜副食超市,284967,0,150000,0,284967,0,2018-12-29 17:37:38,1,,2019-03-07 18:07:34


In [378]:
white_list

Unnamed: 0,PRODID,MERCHANTNO,BIDNO,BIDTYPE,BIDNAME,PBIDNO,STATUS,MODELLIMIT,CREDITLimit,PUSERID,SIGNSTATUS,AUDITTIME,APPLYCOUNT,remark,UPDATETIME
1406,1,99202,370923197311240618,2,牛奶批发,522147,0,200000,0,522147,0,2018-12-29 17:37:38,1,,2019-03-07 18:07:34
634,1,99202,410923197802233036,2,汇宜副食超市,284967,0,150000,0,284967,0,2018-12-29 17:37:38,1,,2019-03-07 18:07:34
1355,1,99202,350428198811255539,2,世纪华联,509353,0,150000,0,509353,0,2018-12-29 17:37:38,1,,2019-03-07 18:07:34
397,1,99202,342622196501150954,2,欣文便利店,209956,0,150000,0,209956,0,2018-12-28 15:16:16,2,,2019-03-07 18:07:34
1149,1,99202,130721197003122224,2,鲜花水果超市（不需要客服审核）,46879,0,200000,0,46879,0,2018-12-28 15:16:16,1,,2019-03-07 18:07:34
1528,1,99202,332522199507158590,2,世纪金龙桃洼村,576406,0,50000,50000,576406,0,2019-05-28 16:02:59,1,,2019-04-12 14:45:36
1407,1,99202,35042819731218601X,2,世纪超市君宜路,522293,0,50000,50000,522293,0,2019-04-18 16:20:53,1,,2019-04-12 14:42:51
958,1,99202,413024197312153252,2,烟酒店,423756,0,50000,50000,423756,0,2019-05-23 15:13:20,1,,2019-03-07 18:07:34
1519,1,99202,410185198706092085,2,真良家便民超市,574084,0,50000,50000,574084,0,2019-04-28 16:55:53,1,,2019-04-12 14:45:18
1526,1,99202,622627198109262212,2,特盛源生鲜超市,576259,0,50000,50000,576259,0,2019-04-29 16:27:17,1,,2019-04-12 14:45:34


In [379]:
## Customers to check: they have loan records but are missing from the whitelist
## (right join on the borrower IDs, then keep the rows without whitelist data)
check_list = (
    white_list
    .merge(zgc_customer_id,left_on='BIDNO',right_on='IDNO',how='right')
    .loc[lambda joined: joined['CREDITLimit'].isna()]
)
check_list

Unnamed: 0,PRODID,MERCHANTNO,BIDNO,BIDTYPE,BIDNAME,PBIDNO,STATUS,MODELLIMIT,CREDITLimit,PUSERID,SIGNSTATUS,AUDITTIME,APPLYCOUNT,remark,UPDATETIME,IDNO


In [183]:
bank_loan[bank_loan['IDNO'].isin(check_list['IDNO'])]

Unnamed: 0,JNLNO,MERCHANTNO,PRODID,DUENO,CONTNO,IDTYPE,IDNO,IDNAME,LOANAMOUNT,LOANDATE,DUEDATE,NORMALRATES,PENALTYRATES,REPAYTYPE,CHANNELJNLNO,SendRouterJnlNo,CREATETIME,UPDATETIME,year_month
12142,010120190611000030236501000000,20180001,LN0026,20190010065062,20190604000000008670,ZJ01,362421199008232032,郭秀海,200000.0,2019-06-04,2021-06-04,5.65,18.0,1,32138220190604660165207992023899,32138220190604660165207992023899,2019-06-11 17:30:20,2019-06-11 17:30:20,2019-06
12148,010120190611000030248001000000,20180001,LN0026,20190010065102,20190605000000008706,ZJ01,332521197212142430,林海波,150000.0,2019-06-05,2021-06-05,5.65,18.0,1,32138220190605719243490992025052,32138220190605719243490992025052,2019-06-11 17:30:32,2019-06-11 17:30:32,2019-06
12151,010120190611000030248301000000,20180001,LN0026,20190010065084,20190605000000008687,ZJ01,371428198303261013,徐际旺,200000.0,2019-06-05,2021-06-05,5.65,18.0,1,32138220190605707959541992025611,32138220190605707959541992025611,2019-06-11 17:30:32,2019-06-11 17:30:32,2019-06
12296,010120190623000030901101000000,20180001,LN0026,20190010067655,20190622000000009429,ZJ01,110106196808052732,杨忠利,200000.0,2019-06-22,2021-06-22,5.65,18.0,1,32138220190622173418431992023932,32138220190622173418431992023932,2019-06-23 07:00:01,2019-06-23 07:00:01,2019-06
12299,010120190624000030938501000000,20180001,LN0026,20190010067718,20190622000000009425,ZJ01,411526198703226060,骆雪晴,50000.0,2019-06-23,2020-06-23,5.65,18.0,1,32138220190623286266256992029944,32138220190623286266256992029944,2019-06-24 07:00:01,2019-06-24 07:00:01,2019-06
12301,010120190625000030976101000000,20180001,LN0026,20190010067784,20190624000000009517,ZJ01,342423197707022077,陈程,150000.0,2019-06-24,2021-06-24,5.65,18.0,1,32138220190624375140236992029715,32138220190624375140236992029715,2019-06-25 07:00:01,2019-06-25 07:00:01,2019-06
12302,010120190625000030976201000000,20180001,LN0026,20190010067783,20190624000000009512,ZJ01,341623198602092321,纪娟,200000.0,2019-06-24,2021-06-24,5.65,18.0,1,32138220190624371368544992026224,32138220190624371368544992026224,2019-06-25 07:00:01,2019-06-25 07:00:01,2019-06
12303,010120190625000030976301000000,20180001,LN0026,20190010067780,20190624000000009514,ZJ01,130425198206254829,王莉敏,200000.0,2019-06-24,2021-06-24,5.65,18.0,1,32138220190624368113637992027186,32138220190624368113637992027186,2019-06-25 07:00:01,2019-06-25 07:00:01,2019-06
12304,010120190625000030976401000000,20180001,LN0026,20190010067781,20190624000000009515,ZJ01,230623198810141638,钱国辉,200000.0,2019-06-24,2021-06-24,5.65,18.0,1,32138220190624369776013992028057,32138220190624369776013992028057,2019-06-25 07:00:01,2019-06-25 07:00:01,2019-06
12342,010120190626000031024801000000,20180001,LN0026,20190010067816,20190625000000009532,ZJ01,110106197609222721,李静,200000.0,2019-06-25,2021-06-25,5.65,18.0,1,32138220190625451475879992026096,32138220190625451475879992026096,2019-06-26 07:00:01,2019-06-26 07:00:01,2019-06


#### 3.中商客户

In [942]:
# Reporting reference date. bank_account_period, order_analysis and
# loan_analysis read it as a notebook-level global.
today = convert_time('2019-11-01')
# NOTE(review): `audittime` is used by the new-credit-customer cell
# (white_list.AUDITTIME >= audittime), but its only assignment is the
# commented-out line below - confirm the intended cut-off and restore it.
# audittime = convert_time('2019-07-01')

In [738]:
# Root folder of the FTP data drops
path = '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/zshm/data/ftp_data/'  ## ftp data files

## Customers delivered by ZhongShang over FTP: every "company.*" member of every
## zip under `path`, one row per company_id, time-like columns parsed.
company_all = convert_time_col(
    walk_zip_files(path=path,pattern_csv='company.*').drop_duplicates('company_id')
)




coverting :company_redister_time


In [687]:
# company = walk_zip_files(path=path,pattern_csv='company.*')

In [688]:
# company.shape

(609379, 16)

In [689]:
# company_0731 = walk_zip_files(path=path,pattern_zip='.*0731.*.zip',pattern_csv='company.*')

In [691]:
# company_1101 = walk_zip_files(path=path,pattern_zip='.*1101.*.zip',pattern_csv='company.*')

In [693]:
# company_0731.shape

(2258, 16)

In [686]:
# company_all.shape

(2372, 16)

In [846]:
white_list.columns

Index(['PRODID', 'MERCHANTNO', 'BIDNO', 'BIDTYPE', 'BIDNAME', 'PBIDNO',
       'STATUS', 'MODELLIMIT', 'CREDITLimit', 'PUSERID', 'SIGNSTATUS',
       'AUDITTIME', 'APPLYCOUNT', 'remark', 'UPDATETIME'],
      dtype='object')

In [847]:
company_all.columns

Index(['company_id', 'company_name', 'company_nickname', 'company_type',
       'company_certificate_type', 'company_certificate_num', 'city_name',
       's_name', 'district_name', 'company_address', 'company_redister_time',
       'company_limit', 'company_legal_person', 'company_legal_person_id',
       'company_coop_type', 'company_coop_status'],
      dtype='object')

In [380]:
## Credit customers: every whitelist row, with the company details attached
## where company_id matches PUSERID (right join keeps unmatched whitelist rows).
company_use = company_all.merge(white_list,left_on='company_id',right_on='PUSERID',how='right')
## Newly credited customers: whitelist rows audited on or after `audittime`.
## NOTE(review): `audittime` has no visible active assignment (its definition is
## commented out), so this line raises NameError on a fresh kernel.
## NOTE(review): company_use already carries the whitelist columns, so merging
## white_list onto it again presumably duplicates them with _x/_y suffixes -
## confirm whether filtering company_use on AUDITTIME was intended instead.
company_use_new = company_use.merge(white_list[white_list.AUDITTIME >= audittime],
                                    left_on='company_id',right_on='PUSERID',how='right')

In [199]:
company_all.columns

Index(['company_id', 'company_name', 'company_nickname', 'company_type',
       'company_certificate_type', 'company_certificate_num', 'city_name',
       's_name', 'district_name', 'company_address', 'company_redister_time',
       'company_limit', 'company_legal_person', 'company_legal_person_id',
       'company_coop_type', 'company_coop_status'],
      dtype='object')

In [654]:
company_use.to_excel(result_path+'1-中商下游商户信息.xlsx')

#### 4.行方贷款客户

In [739]:
## Bank borrowers: every distinct ID number in the loan ledger
zgc_customer_id = bank_loan[['IDNO']].drop_duplicates()

## First-time borrowers of 2019-08: IDs seen in that month and in no earlier month
_ids_before = bank_loan.loc[bank_loan['year_month'] < '2019-08', ['IDNO']].drop_duplicates()
_ids_in_month = bank_loan.loc[bank_loan['year_month'] == '2019-08', ['IDNO']].drop_duplicates()
zgc_customer_new_id = _ids_in_month[~_ids_in_month['IDNO'].isin(_ids_before['IDNO'])]


def _with_company_info(id_frame):
    '''Attach whitelist and company details to a frame of borrower IDs (right joins keep every ID).'''
    credit = white_list.merge(id_frame,left_on='BIDNO',right_on='IDNO',how='right')
    return company_all.merge(credit,left_on='company_id',right_on='PUSERID',how='right')


## Details of all borrowers, and of the new borrowers only
zgc_customer = _with_company_info(zgc_customer_id)
zgc_customer_new = _with_company_info(zgc_customer_new_id)

In [850]:
white_list.columns

Index(['PRODID', 'MERCHANTNO', 'BIDNO', 'BIDTYPE', 'BIDNAME', 'PBIDNO',
       'STATUS', 'MODELLIMIT', 'CREDITLimit', 'PUSERID', 'SIGNSTATUS',
       'AUDITTIME', 'APPLYCOUNT', 'remark', 'UPDATETIME'],
      dtype='object')

In [863]:
# Full outer view: every company, every whitelist entry and every borrower ID,
# matched on BIDNO == IDNO and company_id == PUSERID where possible.
_credit_by_id = white_list[['BIDNO','PUSERID','CREDITLimit']].merge(
    zgc_customer_id,left_on='BIDNO',right_on='IDNO',how='outer')
zgc_cus = company_all.merge(_credit_by_id,left_on='company_id',right_on='PUSERID',how='outer')

In [852]:
zgc_cus

Unnamed: 0,company_id,company_name,company_nickname,company_type,company_certificate_type,company_certificate_num,city_name,s_name,district_name,company_address,company_redister_time,company_limit,company_legal_person,company_legal_person_id,company_coop_type,company_coop_status,BIDNO,PUSERID,CREDITLimit,IDNO
0,100420.0,福廉美超市老店（带ps机）,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110112MA007XX567,北京市,北京辖区,通州区,张家湾开发区张家湾开发区,2014-10-17 00:00:00,,郭爱英,371524197812125226,2.0,0.0,371524197812125226,100420,200000.0,371524197812125226
1,101044.0,北京金雅德超市,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,92110116L76799675Y,北京市,北京辖区,怀柔区,庙城镇西台下村,2014-10-20 00:00:00,,张英利,130724198301011737,2.0,0.0,130724198301011737,101044,200000.0,130724198301011737
2,102128.0,北京城乡·118便利店,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110115MA00E5XK07,北京市,北京辖区,大兴区,地盛西路,2014-10-24 00:00:00,,王宁霞,622427197909112380,7.0,0.0,622427197909112380,102128,200000.0,622427197909112380
3,102510.0,北京嗨家密云宾阳里店NO.0076（8open）,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110228L41129188N,北京市,北京辖区,密云县,行宫前街,2014-10-25 00:00:00,,孙祥鹏,410223198505074039,22.0,0.0,410223198505074039,102510,200000.0,410223198505074039
4,102588.0,京东便利店,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110228764248049X,北京市,北京辖区,密云县,密云城区车站路,2014-10-25 00:00:00,,郭春山,110228197311061535,1.0,0.0,110228197311061535,102588,200000.0,110228197311061535
5,103926.0,远东超市,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,92131082MA080BH7XH,北京市,北京辖区,廊坊市,燕郊镇迎宾路,2014-11-03 00:00:00,,周威,421202199110016235,2.0,0.0,421202199110016235,103926,100000.0,421202199110016235
6,105146.0,万家超市,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110105MA01AL4M5B,北京市,北京辖区,朝阳区,常营街道像素北区,2014-11-07 00:00:00,,李海辉,411282198210201513,1.0,0.0,411282198210201513,105146,200000.0,411282198210201513
7,105476.0,门头馨村世纪华联,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,92110108MAOOBY2379,北京市,北京辖区,海淀区,四季青街道香山南路,2014-11-08 00:00:00,,占金明,332529198701151739,2.0,0.0,332529198701151739,105476,200000.0,332529198701151739
8,106096.0,仟百嘉便利店（思明嘉莲街道）,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,92350203MA2YDFD22Y,福建省,厦门市,思明区,嘉莲街道禾丰新景,2014-11-11 00:00:00,,黄延英,350124197212116303,1.0,0.0,350124197212116303,106096,70000.0,350124197212116303
9,106136.0,全爱特（北京）商贸有限公司（冷链商品17点前收货）,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110102061310483T,北京市,北京辖区,朝阳区,容达路辅路,2014-11-12 00:00:00,,张淑萍,142202198012190765,1.0,0.0,142202198012190765,106136,200000.0,142202198012190765


In [856]:
# Field-name workbook: one sheet per table, mapping English to Chinese column names
_field_name_book = '/Users/candi/Documents/项目资料/供应链金融资料/月报/中商下游字段名.xlsx'
company_cn = pd.read_excel(_field_name_book, sheet_name='企业信息表')
order_cn = pd.read_excel(_field_name_book, sheet_name='订单信息表')

# English -> Chinese lookups used to rename columns before export
company_cn_dict = dict(zip(company_cn['英文字段名'],company_cn['中文字段名']))
order_cn_dict = dict(zip(order_cn['英文字段名'],order_cn['中文字段名']))

In [858]:
order_cn_dict

{'company_certificate_num': '企业三证合一码',
 'company_contract_id': '出货仓库ID',
 'company_id': '客户id',
 'company_name': '企业名称',
 'order_buy_amt': '下单总金额',
 'order_buy_time': '下单时间',
 'order_discount_amt': '优惠金额',
 'order_id': '订单编号',
 'order_receive_amt': '收货总金额',
 'order_receive_time': '收货时间',
 'order_root_id': '订单头编号',
 'order_send_amt': '发货总金额',
 'order_send_time': '发货时间',
 'order_status': '订单状态',
 'order_storage_id': '出库单号'}

In [857]:
company_cn_dict

{'city_name': '省/直辖市',
 'company_address': '经营地址',
 'company_certificate_num': '统一社会信用代码',
 'company_certificate_type': '企业证照类型',
 'company_coop_status': '合作状态ID',
 'company_coop_type': '合作类型',
 'company_id': '企业id',
 'company_legal_person': '法人代表',
 'company_legal_person_id': '法人代表身份证号',
 'company_limit': '企业自有额度',
 'company_name': '企业名称',
 'company_nickname': '企业别名',
 'company_redister_time': '企业注册时间',
 'company_type': '企业类型',
 'district_name': '区',
 's_name': '市'}

In [854]:
order_upto20190910 = order_upto20190910.sort_values('order_buy_time')

In [864]:
order_upto20190910_cn = order_upto20190910.rename(columns=order_cn_dict)
zgc_cus_cn = zgc_cus.rename(columns=company_cn_dict)

In [861]:
order_upto20190910_cn.to_excel(
    '/Users/candi/Documents/项目资料/供应链金融资料/中商下游/to行方/中商下游全量订单_截至20190910.xlsx',index=False)

In [866]:
zgc_cus_cn.to_excel(
    '/Users/candi/Documents/项目资料/供应链金融资料/中商下游/to行方/中商下游企业信息.xlsx',index=False)

In [399]:
# Whitelist rows for all borrowers and for the new borrowers (right joins keep every ID).
# The stray `zgc_customer1 = zgc_customer1.mrege()` line was removed: `mrege` is a
# typo that always raises AttributeError, and `merge` without arguments would fail
# too. The join onto company_all is done where zgc_customer is built.
zgc_customer1 = white_list.merge(zgc_customer_id,left_on='BIDNO',right_on='IDNO',how='right')
zgc_customer_new1 = white_list.merge(zgc_customer_new_id,left_on='BIDNO',right_on='IDNO',how='right')

In [409]:
company_all.merge(white_list.merge(zgc_customer_id,left_on='BIDNO',right_on='IDNO',how='right'),left_on='company_id',right_on='PUSERID',how='right')

Unnamed: 0,company_id,company_name,company_nickname,company_type,company_certificate_type,company_certificate_num,city_name,s_name,district_name,company_address,...,STATUS,MODELLIMIT,CREDITLimit,PUSERID,SIGNSTATUS,AUDITTIME,APPLYCOUNT,remark,UPDATETIME,IDNO
0,100420.0,福廉美超市老店（带ps机）,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110112MA007XX567,北京市,北京辖区,通州区,张家湾开发区张家湾开发区,...,0,200000,200000,100420,0,2018-09-07 18:14:17,1,,2019-03-07 18:07:34,371524197812125226
1,101044.0,北京金雅德超市,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,92110116L76799675Y,北京市,北京辖区,怀柔区,庙城镇西台下村,...,0,200000,200000,101044,0,2018-09-07 18:14:17,0,初次申请,2019-03-07 18:07:34,130724198301011737
2,102128.0,北京城乡·118便利店,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110115MA00E5XK07,北京市,北京辖区,大兴区,地盛西路,...,0,200000,200000,102128,0,2018-09-07 18:14:17,1,,2019-03-07 18:07:34,622427197909112380
3,102510.0,北京嗨家密云宾阳里店NO.0076（8open）,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110228L41129188N,北京市,北京辖区,密云县,行宫前街,...,0,200000,200000,102510,0,2018-09-07 18:14:17,0,初次申请,2019-03-07 18:07:34,410223198505074039
4,102588.0,京东便利店,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110228764248049X,北京市,北京辖区,密云县,密云城区车站路,...,0,200000,200000,102588,0,2018-09-07 18:14:17,0,初次申请,2019-03-07 18:07:34,110228197311061535
5,103926.0,远东超市,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,92131082MA080BH7XH,北京市,北京辖区,廊坊市,燕郊镇迎宾路,...,0,100000,100000,103926,0,2018-09-07 18:14:17,0,初次申请,2019-03-07 18:07:34,421202199110016235
6,105146.0,万家超市,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110105MA01AL4M5B,北京市,北京辖区,朝阳区,常营街道像素北区,...,0,200000,200000,105146,0,2018-09-07 18:14:17,0,初次申请,2019-03-07 18:07:34,411282198210201513
7,105476.0,门头馨村世纪华联,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,92110108MAOOBY2379,北京市,北京辖区,海淀区,四季青街道香山南路,...,0,200000,200000,105476,0,2018-09-07 18:14:17,1,,2019-03-07 18:07:34,332529198701151739
8,106096.0,仟百嘉便利店（思明嘉莲街道）,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,92350203MA2YDFD22Y,福建省,厦门市,思明区,嘉莲街道禾丰新景,...,0,150000,70000,106096,0,2018-12-24 15:17:23,1,,2019-03-07 18:07:34,350124197212116303
9,106136.0,全爱特（北京）商贸有限公司（冷链商品17点前收货）,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110102061310483T,北京市,北京辖区,朝阳区,容达路辅路,...,0,200000,200000,106136,0,2018-09-07 18:14:17,0,初次申请,2019-03-07 18:07:34,142202198012190765


In [382]:
zgc_customer_id.shape,zgc_customer.shape

((1245, 1), (1245, 32))

In [166]:
company_all[company_all['company_legal_person_id'].isin(zgc_customer_id.IDNO)].shape

(1235, 16)

In [364]:
company_all[company_all['company_legal_person_id'].isin(zgc_customer_id.IDNO)].shape

(1241, 16)

## 中商缺的商户信息

In [167]:
## Five merchants have loan records, but Zhongshang (中商) did not give us their merchant info.
# how='right' keeps every IDNO from zgc_customer_id; loan customers without a matching
# company_legal_person_id in company_all appear with NaN in all company columns.
company_all.merge(zgc_customer_id,left_on='company_legal_person_id',right_on='IDNO',how='right')

Unnamed: 0,city_name,company_address,company_certificate_num,company_certificate_type,company_coop_status,company_coop_type,company_id,company_legal_person,company_legal_person_id,company_limit,company_name,company_nickname,company_redister_time,company_type,district_name,s_name,IDNO
0,,张家湾开发区张家湾开发区,91110112MA007XX567,http://hmres.huimin100.cn/cms-huimin/picture/1...,0.0,2.0,100420.0,郭爱英,371524197812125226,,福廉美超市老店（带ps机）,,2014-10-17 00:00:00,1.0,,,371524197812125226
1,,庙城镇西台下村,92110116L76799675Y,http://hmres.huimin100.cn/cms-huimin/picture/1...,0.0,2.0,101044.0,张英利,130724198301011737,,北京金雅德超市,,2014-10-20 00:00:00,1.0,,,130724198301011737
2,,地盛西路,91110115MA00E5XK07,http://hmres.huimin100.cn/cms-huimin/picture/1...,0.0,7.0,102128.0,王宁霞,622427197909112380,,北京城乡·118便利店,,2014-10-24 00:00:00,1.0,,,622427197909112380
3,,行宫前街,91110228L41129188N,http://hmres.huimin100.cn/cms-huimin/picture/1...,0.0,22.0,102510.0,孙祥鹏,410223198505074039,,北京嗨家密云宾阳里店NO.0076（8open）,,2014-10-25 00:00:00,1.0,,,410223198505074039
4,,密云城区车站路,91110228764248049X,http://hmres.huimin100.cn/cms-huimin/picture/1...,0.0,1.0,102588.0,郭春山,110228197311061535,,京东便利店,,2014-10-25 00:00:00,1.0,,,110228197311061535
5,,燕郊镇迎宾路,92131082MA080BH7XH,http://hmres.huimin100.cn/cms-huimin/picture/1...,0.0,2.0,103926.0,周威,421202199110016235,,远东超市,,2014-11-03 00:00:00,1.0,,,421202199110016235
6,,常营街道像素北区,91110105MA01AL4M5B,http://hmres.huimin100.cn/cms-huimin/picture/1...,0.0,1.0,105146.0,李海辉,411282198210201513,,万家超市,,2014-11-07 00:00:00,1.0,,,411282198210201513
7,,四季青街道香山南路,92110108MAOOBY2379,http://hmres.huimin100.cn/cms-huimin/picture/1...,0.0,2.0,105476.0,占金明,332529198701151739,,门头馨村世纪华联,,2014-11-08 00:00:00,1.0,,,332529198701151739
8,,嘉莲街道禾丰新景,92350203MA2YDFD22Y,http://hmres.huimin100.cn/cms-huimin/picture/1...,0.0,1.0,106096.0,黄延英,350124197212116303,,仟百嘉便利店（思明嘉莲街道）,,2014-11-11 00:00:00,1.0,,,350124197212116303
9,,容达路辅路,91110102061310483T,http://hmres.huimin100.cn/cms-huimin/picture/1...,0.0,1.0,106136.0,张淑萍,142202198012190765,,全爱特（北京）商贸有限公司（冷链商品17点前收货）,,2014-11-12 00:00:00,1.0,,,142202198012190765


In [None]:
362202198704074029,350429199111212010,342529197306306814,332623197009170856,330625196408046736

#### 5.订单

In [365]:
# Load the previously consolidated order headers and order detail lines; the file names
# indicate the period 2018-11-01 to 2019-06-30.
# NOTE(review): absolute, user-specific paths -- this cell only runs on the author's machine.
order_old = pd.read_csv(
    '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/zshm/data/trx_info_20181101-20190630.csv')
order_detail_old = pd.read_csv(
    '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/zshm/data/trx_detail_info_20181101-20190630.csv')


In [366]:
## Orders: every CSV member matching 'trx_info.*' inside the zip files under <path>/201907/.
# `path` is defined in an earlier cell.
order_all = walk_zip_files(path=path+'201907/',pattern_csv='trx_info.*')

## Order details: CSV members whose name contains 'detail'.
order_detail = walk_zip_files(path=path+'201907/',pattern_csv='.*detail.*',pattern_zip='.*zip')

## Delivery details (currently disabled).
# order_delivery = walk_zip_files(path=path+'201907/',pattern_csv='.*delivery.*',pattern_zip='.*zip')


In [367]:
# Prepend the historical extracts to the July data loaded in the previous cell.
# NOTE(review): order_all / order_detail are rebound in place, so re-running this cell
# concatenates the history again; drop_duplicates below hides that for the row content
# but the cell is not idempotent with respect to the index.
order_all = pd.concat([order_old,order_all])
order_detail = pd.concat([order_detail_old,order_detail])

# One row per order_id; drop_duplicates keeps the first occurrence, i.e. the row from
# order_old when an order appears in both sources.
order_all = order_all.drop_duplicates('order_id')
# 'YYYY-MM' key taken from the first 7 characters of the converted purchase time.
# convert_time / convert_time_col are helpers defined elsewhere in the notebook;
# presumably they parse strings into timestamps -- confirm against their definitions.
order_all['year_month'] = order_all['order_buy_time'].apply(convert_time).astype(str).str[:7]
order_all = convert_time_col(order_all)

# Detail lines are de-duplicated on the full row, not on a key.
order_detail = order_detail.drop_duplicates()
order_detail = convert_time_col(order_detail)

# order_delivery = order_delivery.drop_duplicates()
# order_delivery = convert_time_col(order_delivery)


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  if __name__ == '__main__':


coverting :order_buy_time
coverting :order_receive_time
coverting :order_send_time
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan


In [742]:
# Re-read only the zip files whose name contains '0613' from the 201906 FTP folder and
# collect their 'trx_info' CSV members (variable name suggests a batch that was missed).
# NOTE(review): absolute, user-specific path.
order_miss_0613 = walk_zip_files(
    '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/zshm/data/ftp_data/201906/',
    pattern_csv='trx_info.*',pattern_zip='.*0613.*')

In [744]:
order_201908 = walk_zip_files(path=path+'201908/',pattern_csv='trx_info.*')

In [867]:
order_201909 = walk_zip_files(path=path+'201909/',pattern_csv='trx_info.*')

In [None]:
order_201910 = walk_zip_files(path=path+'201910/',pattern_csv='trx_info.*')

In [746]:
# Fold the August extract and the recovered 0613 batch into order_all. drop_duplicates
# keeps the first occurrence, so for a repeated order_id the order_201908 row wins, then
# order_miss_0613, then the existing order_all row.
# NOTE(review): the two freshly loaded frames have no 'year_month' column in the visible
# code, so concat leaves it NaN for their rows unless it is recomputed afterwards -- confirm.
# NOTE(review): order_all is rebound in place, so the cell is not idempotent.
order_all = pd.concat([order_201908,order_miss_0613,order_all]).drop_duplicates('order_id')

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  if __name__ == '__main__':


In [876]:
# Orders up to and including the September extract, under a new name so order_all itself
# is left untouched. For a repeated order_id the existing order_all row is kept
# (drop_duplicates keeps the first occurrence).
order_all_201909 = pd.concat([order_all,order_201909]).drop_duplicates('order_id')

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  if __name__ == '__main__':


In [881]:
# Recompute the month key for every row from the string form of order_buy_time (first 7
# characters). Unlike the earlier order_all cell this does not go through convert_time,
# so it assumes the values already render as 'YYYY-MM-...' -- TODO confirm.
order_all_201909['year_month'] = order_all_201909['order_buy_time'].apply(str).str[:7]

In [882]:
order_all_201909.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 144889 entries, 0 to 11917
Data columns (total 16 columns):
company_certificate_num    144889 non-null object
company_contract_id        144889 non-null int64
company_id                 144889 non-null int64
company_name               144889 non-null object
order_buy_amt              144889 non-null float64
order_buy_time             144889 non-null object
order_discount_amt         144889 non-null float64
order_id                   144889 non-null object
order_receive_amt          144889 non-null float64
order_receive_time         144889 non-null object
order_root_id              0 non-null float64
order_send_amt             144889 non-null float64
order_send_time            144865 non-null object
order_status               144889 non-null int64
order_storage_id           0 non-null float64
year_month                 144889 non-null object
dtypes: float64(6), int64(3), object(7)
memory usage: 18.8+ MB


In [841]:
order_all.shape,order_upto20190910.shape

((133000, 16), (137330, 15))

In [834]:
# Rebuild the full order table from every zip under the FTP root in one pass.
# NOTE(review): when a CSV fails to parse, walk_zip_files only prints the zip path and then
# concatenates whatever `new_trx` held from the previous file, so a failed file results in
# the previous file's rows being appended a second time (removed later by de-duplicating
# on order_id). The recorded output shows one such failure.
# NOTE(review): absolute, user-specific path.
order_upto20190910 = walk_zip_files(path='/Users/candi/Documents/项目资料/供应链金融资料/月报/data/zshm/data/ftp_data/',
                                    pattern_csv='trx_info.*')

Wrong file full path is : /Users/candi/Documents/项目资料/供应链金融资料/月报/data/zshm/data/ftp_data/201906/99202_order_20190628.zip


In [840]:
# Keep one row per order_id (first occurrence wins).
order_upto20190910 = order_upto20190910.drop_duplicates('order_id')

In [842]:
white_list_all

Unnamed: 0,PRODID,MERCHANTNO,BIDNO,BIDTYPE,BIDNAME,PBIDNO,STATUS,MODELLIMIT,CREDITLimit,PUSERID,SIGNSTATUS,AUDITTIME,APPLYCOUNT,remark,UPDATETIME
1406,1,99202,370923197311240618,2,牛奶批发,522147,0,200000.0,0.0,522147,0,2018-12-29 17:37:38,1,,2019-03-07 18:07:34
634,1,99202,410923197802233036,2,汇宜副食超市,284967,0,150000.0,0.0,284967,0,2018-12-29 17:37:38,1,,2019-03-07 18:07:34
1355,1,99202,350428198811255539,2,世纪华联,509353,0,150000.0,0.0,509353,0,2018-12-29 17:37:38,1,,2019-03-07 18:07:34
397,1,99202,342622196501150954,2,欣文便利店,209956,0,150000.0,0.0,209956,0,2018-12-28 15:16:16,2,,2019-03-07 18:07:34
1149,1,99202,130721197003122224,2,鲜花水果超市（不需要客服审核）,46879,0,200000.0,0.0,46879,0,2018-12-28 15:16:16,1,,2019-03-07 18:07:34
1528,1,99202,332522199507158590,2,世纪金龙桃洼村,576406,0,50000.0,50000.0,576406,0,2019-05-28 16:02:59,1,,2019-04-12 14:45:36
1407,1,99202,35042819731218601X,2,世纪超市君宜路,522293,0,50000.0,50000.0,522293,0,2019-04-18 16:20:53,1,,2019-04-12 14:42:51
958,1,99202,413024197312153252,2,烟酒店,423756,0,50000.0,50000.0,423756,0,2019-05-23 15:13:20,1,,2019-03-07 18:07:34
1519,1,99202,410185198706092085,2,真良家便民超市,574084,0,50000.0,50000.0,574084,0,2019-04-28 16:55:53,1,,2019-04-12 14:45:18
1526,1,99202,622627198109262212,2,特盛源生鲜超市,576259,0,50000.0,50000.0,576259,0,2019-04-29 16:27:17,1,,2019-04-12 14:45:34


In [747]:
order_all.shape

(133000, 16)

In [717]:
order_detail_.drop_duplicates(['order_id','product_id']).shape

(1708150, 9)

In [749]:
order_all[order_all['order_send_time'].isna()]

Unnamed: 0,company_certificate_num,company_contract_id,company_id,company_name,order_buy_amt,order_buy_time,order_discount_amt,order_id,order_receive_amt,order_receive_time,order_root_id,order_send_amt,order_send_time,order_status,order_storage_id,year_month
6763,91110112MA01AB5B4N,104,110226,振华超市（百事邻里）,1040.7,2019-01-11 13:57:21,48.2,PM20017101110226471862405225,992.5,2019-01-12 15:27:39,,992.5,NaT,40,,2019-01
6764,92110106L33860237A,102,123844,春燕子超市,10500.0,2019-01-11 13:57:55,400.0,PM20017301123844471862741573,10100.0,2019-01-12 21:00:30,,10100.0,NaT,40,,2019-01
17994,92110108MA00WE0R0Y,105,260258,华联生活超市（阮航）,945.82,2019-03-01 20:46:51,144.2,PM22017501260258514444101309,801.62,2019-03-02 11:51:17,,801.62,NaT,40,,2019-03
18073,92110108L21455453U,105,273924,平价商店,862.0,2019-03-01 23:55:02,67.0,PM20017501273924514557018987,795.0,2019-03-02 15:02:47,,795.0,NaT,40,,2019-03
21339,92110109L06343699E,105,264023,物美价廉便利超市,1132.4,2019-03-01 23:48:36,120.0,PM20017501264023514553159469,1012.4,2019-03-03 16:28:53,,1012.4,NaT,40,,2019-03
26586,92370112MA3MBY064F,172,504681,湛记超市,767.55,2018-11-07 22:32:50,63.05,PM20016601504681416011698121,704.5,2018-11-08 09:57:17,,704.5,NaT,40,,2018-11
28294,92110106MA00FRJ54T,102,136125,京鑫便利超市,1386.1,2018-11-08 13:53:20,30.0,PM20016601136125416563993452,1356.1,2018-11-09 13:33:56,,1356.1,NaT,40,,2018-11
39204,350206830116783,151,34061,健福兴购物商场（湖里江头街道）,1340.5,2018-11-01 20:22:32,212.99,PM22016401034061410749521971,1127.51,2018-11-02 16:09:12,,1127.51,NaT,40,,2018-11
39368,110105604136959,104,98272,金泰宾馆超市,465.0,2018-10-31 19:22:07,44.73,PM20016501080681409849278390,420.27,2018-11-02 15:36:43,,420.27,NaT,40,,2018-10
41451,91110105569532896L,104,117601,世纪华联贵国酒店（北京点利实惠商贸有限公司,2181.3,2019-02-23 23:23:19,118.5,PM22017501117601509353986801,2062.8,2019-02-24 08:39:07,,2062.8,NaT,40,,2019-02


In [151]:
## 当月订单
# order_new = order_all[order_all['year_month'] == '2019-06']

## 当月订单详情
# order_detail_new = order_detail[order_detail['order_id'].isin(order_new.order_id)]

In [757]:
## 有贷款记录的商户的订单
order_all_ = order_all.merge(zgc_customer[['company_id','city_name','company_legal_person', 
                                           'company_legal_person_id','CREDITLimit']],
                             on='company_id',how='right').drop_duplicates('order_id')
## 有贷款记录的商户的当月订单
order_new_ = order_all_[order_all_['year_month'] == '2019-08']

order_detail_ = order_detail[order_detail['order_id'].isin(order_all_.order_id)]
## 当月订单详情
order_detail_new_ = order_detail[order_detail['order_id'].isin(order_new_.order_id)]

In [931]:
## 有贷款记录的商户的订单
order_all_201909_ = order_all_201909.merge(zgc_customer[['company_id','city_name','company_legal_person', 
                                           'company_legal_person_id','CREDITLimit']],
                             on='company_id',how='right').drop_duplicates('order_id')
## 有贷款记录的商户的当月订单
order_new_ = order_all_201909_[order_all_201909_['year_month'] == '2019-09']

In [756]:
white_list[white_list.PUSERID != white_list.PBIDNO]

Unnamed: 0,PRODID,MERCHANTNO,BIDNO,BIDTYPE,BIDNAME,PBIDNO,STATUS,MODELLIMIT,CREDITLimit,PUSERID,SIGNSTATUS,AUDITTIME,APPLYCOUNT,remark,UPDATETIME
957,1,99202,412722198706138759,2,鑫福隆超市,412722198706138759,0,200000.0,200000.0,42325,0,2018-09-07 18:14:17,0,初次申请,2019-03-07 18:07:34


In [708]:
order_detail_.product_name.unique().size

12901

In [714]:
order_detail_.shape,order_detail.shape,

((1717458, 9), (2258445, 8))

In [709]:
# order_detail.o -
(order_detail_.product_num*order_detail_.product_price).sum()

118420242.15999997

In [715]:
# order_detail.o -
(order_detail.product_num*order_detail.product_price).sum()

149436433.39

In [712]:
order_all_.order_receive_amt.sum()

104395205.41000001

In [None]:
# path = '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/201906/zshm/ftp_data/'
# order = walk_zip_files(path=path,pattern_csv='trx_info.*',pattern_zip='.*zip')
# company = walk_zip_files(path=path,pattern_csv='company.*',pattern_zip='.*zip')
# order_detail = walk_zip_files(path=path,pattern_csv='.*detail.*',pattern_zip='.*zip')
# delivery = walk_zip_files(path=path,pattern_csv='delivery_info.*',pattern_zip='.*zip')

In [678]:
order_new.sort_values(['company_id','order_buy_time'])

Unnamed: 0,company_id,company_name,company_certificate_num,company_contract_id,order_root_id,order_id,order_buy_time,order_buy_amt,order_send_time,order_send_amt,order_discount_amt,order_storage_id,order_receive_time,order_receive_amt,order_status,year_month
83760,201,喜客超市,911101057999884853,105,,PM20017601000201596627685109,2019-06-04 23:39:28,2481.25,2019-06-05 08:32:35,2040.25,441.00,,2019-06-05 16:53:23,2040.25,40,2019-06
86520,201,喜客超市,911101057999884853,105,,PM20017601000201599226032996,2019-06-07 23:50:04,812.18,2019-06-08 08:06:10,703.18,109.00,,2019-06-08 13:16:59,703.18,40,2019-06
84628,201,喜客超市,911101057999884853,105,,PM20017601000201600933213732,2019-06-09 23:15:21,1389.30,2019-06-10 07:41:24,1178.30,211.00,,2019-06-10 12:39:41,1178.30,40,2019-06
80444,201,喜客超市,911101057999884853,105,,PM20017701000201606144264569,2019-06-16 00:00:27,1455.75,2019-06-17 07:22:23,1449.75,6.00,,2019-06-17 11:05:09,1449.75,40,2019-06
91193,201,喜客超市,911101057999884853,105,,PM20017701000201608704592031,2019-06-18 23:07:39,1702.34,2019-06-19 08:45:48,1691.34,11.00,,2019-06-19 12:59:39,1691.34,40,2019-06
85593,201,喜客超市,911101057999884853,105,,PM20017701000201610448126585,2019-06-20 23:33:33,1540.76,2019-06-21 07:54:13,1523.76,17.00,,2019-06-21 13:20:24,1523.76,40,2019-06
87903,201,喜客超市,911101057999884853,105,,PM20017701000201612184807751,2019-06-22 23:48:00,1348.50,2019-06-23 09:38:22,1327.50,21.00,,2019-06-23 16:19:52,1327.50,40,2019-06
88492,201,喜客超市,911101057999884853,105,,PM20017701000201613863437137,2019-06-24 22:25:44,827.24,2019-06-26 07:14:10,671.24,156.00,,2019-06-26 15:55:40,671.24,40,2019-06
90271,201,喜客超市,911101057999884853,105,,PM20017701000201618198328029,2019-06-29 22:50:32,1604.77,2019-06-30 09:18:03,1386.02,218.75,,2019-06-30 13:04:48,1386.02,40,2019-06
80814,313,北京嗨家豪杰店（NO.0093）,91110108MA0020TY0C,105,,LD20017601000313594401756999,2019-06-02 09:49:35,567.50,2019-06-03 01:53:23,462.50,105.00,,2019-06-03 12:31:16,462.50,40,2019-06


In [817]:
# Headline credit/loan summary sentence for the report. Placeholders:
#   {0} number of white_list rows (credit-approved merchants)
#   {1} total CREDITLimit, in units of 1e8
#   {2} average CREDITLimit per white_list row, in units of 10,000
#   {3} number of distinct IDNO values in bank_loan (borrowers)
#   {4} borrowers as a percentage of white_list rows
#   {5} LOANAMOUNT total minus repaid CAPITAL total, in units of 10,000
#   {6} number of distinct non-null city_name values among zgc_customer
print('''中商惠民下游业务，授信商户数{0}家，授信总额度{1}亿，平均授信额度{2}万，总贷款客户{3}家，
  占总授信商户数的{4}%，贷款余额{5}万，分布在全国主要的{6}个省市地区。'''.format(
     white_list.shape[0],
     np.round(white_list.CREDITLimit.sum()/1e8,2),
     np.round(white_list.CREDITLimit.sum()/white_list.shape[0]/10000,1),
     bank_loan.IDNO.unique().size,
     np.round(bank_loan.IDNO.unique().size/white_list.shape[0]*100,2),
     np.round(bank_loan.LOANAMOUNT.sum()/10000-bank_repay.CAPITAL.sum()/10000,2),
     zgc_customer.dropna(subset=['city_name']).city_name.unique().size))

中商惠民下游业务，授信商户数1619家，授信总额度2.6亿，平均授信额度16.1万，总贷款客户1245家，
  占总授信商户数的76.9%，贷款余额6757.46万，分布在全国主要的11个省市地区。


## 2.2 商户按不同授信区间分布情况

In [392]:
# Merchant count per credit-limit value for all loan customers, plus a '合计' totals row
# (margins=True). `unique` is a helper defined elsewhere in the notebook; presumably it
# returns the number of distinct values -- confirm against its definition.
company_limit = pd.pivot_table(zgc_customer,index='CREDITLimit',values=['BIDNO'],
                                      aggfunc={'BIDNO':unique},
                                      margins=True,margins_name='合计').reset_index()
company_limit = company_limit.rename(columns={'CREDITLimit':'授信额度','BIDNO':'商户数'})
# After reset_index the totals row is the last row (label len-1); dividing by it gives
# each limit's share of all merchants.
company_limit['占比'] = company_limit['商户数']/company_limit.loc[len(company_limit)-1,'商户数']

In [400]:
zgc_customer_new1.PBIDNO

Unnamed: 0,PRODID,MERCHANTNO,BIDNO,BIDTYPE,BIDNAME,PBIDNO,STATUS,MODELLIMIT,CREDITLimit,PUSERID,SIGNSTATUS,AUDITTIME,APPLYCOUNT,remark,UPDATETIME,IDNO
0,1,99202,110225196909150344,2,学霞副食,88360,0,200000,200000,88360,0,2019-07-02 16:06:23,1,,2019-03-07 18:07:34,110225196909150344
1,1,99202,371322198703302722,2,微小超（自助售货机）,552898,0,200000,200000,552898,0,2019-07-22 18:23:05,1,,2019-03-28 11:59:58,371322198703302722
2,1,99202,14273019910709102X,2,晨光生鲜超市,563162,0,200000,200000,563162,0,2019-07-01 15:00:28,1,,2019-04-12 14:44:05,14273019910709102X
3,1,99202,142623199301162611,2,涛华玉丰（东）,211834,0,200000,200000,211834,0,2019-07-09 15:11:15,1,,2019-03-07 18:07:34,142623199301162611
4,1,99202,412821197810114428,2,烟酒大卖场,12681,0,200000,200000,12681,0,2019-07-02 16:06:23,1,,2019-03-07 18:07:34,412821197810114428
5,1,99202,130434198201012456,2,国际电子城快客便利店（送货带pos机）,469562,0,200000,200000,469562,0,2019-07-08 16:07:33,1,,2019-03-07 18:07:34,130434198201012456
6,1,99202,411525199110039364,2,中商惠民（东冉村店）,29427,0,200000,200000,29427,0,2019-07-15 17:27:17,1,,2019-03-07 18:07:34,411525199110039364


In [402]:
# Same credit-limit distribution as the cell for zgc_customer, but for zgc_customer_new.
# NOTE(review): this rebinds company_limit, overwriting the all-customer table; the two
# cells differ only in the input frame and are a candidate for a shared function.
company_limit = pd.pivot_table(zgc_customer_new,index='CREDITLimit',values=['BIDNO'],
                                      aggfunc={'BIDNO':unique},
                                      margins=True,margins_name='合计').reset_index()
company_limit = company_limit.rename(columns={'CREDITLimit':'授信额度','BIDNO':'商户数'})
# Last row is the '合计' totals row; dividing by it yields each limit's share.
company_limit['占比'] = company_limit['商户数']/company_limit.loc[len(company_limit)-1,'商户数']

In [411]:
company_limit

Unnamed: 0,授信额度,商户数,占比
0,200000,7,1.0
1,合计,7,1.0


## 2.3商户数及授信总额按地区分布情况

In [412]:
## white_list中一部分已授信的商户未在中商用ftp传给我们的company表中
company_limit_city = pd.pivot_table(zgc_customer,index='city_name',values=['company_id','CREDITLimit'],
                                      aggfunc={'CREDITLimit':sum,'company_id':unique},
                                      margins=True,margins_name='合计').reset_index()
company_limit_city = company_limit_city.rename(columns={'city_name':'地区','CREDITLimit':'授信总额','company_id':'商户数'})
company_limit_city['户均授信额度'] = company_limit_city['授信总额']/company_limit_city['商户数']
company_limit_city['授信额度占比'] = company_limit_city['授信总额']/company_limit_city.loc[
                                                                len(company_limit_city)-1,'授信总额']
company_limit_city['商户数占比'] = company_limit_city['商户数']/company_limit_city.loc[len(company_limit_city)-1,'商户数']


In [698]:
zgc_customer[zgc_customer['city_name']=='安徽省'].CREDITLimit.apply(float).sum()

4450000.0

In [414]:
company_limit_city.sort_values('授信总额',ascending=False)

Unnamed: 0,地区,授信总额,商户数,户均授信额度,授信额度占比,商户数占比
11,合计,200700000,1239.0,161985.472155,1.0,1.0
1,北京市,77050000,404.0,190717.821782,0.383906,0.326069
0,上海市,28170000,192.0,146718.75,0.140359,0.154964
6,广东省,21200000,108.0,196296.296296,0.10563,0.087167
3,天津市,16140000,110.0,146727.272727,0.080419,0.088781
9,福建省,15930000,112.0,142232.142857,0.079372,0.090395
8,浙江省,15270000,103.0,148252.427184,0.076084,0.083132
5,山东省,13240000,92.0,143913.043478,0.065969,0.074253
2,四川省,5250000,35.0,150000.0,0.026158,0.028249
4,安徽省,4450000,45.0,98888.888889,0.022172,0.03632


## 2.4商户数及授信总额按成立年限分布情况

In [80]:
def parse_company_info(company):
    '''Add company age and legal-person age columns to ``company`` and return it.

    The frame is modified in place (three columns are added) and the same object
    is returned:

    * ``foundation_years`` -- days between the global ``today`` and
      ``company_redister_time``, divided by 365.
    * ``person_birthday``  -- characters 6..13 of ``company_legal_person_id``
      passed through the notebook helper ``convert_time``.
    * ``age``              -- days between ``today`` and ``person_birthday``,
      divided by 365.

    ``company_redister_time`` is expected to be converted to a time type already.
    ``today`` and ``convert_time`` are defined elsewhere in the notebook.
    '''
    def _years_before_today(moment):
        # Whole days elapsed, expressed in 365-day years.
        return (today - moment).days / 365

    company['foundation_years'] = company['company_redister_time'].apply(_years_before_today)

    birthday_digits = company['company_legal_person_id'].str[6:14]
    company['person_birthday'] = birthday_digits.apply(convert_time)

    company['age'] = company['person_birthday'].apply(_years_before_today)
    return company

In [429]:
def foundation_dist(company):
    '''Distribution of credit limit and merchant count by years since foundation.

    Parameters
    ----------
    company : pandas.DataFrame
        Must contain ``foundation_years``, ``CREDITLimit`` and ``company_id``.
        A ``foundation_years_bins`` column is added to it in place.

    Returns
    -------
    pandas.DataFrame
        One row per bin with columns 成立年限 (bin), 授信总额 (sum of CREDITLimit),
        商户数 (merchant count), 平均授信额度 (total / count) and 商户数占比
        (count / sum of counts).
    '''
    # Half-year steps below 5 years, yearly steps from 5 to 10, then 10/15/20/inf.
    foundation_bins = list(np.arange(0,5,0.5))
    foundation_bins.extend(list(np.arange(5,10,1)))
    foundation_bins.extend([10,15,20,np.inf])
    foundation_bins = sorted(set(foundation_bins))

    # Left-closed, right-open intervals: [0, 0.5), [0.5, 1.0), ...
    company['foundation_years_bins'] = pd.cut(company['foundation_years'],bins=foundation_bins,right=False)
    # The aggregator for 'company_id' was missing (a syntax error). `unique` is the notebook
    # helper already used for merchant counts in age_dist and the pivot-table cells;
    # presumably it returns the number of distinct values -- confirm against its definition.
    foundation_count = company.groupby('foundation_years_bins').agg({'CREDITLimit':sum,'company_id':unique})
    foundation_count['平均授信额度'] = foundation_count['CREDITLimit']/foundation_count['company_id']
    foundation_count['商户数占比'] = foundation_count['company_id']/foundation_count['company_id'].sum()
    foundation_count = foundation_count.reset_index()
    foundation_count = foundation_count.rename(columns={'company_id':'商户数','CREDITLimit':'授信总额',
                                                        'foundation_years_bins':'成立年限'})
    return foundation_count

In [430]:

## All loan merchants.
# parse_company_info adds its columns to the frame it receives and returns that same
# object, so company_use_ is zgc_customer itself, now carrying foundation_years /
# person_birthday / age.
company_use_ = parse_company_info(zgc_customer)
fc = foundation_dist(company_use_)

## Merchants newly credit-approved this month (disabled).
# if not company_use_new.empty:
#     company_use_new_ = parse_company_info(company_use_new)
#     fc_new = foundation_dist(company_use_new_)

## Merchants that newly took a loan this month.
# fc_zgc_new is only defined when zgc_customer_new has rows.
if not zgc_customer_new.empty:
    zgc_customer_new_ = parse_company_info(zgc_customer_new)
    fc_zgc_new = foundation_dist(zgc_customer_new_)

Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan
Wrong date format: nan


In [431]:
fc

Unnamed: 0,成立年限,授信总额,商户数,平均授信额度,商户数占比
0,"[0.0, 0.5)",300000,2.0,150000.0,0.001614
1,"[0.5, 1.0)",3100000,18.0,172222.222222,0.014528
2,"[1.0, 1.5)",9400000,59.0,159322.033898,0.047619
3,"[1.5, 2.0)",13290000,87.0,152758.62069,0.070218
4,"[2.0, 2.5)",16340000,108.0,151296.296296,0.087167
5,"[2.5, 3.0)",16690000,108.0,154537.037037,0.087167
6,"[3.0, 3.5)",48080000,314.0,153121.019108,0.25343
7,"[3.5, 4.0)",27480000,171.0,160701.754386,0.138015
8,"[4.0, 4.5)",18180000,111.0,163783.783784,0.089588
9,"[4.5, 5.0)",18390000,99.0,185757.575758,0.079903


In [135]:
fc_new

NameError: name 'fc_new' is not defined

In [432]:
fc_zgc_new

Unnamed: 0,成立年限,授信总额,商户数,平均授信额度,商户数占比
0,"[0.0, 0.5)",0,,,
1,"[0.5, 1.0)",200000,1.0,200000.0,0.142857
2,"[1.0, 1.5)",200000,1.0,200000.0,0.142857
3,"[1.5, 2.0)",0,,,
4,"[2.0, 2.5)",200000,1.0,200000.0,0.142857
5,"[2.5, 3.0)",0,,,
6,"[3.0, 3.5)",0,,,
7,"[3.5, 4.0)",200000,1.0,200000.0,0.142857
8,"[4.0, 4.5)",0,,,
9,"[4.5, 5.0)",200000,1.0,200000.0,0.142857


## 2.5商户数及授信总额按法人年龄分布情况

In [435]:
def age_dist(company):
    '''Distribution of credit limit and legal-person count by legal-person age.

    ``company`` must contain ``age``, ``CREDITLimit`` and ``company_certificate_num``;
    an ``age_bins`` column is added to it in place. Returns one row per age bin with
    columns 法人年龄, 授信总额, 法人数, 平均授信额度 and 法人数占比.

    The count column is produced by the notebook helper ``unique`` (defined elsewhere).
    '''
    # Bin edges: 18, then 20..75 in steps of 5, then +inf; intervals are [a, b).
    edges = sorted({18, np.inf, *np.arange(20, 80, 5)})
    company['age_bins'] = pd.cut(company['age'], bins=edges, right=False)

    per_bin = company.groupby('age_bins').agg(
        {'CREDITLimit': sum, 'company_certificate_num': unique})

    holders = per_bin['company_certificate_num']
    per_bin['平均授信额度'] = per_bin['CREDITLimit'] / holders
    per_bin['法人数占比'] = holders / holders.sum()

    labels = {'company_certificate_num': '法人数',
              'CREDITLimit': '授信总额',
              'age_bins': '法人年龄'}
    return per_bin.reset_index().rename(columns=labels)

In [436]:
## All merchants.
# age_dist reads the 'age' column, which exists only after parse_company_info has been
# applied to the same frame in an earlier cell.
ac = age_dist(zgc_customer)
## New merchants.
# ac_new is only defined when zgc_customer_new has rows.
if not zgc_customer_new.empty:
    ac_new = age_dist(zgc_customer_new)

In [437]:
ac

Unnamed: 0,法人年龄,授信总额,法人数,平均授信额度,法人数占比
0,"[18.0, 20.0)",150000,1.0,150000.0,0.00081
1,"[20.0, 25.0)",4920000,29.0,169655.172414,0.023501
2,"[25.0, 30.0)",24620000,151.0,163046.357616,0.122366
3,"[30.0, 35.0)",45690000,282.0,162021.276596,0.228525
4,"[35.0, 40.0)",47140000,288.0,163680.555556,0.233387
5,"[40.0, 45.0)",33230000,205.0,162097.560976,0.166126
6,"[45.0, 50.0)",30380000,185.0,164216.216216,0.149919
7,"[50.0, 55.0)",10920000,69.0,158260.869565,0.055916
8,"[55.0, 60.0)",3500000,23.0,152173.913043,0.018639
9,"[60.0, 65.0)",150000,1.0,150000.0,0.00081


In [438]:
ac_new

Unnamed: 0,法人年龄,授信总额,法人数,平均授信额度,法人数占比
0,"[18.0, 20.0)",0,,,
1,"[20.0, 25.0)",0,,,
2,"[25.0, 30.0)",600000,3.0,200000.0,0.428571
3,"[30.0, 35.0)",200000,1.0,200000.0,0.142857
4,"[35.0, 40.0)",200000,1.0,200000.0,0.142857
5,"[40.0, 45.0)",200000,1.0,200000.0,0.142857
6,"[45.0, 50.0)",200000,1.0,200000.0,0.142857
7,"[50.0, 55.0)",0,,,
8,"[55.0, 60.0)",0,,,
9,"[60.0, 65.0)",0,,,


# 3.	供应链业务情况分析

In [824]:
result_path = '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/zshm/201908/中间结果数据/'

In [934]:
# Point the generic names used by the summary cell at the September data.
# NOTE(review): order_new_ is the loan-customer subset filtered to '2019-09', whereas
# order_all_201909 covers all merchants, so the "this month" figures printed for "all
# merchants" are in fact restricted to loan customers -- confirm this is intended.
order_new = order_new_
order_all = order_all_201909

In [935]:
# Summary sentence over all orders received from Zhongshang. Placeholders:
#   {0} distinct company_id count   {1} row (order) count   {2} total order_receive_amt
#   {3} average amount per order    {4} largest single order
#   {5}-{9} the same five figures for order_new (current month)
#   {10} total amount divided by the distinct company_id count
print('''中商下游传过来的数据中有{0}户商户，{1}笔订单，订单总金额共计{2}元，户均金额{10}元，
笔均订单金额{3}元，最大的单笔订单金额{4}元。其中，本月下单商户{5}户，{6}笔订单，订单总金额{7}元，
笔均{8}元，最大的单笔订单金额{9}元。'''.format(
            order_all.company_id.unique().size,
            order_all.shape[0],
            np.round(order_all.order_receive_amt.sum(),2),
            np.round(order_all.order_receive_amt.sum()/order_all.shape[0],2),
            order_all.order_receive_amt.max(),
            order_new.company_id.unique().size,
            order_new.shape[0],
            np.round(order_new.order_receive_amt.sum(),2),
            np.round(order_new.order_receive_amt.sum()/order_new.shape[0],2),
            order_new.order_receive_amt.max(),
            np.round(order_all.order_receive_amt.sum()/order_all.company_id.unique().size,2)
))

# Same sentence restricted to merchants with loan records (order_all_ / order_new_).
# NOTE(review): in the visible cells order_all_ is derived from order_all in the cell that
# filters '2019-08', while order_new_ is derived from order_all_201909_; confirm order_all_
# was refreshed to the September data before this cell ran.
print('''中商下游传过来的订单数据中有过贷款记录的有{0}户商户，共{1}笔订单，订单总金额共计{2}元，户均金额{10}元，
笔均订单金额{3}元，最大的单笔订单金额{4}元。其中，本月下单商户{5}户，{6}笔订单，订单总金额{7}元，
笔均{8}元，最大的单笔订单金额{9}元。'''.format(
            order_all_.company_id.unique().size,
            order_all_.shape[0],
            np.round(order_all_.order_receive_amt.sum(),2),
            np.round(order_all_.order_receive_amt.sum()/order_all_.shape[0],2),
            order_all_.order_receive_amt.max(),
            order_new_.company_id.unique().size,
            order_new_.shape[0],
            np.round(order_new_.order_receive_amt.sum(),2),
            np.round(order_new_.order_receive_amt.sum()/order_new_.shape[0],2),
            order_new_.order_receive_amt.max(),
            np.round(order_all_.order_receive_amt.sum()/order_all_.company_id.unique().size,2)
))

中商下游传过来的数据中有2179户商户，144889笔订单，订单总金额共计162304099.19元，户均金额74485.59元，
笔均订单金额1120.2元，最大的单笔订单金额146896.0元。其中，本月下单商户931户，8558笔订单，订单总金额10262248.37元，
笔均1199.14元，最大的单笔订单金额48399.83元。
中商下游传过来的订单数据中有过贷款记录的有1231户商户，共101396笔订单，订单总金额共计116184589.66元，户均金额94382.28元，
笔均订单金额1145.85元，最大的单笔订单金额146896.0元。其中，本月下单商户931户，8558笔订单，订单总金额10262248.37元，
笔均1199.14元，最大的单笔订单金额48399.83元。


## 3.1商户业务情况总表

In [821]:
## 各商户每月订单统计
order_com_month = pd.pivot_table(order_all_,index=['company_certificate_num','company_id','city_name',
                                                   'company_legal_person', 'company_legal_person_id'],
               columns=['year_month'],values=['order_receive_amt','order_buy_amt',
                                              'order_discount_amt','order_id'],
               aggfunc={'order_receive_amt':sum,'order_buy_amt':sum,
                        'order_discount_amt':sum,'order_id':len},margins=True,margins_name='合计')
order_com_month = order_com_month.rename(columns={'company_id':'超市ID',
                                                  'city_name':'地区',
                                                  'order_receive_amt':'订单金额',
                                                  'order_buy_amt':'下单金额',
                                                  'order_discount_amt':'优惠金额',
                                                  'order_id':'订单次数'})

In [822]:
order_com_month

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,下单金额,下单金额,下单金额,下单金额,下单金额,下单金额,下单金额,下单金额,下单金额,下单金额,...,订单金额,订单金额,订单金额,订单金额,订单金额,订单金额,订单金额,订单金额,订单金额,订单金额
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,year_month,2018-10,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,...,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,合计
company_certificate_num,company_id,city_name,company_legal_person,company_legal_person_id,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2
040000200803030011,432988.0,上海市,李茂钟,35222719880723561X,,11425.42,10841.54,8095.31,7482.39,13962.32,7483.80,14497.60,11462.62,8312.71,...,9408.72,8944.94,6543.45,6300.84,12175.27,6282.90,12476.50,10448.72,7597.70,8.017904e+04
040002200907070005,430592.0,上海市,夏宝仙,342422196612084610,976.53,10830.76,12843.76,9906.35,6400.15,15558.17,17314.72,10278.34,9585.06,16622.60,...,9611.99,10686.82,8412.93,5341.86,13361.72,14511.51,9114.14,8300.37,15339.49,9.560026e+04
040004201206280010,549566.0,上海市,李进,340122198006214092,,10304.39,12076.64,13632.39,3207.15,4191.63,6490.30,6637.86,5170.97,10661.64,...,8541.54,10333.85,11473.74,2679.35,3482.88,5368.60,5452.46,4580.10,9064.68,6.097720e+04
040004201607140025,292990.0,上海市,潘景庭,341225199003188917,,4281.52,4006.25,6704.40,2497.54,6761.94,5678.90,4382.60,6174.20,7681.72,...,3544.72,3435.05,5547.21,2126.64,5540.04,4533.72,3701.00,5081.40,6354.04,3.986382e+04
040004201611230004,481568.0,上海市,黄漪燕,330681198812167820,735.15,21978.12,71344.93,,,,,974.20,1152.99,,...,19726.72,68724.63,,,,,826.30,941.50,,9.082570e+04
040004201708240021,488360.0,上海市,毛海霄,331081198412166333,,12657.98,10447.80,20290.31,6258.10,17219.49,11635.68,11723.42,31164.00,9461.02,...,11572.73,8832.74,17526.72,5460.60,14830.19,10260.88,10548.82,29375.20,8972.34,1.173802e+05
040005201112200015,439800.0,上海市,赖瑞清,350628197403033511,1178.25,17068.65,18359.99,12799.77,7044.29,20379.18,13739.13,11450.70,1161.10,6281.64,...,14979.33,16151.21,11397.09,6179.06,18596.74,12724.73,11012.20,964.50,5970.43,9.905369e+04
040008201706300027,426268.0,上海市,卫义,340803196403282777,,10722.13,8287.25,6592.23,4045.85,6880.11,9459.37,9419.68,6712.79,7081.97,...,9537.87,6930.05,5622.88,3631.36,5934.08,8590.48,8746.69,5772.20,6197.89,6.096350e+04
040009201406090013,429408.0,上海市,李业财,350429197210232018,,12379.58,8362.96,6527.28,4000.94,6532.18,10374.98,10887.57,11959.74,10190.20,...,10324.92,7028.56,5540.68,3273.29,5383.78,8457.58,8927.27,10066.07,8455.24,6.745739e+04
040009201709210021,500529.0,上海市,王业,320923198601142114,,18162.96,19685.48,15567.17,9363.99,12791.24,17116.92,15481.03,8092.61,9911.57,...,15685.77,17295.15,13561.46,7880.11,11137.65,15631.30,14218.00,7236.95,9045.06,1.116914e+05


In [655]:
# Write the monthly per-merchant order table to the results folder.
# NOTE(review): result_path already ends with '/', so the './' inside the literal is
# redundant; the resulting path still resolves to a file directly under result_path.
order_com_month.to_excel(result_path+'./2-中商下游订单信息.xlsx')

### 3.1.1订单总金额排名

In [446]:
def order_com_top(order):
    """Aggregate order amount and order count per merchant.

    Parameters
    ----------
    order : pandas.DataFrame
        Order-level table. The columns ``company_certificate_num``,
        ``company_id``, ``city_name``, ``order_receive_amt`` and ``order_id``
        are read.

    Returns
    -------
    pandas.DataFrame
        Indexed by (company_certificate_num, company_id, city_name) with the
        columns 订单金额 (summed amount), 订单数 (row count), 笔均订单金额
        (amount per order, rounded to 2 decimals) and 订单金额占比 (share of the
        summed amount over all merchants in ``order``).
    """
    order_com = pd.pivot_table(
        order,
        index=['company_certificate_num', 'company_id', 'city_name'],
        # Use the 'sum' string alias: passing the builtin ``sum`` depends on
        # pandas silently swapping it for its own reduction, which is deprecated.
        aggfunc={'order_receive_amt': 'sum', 'order_id': len},
    )
    # Derived columns are computed before the rename so they can use the raw names.
    order_com['笔均订单金额'] = np.round(order_com['order_receive_amt'] / order_com['order_id'], 2)
    order_com['订单金额占比'] = order_com['order_receive_amt'] / order_com['order_receive_amt'].sum()
    return order_com.rename(columns={'order_receive_amt': '订单金额', 'order_id': '订单数'})

# Per-merchant aggregates: every order vs. the orders of the current month.
order_com, order_com_thismonth = order_com_top(order_all_), order_com_top(order_new_)

# Top 20 merchants by total order amount.
order_com_top20 = order_com.sort_values(by='订单金额', ascending=False).head(20)
order_com_thismonth_top20 = order_com_thismonth.sort_values(by='订单金额', ascending=False).head(20)

In [447]:
order_com_top20

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,订单数,订单金额,笔均订单金额,订单金额占比
company_certificate_num,company_id,city_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
92330521MA2B3M6A75,468173.0,浙江省,165,473353.15,2868.81,0.004534
91110109700298389Y,446311.0,北京市,481,390559.39,811.97,0.003741
91110115MA009LXN13,316544.0,北京市,164,374028.29,2280.66,0.003583
92310113MA1L1BKB04,541863.0,上海市,164,363816.12,2218.39,0.003485
330184600202639,269605.0,浙江省,213,345353.78,1621.38,0.003308
91110108MA01BFMC7P,547443.0,北京市,244,340454.65,1395.31,0.003261
91110105MA00AY6G31,406166.0,北京市,117,310984.14,2657.98,0.002979
91110105MA00FHRU2X,123004.0,北京市,248,300891.46,1213.27,0.002882
91310118MA1JL12D2W,438260.0,上海市,128,300162.89,2345.02,0.002875
92310113MA1L17X7XM,513268.0,上海市,243,295539.65,1216.21,0.002831


In [448]:
order_com_thismonth_top20

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,订单数,订单金额,笔均订单金额,订单金额占比
company_certificate_num,company_id,city_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
91120104103387228X,275146.0,天津市,21,96203.66,4581.13,0.007798
92110114L58053298J,551090.0,北京市,18,93602.59,5200.14,0.007587
92110112L37991365C,222009.0,北京市,5,71343.5,14268.7,0.005783
92510104MA6DH8QC8R,39333.0,四川省,4,60570.86,15142.72,0.00491
130013201202240346,284103.0,上海市,27,59730.77,2212.25,0.004842
91110105MA00BLBD0G,1773.0,北京市,48,57490.23,1197.71,0.00466
440111601585677,426520.0,广东省,3,54180.12,18060.04,0.004392
91110105MA00FHRU2X,123004.0,北京市,31,53762.42,1734.27,0.004358
92110111MA00MCCF9A,81494.0,北京市,10,52176.14,5217.61,0.004229
92110111MA00DT8C6E,220468.0,北京市,27,51216.81,1896.92,0.004151


### 3.1.2订单笔均金额排名

In [452]:
# Top 20 merchants by average amount per order (笔均订单金额), not by total amount.
# NOTE(review): this rebinds order_com_top20 / order_com_thismonth_top20, which the
# previous section bound to the total-amount ranking.
order_com_top20 = order_com.sort_values('笔均订单金额',ascending=False)[:20]
order_com_thismonth_top20 = order_com_thismonth.sort_values('笔均订单金额',ascending=False)[:20] 

In [453]:
order_com_top20

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,订单数,订单金额,笔均订单金额,订单金额占比
company_certificate_num,company_id,city_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
91350200MA349YMF0U,442472.0,福建省,3,150218.5,50072.83,0.001439
91350211MA2XQDCE95,257195.0,福建省,3,148718.0,49572.67,0.001425
91350211678275091L,379611.0,福建省,4,150081.0,37520.25,0.001438
91350212M0001F7D88,285889.0,福建省,6,149999.0,24999.83,0.001437
91350211751627090F,486593.0,福建省,3,70169.0,23389.67,0.000672
92500113MA5Y74U4XU,341715.0,重庆市,13,90386.14,6952.78,0.000866
92110106MA00K77347,63343.0,北京市,4,21589.7,5397.42,0.000207
91330101MA2809F75Q,499512.0,浙江省,42,223412.93,5319.36,0.00214
9112011368770883XQ,409310.0,天津市,20,99114.69,4955.73,0.000949
92131082MA0DDNT31T,560492.0,北京市,9,44214.82,4912.76,0.000424


In [454]:
order_com_thismonth_top20

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,订单数,订单金额,笔均订单金额,订单金额占比
company_certificate_num,company_id,city_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
92440101MA59H3NJ77,154644.0,广东省,2,44695.86,22347.93,0.003623
440111601585677,426520.0,广东省,3,54180.12,18060.04,0.004392
92510104MA6DH8QC8R,39333.0,四川省,4,60570.86,15142.72,0.00491
92110112L37991365C,222009.0,北京市,5,71343.5,14268.7,0.005783
440683600375601,269713.0,广东省,1,11533.4,11533.4,0.000935
91330101MA2809F75Q,499512.0,浙江省,3,29697.93,9899.31,0.002407
510112600050216,450220.0,四川省,1,9396.89,9396.89,0.000762
440121600066906,205119.0,广东省,2,17090.7,8545.35,0.001385
911202237972903744,457495.0,天津市,1,7716.2,7716.2,0.000625
92510112MA6CM40MXX,444942.0,四川省,2,15367.8,7683.9,0.001246


## 3.2商户数、订单数及订单金额按月份分布图

In [455]:
# Monthly summary over all orders: one row per year_month plus the grand-total row
# that margins=True appends last.
# `unique` is a helper defined elsewhere in this notebook; presumably it returns the
# number of distinct values -- confirm against its definition.
order_month_count = pd.pivot_table(
    order_all_,
    index='year_month',
    values=['company_id', 'order_receive_amt', 'order_id'],
    # 'sum' alias instead of the builtin ``sum``, whose implicit replacement inside
    # pandas aggregations is deprecated.
    aggfunc={'company_id': unique, 'order_receive_amt': 'sum', 'order_id': unique},
    margins=True,
).reset_index()
order_month_count = order_month_count.rename(columns={'year_month':'月份','company_id':'活跃商户数',
                                                      'order_receive_amt':'订单金额','order_id':'订单数'})
order_month_count['户均订单金额'] = order_month_count['订单金额']/order_month_count['活跃商户数']
order_month_count['笔均订单金额'] = order_month_count['订单金额']/order_month_count['订单数']
# The last row is the grand total, so dividing by it gives each month's share.
order_month_count['订单金额占比'] = order_month_count['订单金额']/order_month_count['订单金额'].iloc[-1]

In [456]:
order_month_count

Unnamed: 0,月份,活跃商户数,订单数,订单金额,户均订单金额,笔均订单金额,订单金额占比
0,2018-10,277.0,359,368687.1,1331.000542,1026.983705,0.003532
1,2018-11,1063.0,10106,11330040.0,10658.547695,1121.119751,0.10853
2,2018-12,1073.0,11115,12805570.0,11934.36548,1152.09844,0.122664
3,2019-01,1082.0,12005,15569930.0,14389.950952,1296.953514,0.149144
4,2019-02,1022.0,5812,6492242.0,6352.487006,1117.040902,0.062189
5,2019-03,1057.0,10263,11338750.0,10727.298638,1104.818733,0.108614
6,2019-04,1079.0,10229,11346840.0,10516.071566,1109.281574,0.108691
7,2019-05,1076.0,11376,12161510.0,11302.51592,1069.049502,0.116495
8,2019-06,1096.0,9761,10644620.0,9712.248604,1090.526019,0.101965
9,2019-07,1076.0,10438,12337010.0,11465.624322,1181.932532,0.118176


In [458]:
order_month_count.to_excel(result_path+'./每月订单统计.xlsx')

## 3.3商户数、订单数及订单金额按地区分布图

In [465]:
def order_city_dist(order):
    """Summarise merchants, orders and order amount per ``city_name``.

    Parameters
    ----------
    order : pandas.DataFrame
        Order-level table; the columns ``city_name``, ``company_id``,
        ``order_receive_amt`` and ``order_id`` are read.

    Returns
    -------
    pandas.DataFrame
        One row per region plus the grand-total row appended last by
        ``margins=True``. Columns: 地区, 活跃商户数, 订单数, 订单金额,
        户均订单金额 and 笔均订单金额 (both rounded to 2 decimals), and 订单金额占比
        (amount divided by the grand-total amount).

    Notes
    -----
    ``unique`` is a helper defined elsewhere in this notebook; presumably it
    returns the number of distinct values -- confirm against its definition.
    """
    order_city_count = pd.pivot_table(
        order,
        index='city_name',
        values=['company_id', 'order_receive_amt', 'order_id'],
        # 'sum' alias instead of the builtin ``sum``, whose implicit replacement
        # inside pandas aggregations is deprecated.
        aggfunc={'company_id': unique, 'order_receive_amt': 'sum', 'order_id': unique},
        margins=True,
    ).reset_index()
    order_city_count = order_city_count.rename(columns={'city_name':'地区','company_id':'活跃商户数',
                                                        'order_receive_amt':'订单金额','order_id':'订单数'})
    order_city_count['户均订单金额'] = np.round(order_city_count['订单金额']/order_city_count['活跃商户数'],2)
    order_city_count['笔均订单金额'] = np.round(order_city_count['订单金额']/order_city_count['订单数'],2)
    # The grand-total row is last, so it is the denominator of every share.
    order_city_count['订单金额占比'] = order_city_count['订单金额']/order_city_count['订单金额'].iloc[-1]
    return order_city_count

In [466]:
order_city_count = order_city_dist(order_all_)
order_city_new_count = order_city_dist(order_new_)

In [460]:
order_city_count

Unnamed: 0,地区,活跃商户数,订单数,订单金额,户均订单金额,笔均订单金额,订单金额占比
0,上海市,192.0,14592,19583360.0,101996.65,1342.06,0.187589
1,北京市,404.0,41022,40582870.0,100452.66,989.3,0.388743
2,四川省,30.0,1322,2291840.0,76394.67,1733.62,0.021954
3,天津市,109.0,6621,9741315.0,89369.86,1471.28,0.093312
4,安徽省,45.0,1759,2549658.0,56659.06,1449.49,0.024423
5,山东省,91.0,5532,5544684.0,60930.59,1002.29,0.053112
6,广东省,108.0,6504,5876825.0,54415.04,903.57,0.056294
7,河北省,3.0,283,214525.6,71508.52,758.04,0.002055
8,浙江省,102.0,8267,11042050.0,108255.38,1335.68,0.105772
9,福建省,112.0,4373,5427920.0,48463.57,1241.23,0.051994


In [469]:
order_city_new_count.sort_values('订单金额',ascending=False)

Unnamed: 0,地区,活跃商户数,订单数,订单金额,户均订单金额,笔均订单金额,订单金额占比
11,All,1076.0,10438,12337011.77,11465.62,1181.93,1.0
1,北京市,385.0,4787,5104597.98,13258.7,1066.35,0.413763
0,上海市,184.0,2105,2676299.18,14545.1,1271.4,0.216933
8,浙江省,93.0,1016,1259726.58,13545.45,1239.89,0.10211
3,天津市,97.0,703,1171687.94,12079.26,1666.7,0.094973
6,广东省,87.0,672,759202.8,8726.47,1129.77,0.061539
5,山东省,79.0,505,560080.85,7089.63,1109.07,0.045398
9,福建省,80.0,369,390258.43,4878.23,1057.61,0.031633
2,四川省,12.0,31,153271.72,12772.64,4944.25,0.012424
10,重庆市,25.0,138,137355.3,5494.21,995.33,0.011134


In [461]:
order_city_count.to_excel(result_path+'./各地区订单统计.xlsx')

## 3.4商户数按订单总金额分布情况

In [470]:
def amt_dist(company_month_amt):
    """Count merchants per monthly-amount bracket.

    Parameters
    ----------
    company_month_amt : pandas.DataFrame
        Per-merchant table; the columns ``月交易金额`` and ``超市ID`` are read.
        A column ``amt_bins`` holding each row's bracket is written onto this
        frame in place (existing behaviour, kept so callers still see it).

    Returns
    -------
    pandas.DataFrame
        One row per bracket, including brackets with no merchants, with the
        columns 月均订单金额 (the interval), 商户数, 商户占比 and 累计占比.
    """
    # Bracket edges: 1,000 steps below 10,000; 2,000 steps below 20,000;
    # 5,000 steps below 100,000; then 100,000, 150,000 and an open-ended top bin.
    amt_bins = list(np.arange(0,10000,1000))
    amt_bins.extend(list(np.arange(10000,20000,2000)))
    amt_bins.extend(list(np.arange(20000,100000,5000)))
    amt_bins.extend([100000,150000,np.inf])
    amt_bins = sorted(set(amt_bins))

    company_month_amt['amt_bins'] = pd.cut(company_month_amt['月交易金额'],bins=amt_bins)
    avg_amt_count = (
        company_month_amt
        # observed=False is pinned so empty brackets stay in the table even on
        # pandas versions whose default for categorical groupers differs.
        .groupby('amt_bins', observed=False)
        .agg({'超市ID': len})
        .fillna(0)  # brackets without merchants count as 0
        .reset_index()
        .rename(columns={'超市ID':'商户数','amt_bins':'月均订单金额'})
    )
    avg_amt_count['商户占比'] = avg_amt_count['商户数'] / avg_amt_count['商户数'].sum()
    avg_amt_count['累计占比'] = np.cumsum(avg_amt_count['商户占比'])
    return avg_amt_count

In [889]:
# Build the per-merchant statistics and their amount-bracket distributions.
# `order_analysis` and `convert_time_col` are defined elsewhere in this notebook.
# NOTE(review): the inputs here are `order_all_201909_` / `order_new`, while the
# neighbouring sections use `order_all_` / `order_new_` -- confirm this is intended.
avg_amt_month = order_analysis(convert_time_col(order_all_201909_)) ## monthly-average order statistics
avg_amt_new = order_analysis(convert_time_col(order_new)) ## current-month order statistics

amt_count = amt_dist(avg_amt_month)
amt_count_new = amt_dist(avg_amt_new)



coverting :order_buy_time
Wrong date format: NaT
coverting :order_receive_time
Wrong date format: NaT
coverting :order_send_time
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


coverting :order_buy_time
coverting :order_send_time
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
Wrong date format: NaT
coverting :order_receive_time


In [827]:
amt_count

Unnamed: 0,月均订单金额,商户数,商户占比,累计占比
0,"(0.0, 1000.0]",64.0,0.041857,0.041857
1,"(1000.0, 2000.0]",124.0,0.081099,0.122956
2,"(2000.0, 3000.0]",116.0,0.075867,0.198823
3,"(3000.0, 4000.0]",131.0,0.085677,0.2845
4,"(4000.0, 5000.0]",101.0,0.066056,0.350556
5,"(5000.0, 6000.0]",88.0,0.057554,0.40811
6,"(6000.0, 7000.0]",91.0,0.059516,0.467626
7,"(7000.0, 8000.0]",77.0,0.05036,0.517986
8,"(8000.0, 9000.0]",73.0,0.047744,0.565729
9,"(9000.0, 10000.0]",85.0,0.055592,0.621321


In [762]:
amt_count_new

Unnamed: 0,月均订单金额,商户数,商户占比,累计占比
0,"(0.0, 1000.0]",497.0,0.301761,0.301761
1,"(1000.0, 2000.0]",361.0,0.219186,0.520947
2,"(2000.0, 3000.0]",253.0,0.153613,0.67456
3,"(3000.0, 4000.0]",183.0,0.111111,0.785671
4,"(4000.0, 5000.0]",131.0,0.079539,0.865209
5,"(5000.0, 6000.0]",84.0,0.051002,0.916211
6,"(6000.0, 7000.0]",44.0,0.026715,0.942927
7,"(7000.0, 8000.0]",25.0,0.015179,0.958106
8,"(8000.0, 9000.0]",27.0,0.016393,0.974499
9,"(9000.0, 10000.0]",8.0,0.004857,0.979356


In [None]:
# Export the two bracket distributions and the per-merchant statistics.
# os.path.join replaces `result_path + './...'`, which only resolves to the intended
# directory when `result_path` already ends with a path separator.
amt_count.to_excel(os.path.join(result_path, '商户月均交易金额分布.xlsx'))
amt_count_new.to_excel(os.path.join(result_path, '商户本月交易金额分布.xlsx'))
avg_amt_month.to_excel(os.path.join(result_path, '商户交易情况.xlsx'))

## 3.5商户数单笔订单金额区间分布

In [476]:
# Bracket edges for single-order amounts: 100 steps below 2,000; 1,000 steps below
# 10,000; 10,000 steps below 50,000; 50,000 steps below 200,000; then 200,000 and an
# open-ended top bin. Built as a set so duplicate edges collapse, then sorted.
amt_bins = sorted(
    {
        edge
        for start, stop, step in (
            (0, 2000, 100),
            (2000, 10000, 1000),
            (10000, 50000, 10000),
            (50000, 200000, 50000),
        )
        for edge in list(np.arange(start, stop, step))
    }
    | {200000, np.inf}
)

def order_amt_dist(order, bins=None):
    """Count merchants and orders per single-order amount bracket.

    Parameters
    ----------
    order : pandas.DataFrame
        Order-level table; the columns ``order_receive_amt``, ``company_id``
        and ``order_id`` are read. A column ``amt_bins`` holding each order's
        bracket is written onto this frame in place (existing behaviour).
    bins : sequence of numbers, optional
        Bracket edges passed to ``pandas.cut``. Defaults to the module-level
        ``amt_bins``, which is what the function always used before.

    Returns
    -------
    pandas.DataFrame
        One row per bracket, including empty ones, with the columns
        单笔订单金额 (the interval), 商户数, 订单数, 订单数占比 and 累计占比.

    Notes
    -----
    ``unique`` is a helper defined elsewhere in this notebook; presumably it
    returns the number of distinct values -- confirm against its definition.
    """
    if bins is None:
        bins = amt_bins
    order['amt_bins'] = pd.cut(order['order_receive_amt'], bins=bins)

    order_count = (
        order
        # observed=False is pinned so empty brackets stay in the table even on
        # pandas versions whose default for categorical groupers differs.
        .groupby('amt_bins', observed=False)
        .agg({'company_id': unique, 'order_id': len})
        .fillna(0)  # brackets without orders count as 0
        .reset_index()
    )
    order_count = order_count.rename(columns={'company_id':'商户数','order_id':'订单数','amt_bins':'单笔订单金额'})
    order_count['订单数占比'] = order_count['订单数'] / order_count['订单数'].sum()
    order_count['累计占比'] = np.cumsum(order_count['订单数占比'])
    return order_count

order_amt_count = order_amt_dist(order_all_) ## single-order amount distribution, all orders
order_amt_new_count = order_amt_dist(order_new_) ## single-order amount distribution, current month

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [477]:
order_amt_count

Unnamed: 0,单笔订单金额,商户数,订单数,订单数占比,累计占比
0,"(0.0, 100.0]",307.0,5495.0,0.060118,0.060118
1,"(100.0, 200.0]",317.0,2261.0,0.024736,0.084854
2,"(200.0, 300.0]",348.0,2833.0,0.030994,0.115848
3,"(300.0, 400.0]",362.0,1626.0,0.017789,0.133637
4,"(400.0, 500.0]",850.0,5546.0,0.060676,0.194313
5,"(500.0, 600.0]",938.0,6686.0,0.073148,0.267461
6,"(600.0, 700.0]",956.0,5557.0,0.060796,0.328257
7,"(700.0, 800.0]",1019.0,8351.0,0.091364,0.419621
8,"(800.0, 900.0]",1056.0,7700.0,0.084241,0.503862
9,"(900.0, 1000.0]",996.0,5826.0,0.063739,0.567601


In [478]:
order_amt_new_count

Unnamed: 0,单笔订单金额,商户数,订单数,订单数占比,累计占比
0,"(0.0, 100.0]",162.0,569.0,0.054523,0.054523
1,"(100.0, 200.0]",135.0,272.0,0.026064,0.080586
2,"(200.0, 300.0]",142.0,356.0,0.034113,0.114699
3,"(300.0, 400.0]",148.0,303.0,0.029034,0.143733
4,"(400.0, 500.0]",211.0,377.0,0.036125,0.179858
5,"(500.0, 600.0]",271.0,513.0,0.049157,0.229015
6,"(600.0, 700.0]",299.0,556.0,0.053277,0.282292
7,"(700.0, 800.0]",422.0,1030.0,0.098697,0.380989
8,"(800.0, 900.0]",481.0,1051.0,0.100709,0.481698
9,"(900.0, 1000.0]",365.0,628.0,0.060176,0.541874


In [None]:
# The original line had no object before `.to_excel` and could not run.
# NOTE(review): `order_amt_count` is inferred from the file name
# (single-order amount distribution) -- confirm it is the intended table.
order_amt_count.to_excel(result_path+'./单笔订单金额分布.xlsx')

## 3.6商户订单品类情况

In [706]:
order_detail_.product_name.unique().size

12901

In [479]:
def order_product_dist(order_detail):
    """Rank products by ordered amount.

    Parameters
    ----------
    order_detail : pandas.DataFrame
        Order-line table; the columns ``product_name``, ``product_num``,
        ``product_price``, ``order_id`` and ``company_id`` are read. A column
        ``amt`` (``product_num * product_price``) is written onto this frame
        in place (existing behaviour).

    Returns
    -------
    pandas.DataFrame
        One row per product, sorted by 订单金额 descending with a fresh 0..n-1
        index. Columns: 产品名称, 订单金额, 下单商户数, 订单数, 订单数占比 and 金额占比
        (both shares rounded to 4 decimals).

    Notes
    -----
    ``unique`` is a helper defined elsewhere in this notebook; presumably it
    returns the number of distinct values -- confirm against its definition.
    """
    order_detail['amt'] = order_detail['product_num']*order_detail['product_price']

    product_count = order_detail.pivot_table(
        index='product_name',
        # 'sum' alias instead of the builtin ``sum``, whose implicit replacement
        # inside pandas aggregations is deprecated.
        aggfunc={'amt': 'sum', 'order_id': unique, 'company_id': unique},
    ).reset_index()
    product_count = product_count.rename(columns={'product_name':'产品名称','company_id':'下单商户数',
                                                  'order_id':'订单数','amt':'订单金额'})
    product_count['订单数占比'] = np.round(product_count['订单数']/product_count['订单数'].sum(),4)
    product_count['金额占比'] = np.round(product_count['订单金额']/product_count['订单金额'].sum(),4)
    # Sorted once here; the earlier version also sorted by the same key right after
    # the pivot, which this final sort made redundant.
    return product_count.sort_values('订单金额',ascending=False).reset_index(drop=True)

product_count = order_product_dist(order_detail_)
product_new_count = order_product_dist(order_detail_new_)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [481]:
product_count[:20]

Unnamed: 0,产品名称,订单金额,下单商户数,订单数,订单数占比,金额占比
0,红牛250ml听装,5404061.31,1090,14254,0.0083,0.0456
1,中国体育彩票电彩机,2300000.0,23,23,0.0,0.0194
2,鲁花5S压榨一级花生油5L赠品装,1831015.49,217,519,0.0003,0.0155
3,可口可乐500ml,1153126.29,1072,10365,0.0061,0.0097
4,36°牛栏山百年400ml,1135310.0,195,613,0.0004,0.0096
5,康师傅冰红茶1L,914409.08,1021,10012,0.0059,0.0077
6,统一阿萨姆奶茶500ml,880954.57,1092,10004,0.0059,0.0074
7,燕京10度特质啤酒330ml,844467.5,383,4984,0.0029,0.0071
8,百岁山天然矿泉水570ml,670804.07,931,7225,0.0042,0.0057
9,可口雪碧500ml,668988.31,1006,6227,0.0036,0.0056


In [704]:
product_count.shape

(12901, 6)

In [482]:
product_new_count[:20]

Unnamed: 0,产品名称,订单金额,下单商户数,订单数,订单数占比,金额占比
0,红牛250ml听装,786747.95,631,1724,0.0083,0.0578
1,东鹏特饮维生素功能饮料500ml瓶装,229450.28,130,319,0.0015,0.0169
2,可口可乐500ml,159297.93,680,1766,0.0085,0.0117
3,康师傅冰红茶1L,154568.96,580,1559,0.0075,0.0114
4,52°五粮液歪嘴100ml,124352.0,6,10,0.0,0.0091
5,康师傅冰红茶500ml,120571.52,640,1532,0.0074,0.0089
6,百岁山天然矿泉水570ml,103208.1,513,1155,0.0056,0.0076
7,燕京10度特质啤酒330ml,101121.0,260,675,0.0033,0.0074
8,百威啤酒500ml*3*6听装,97805.55,246,436,0.0021,0.0072
9,怡宝饮用纯净水555ml,97100.97,507,1378,0.0066,0.0071


In [None]:
# product_count.to_excel(result_path+'./品类情况.xlsx')

## 3.7商户订单物流天数情况

In [483]:
def delivery_dist(order):
    """Distribution of orders over three whole-day intervals.

    Writes ``rec_buy_days`` (buy -> receive), ``send_buy_days`` (buy -> send)
    and ``rec_send_days`` (send -> receive) onto ``order`` in place, then
    counts orders per distinct day value of each.

    Parameters
    ----------
    order : pandas.DataFrame
        Needs datetime columns ``order_buy_time``, ``order_send_time``,
        ``order_receive_time`` and an ``order_id`` column.

    Returns
    -------
    tuple of three pandas.DataFrame
        In the order (rec_buy_days, send_buy_days, rec_send_days). Each frame
        is indexed by the day count and has the columns ``order_id`` (number
        of orders) and 订单数占比 (share of the counted orders).
    """
    # (new column, later timestamp, earlier timestamp)
    day_spans = (
        ('rec_buy_days', 'order_receive_time', 'order_buy_time'),
        ('send_buy_days', 'order_send_time', 'order_buy_time'),
        ('rec_send_days', 'order_receive_time', 'order_send_time'),
    )
    for span_col, later_col, earlier_col in day_spans:
        order[span_col] = (order[later_col] - order[earlier_col]).dt.days

    def _share_by(span_col):
        # Orders per day value plus each value's share of the counted orders.
        counts = order.groupby([span_col]).agg({'order_id': len})
        counts['订单数占比'] = counts['order_id'] / counts['order_id'].sum()
        return counts

    return tuple(_share_by(span_col) for span_col, _, _ in day_spans)

order_rec_buy_days,order_send_buy_days,order_rec_send_days = delivery_dist(order_all_)
order_rec_buy_days_new,order_send_buy_days_new,order_rec_send_days_new = delivery_dist(order_new_)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [490]:
def delivery_outlier(order):
    """Return the orders whose receive time precedes their send time.

    The three day-interval columns (``rec_buy_days``, ``send_buy_days``,
    ``rec_send_days``) are written onto ``order`` in place; the result is the
    subset of rows where ``rec_send_days`` is negative.

    Parameters
    ----------
    order : pandas.DataFrame
        Needs datetime columns ``order_buy_time``, ``order_send_time`` and
        ``order_receive_time``.

    Returns
    -------
    pandas.DataFrame
        Rows of ``order`` with ``rec_send_days < 0``.
    """
    bought, sent, received = 'order_buy_time', 'order_send_time', 'order_receive_time'
    order['rec_buy_days'] = (order[received] - order[bought]).dt.days
    order['send_buy_days'] = (order[sent] - order[bought]).dt.days
    order['rec_send_days'] = (order[received] - order[sent]).dt.days
    # A negative whole-day component means the goods were received before being sent.
    received_before_sent = order['rec_send_days'].lt(0)
    return order.loc[received_before_sent]

unnormal_order = delivery_outlier(order_all_)
unnormal_order.to_excel(result_path+'./收货时间早于发货时间的异常订单.xlsx',index=False)

In [489]:
order_rec_send_days_new

Unnamed: 0_level_0,order_id,订单数占比
rec_send_days,Unnamed: 1_level_1,Unnamed: 2_level_1
-1.0,5,0.000479
0.0,10090,0.966753
1.0,185,0.017725
2.0,141,0.01351
3.0,13,0.001246
4.0,1,9.6e-05
5.0,1,9.6e-05
7.0,1,9.6e-05


# 4.	供应链金融业务分析

In [943]:
bank_results = bank_datasets(bank_loan,bank_repay)

In [944]:
bank_results['loan_balance_count']

Unnamed: 0,月份,贷款次数,贷款企业数,贷款金额,次均贷款金额,还款金额,还款次数,还款企业数,次均还款金额,贷款余额
0,2018-08,3.0,3.0,400000.0,133333.333333,0.0,0.0,0.0,0.0,400000.0
1,2018-09,689.0,637.0,75565000.0,109673.439768,18293.89,3.0,3.0,6097.963333,75946710.0
2,2018-10,139.0,136.0,16145000.0,116151.079137,4220510.0,702.0,639.0,6012.121895,87871200.0
3,2018-11,223.0,219.0,27559810.0,123586.606771,5701956.0,847.0,755.0,6731.942916,109729100.0
4,2018-12,76.0,74.0,6180000.0,81315.789474,7840470.0,1061.0,948.0,7389.698294,108068600.0
5,2019-01,65.0,64.0,8955000.0,137769.230769,8022464.0,1116.0,1000.0,7188.588244,109001100.0
6,2019-02,16.0,16.0,1615000.0,100937.5,7664375.0,1164.0,1034.0,6584.514244,102951700.0
7,2019-03,42.0,40.0,4840000.0,115238.095238,8834776.0,1171.0,1032.0,7544.642186,98956970.0
8,2019-04,63.0,60.0,8605000.0,136587.301587,7637475.0,1120.0,1001.0,6819.174187,99924490.0
9,2019-05,46.0,44.0,6260000.0,136086.956522,8503882.0,1167.0,1037.0,7286.959494,97680610.0


In [945]:
# Export the monthly loan-balance table without its index.
# NOTE(review): the target is a hard-coded absolute path inside one user's home
# directory, unlike the other exports, which are built from `result_path`; this cell
# will fail on any other machine.
bank_results['loan_balance_count'].to_excel(
    '/Users/candi/Documents/项目资料/供应链金融资料/月报/data/zshm/201910/zshm_loan_balance_count.xlsx',index=False)

In [930]:
bank_results['compensation_detail'][bank_results['compensation_detail']['repay_month']=='2019-09']

Unnamed: 0,JNLNO,MERCHANTNO,PRODID,DUENO,CONTNO,IDTYPE,IDNO,IDNAME,LOANAMOUNT,LOANDATE,...,REPAYDATE,CAPITAL,REPAYSOURCE,NORMALINTEREST,PENALTYINTEREST,repay_month,repay_loan_days,overdue_days,due_loan_days,repay_loan_days_bins
135,010120181127000060046201000000,20180001,LN0026,20180010020286,20180904000000001236,ZJ01,352129197204273011,马寿全,50000.0,2018-09-04,...,2019-09-12,4275.18,3,12.75,17.1,2019-09,373,8,365,"[360.0, 540.0)"
651,010120181127000060093101000000,20180001,LN0026,20180010020458,20180906000000001321,ZJ01,23080519850831003X,冯文学,150000.0,2018-09-06,...,2019-09-23,6232.04,3,392.42,24.93,2019-09,382,-349,731,"[360.0, 540.0)"
663,010120181127000060093301000000,20180001,LN0026,20180010020463,20180906000000001332,ZJ01,350725198404044053,王志钦,60000.0,2018-09-06,...,2019-09-14,5130.17,3,16.91,20.53,2019-09,373,8,365,"[360.0, 540.0)"
920,010120181127000060098601000000,20180001,LN0026,20180010020509,20180906000000001323,ZJ01,330322197508301617,尤国庆,150000.0,2018-09-06,...,2019-09-23,6232.04,3,392.42,24.93,2019-09,382,-349,731,"[360.0, 540.0)"
2260,010120181127000060157101000000,20180001,LN0026,20180010021011,20180907000000001536,ZJ01,350429197212081049,李花,150000.0,2018-09-09,...,2019-09-23,6232.04,3,392.42,24.93,2019-09,379,-352,731,"[360.0, 540.0)"
2459,010120181127000060169001000000,20180001,LN0026,20180010021257,20180909000000001630,ZJ01,51032119900110379X,曹建康,150000.0,2018-09-10,...,2019-09-23,6232.04,3,392.42,24.93,2019-09,378,-353,731,"[360.0, 540.0)"
3320,010120181127000060275001000000,20180001,LN0026,20180010021362,20180911000000001854,ZJ01,500234199207102738,唐刚军,100000.0,2018-09-11,...,2019-09-23,5619.51,3,187.85,22.48,2019-09,377,-170,547,"[360.0, 540.0)"
3595,010120181127000060278901000000,20180001,LN0026,20180010021544,20180911000000002019,ZJ01,230281198811043919,戴云龙,200000.0,2018-09-11,...,2019-09-23,8309.39,3,523.22,33.23,2019-09,377,-354,731,"[360.0, 540.0)"
4492,010120181127000060328801000000,20180001,LN0026,20180010021752,20180912000000002120,ZJ01,330681198812167820,黄漪燕,120000.0,2018-09-13,...,2019-09-23,6743.41,3,225.42,26.97,2019-09,375,-172,547,"[360.0, 540.0)"
6143,010120181127000060442701000000,20180001,LN0026,20180010023533,20180917000000002372,ZJ01,350823198606184612,李华林,50000.0,2018-09-19,...,2019-09-23,2809.76,3,93.92,11.23,2019-09,369,-178,547,"[360.0, 540.0)"


In [269]:
bank_repay.columns

Index(['JNLNO', 'HJJNLNO', 'HJGLOBALJNLNO', 'MERCHANTNO', 'PRODID', 'DUENO',
       'CONTNO', 'IDTYPE', 'IDNO', 'IDNAME', 'REPAYDATE', 'CAPITAL',
       'NORMALINTEREST', 'PENALTYINTEREST', 'COMPOUNDINTEREST', 'REPAYACCNO',
       'REPAYSOURCE', 'CHANNELJNLNO', 'SendRouterJnlNo', 'CREATETIME',
       'UPDATETIME', 'REPAYMONTH', 'RATE', 'PROFITSHARING', 'PROFITAMOUNT',
       'year_month'],
      dtype='object')

In [675]:
bank_loan.columns

Index(['JNLNO', 'MERCHANTNO', 'PRODID', 'DUENO', 'CONTNO', 'IDTYPE', 'IDNO',
       'IDNAME', 'LOANAMOUNT', 'LOANDATE', 'DUEDATE', 'NORMALRATES',
       'PENALTYRATES', 'REPAYTYPE', 'CHANNELJNLNO', 'SendRouterJnlNo',
       'CREATETIME', 'UPDATETIME', 'year_month', 'due_month', 'diff'],
      dtype='object')

In [829]:
# Headline figures for the supply-chain-finance section: cumulative totals first,
# then the same figures restricted to `this_month`.
compensation_detail = bank_results['compensation_detail']
total_loan_amt = bank_loan['LOANAMOUNT'].sum()
total_repaid_capital = bank_repay['CAPITAL'].sum()
borrower_cnt = bank_loan['IDNO'].unique().size

print('''中商下游合作商户数{0}户，有贷款记录的客户{1}户，累计放款金额{2}元，
户均贷款{3}元，累计还款{4}元，贷款余额{5}元；代偿了{6}户，共代偿{7}次，代偿金额{8}元'''.format(
        white_list.shape[0],
        borrower_cnt,
        total_loan_amt,
        np.round(total_loan_amt/borrower_cnt,2),
        # Rounded like the other amounts; the raw float sum printed with a
        # floating-point tail (e.g. ...232.24000001).
        np.round(total_repaid_capital,2),
        np.round(total_loan_amt - total_repaid_capital,2),
        compensation_detail['IDNO'].unique().size,
        len(compensation_detail),
        np.round(compensation_detail['CAPITAL'].sum(),2),
))

# Reporting month, matched against the 'year_month' / 'repay_month' string columns.
this_month = '2019-08'
loan_this_month = bank_loan[bank_loan['year_month']==this_month]
repay_this_month = bank_repay[bank_repay['year_month']==this_month]
compensation_this_month = compensation_detail[compensation_detail['repay_month']==this_month]

print('''本月放款{0}户，共放款{1}元，还款{2}元，代偿了{3}户，共代偿{4}次，代偿金额{5}元。'''.format(
        loan_this_month['IDNO'].unique().size,
        loan_this_month['LOANAMOUNT'].sum(),
        np.round(repay_this_month['CAPITAL'].sum(),2),
        compensation_this_month['IDNO'].unique().size,
        compensation_this_month.shape[0],
        np.round(compensation_this_month['CAPITAL'].sum(),2)
))

中商下游合作商户数1619户，有贷款记录的客户1245户，累计放款金额162184813.31元，
户均贷款130268.93元，累计还款94610232.24000001元，贷款余额67574581.07元；代偿了55户，共代偿124次，代偿金额805997.34元
本月放款1户，共放款20000.0元，还款9917838.06元，代偿了0户，共代偿0次，代偿金额0.0元。


In [938]:
bank_results['interest_income']

Unnamed: 0,月份,利息收入,累计利息收入
0,2018-09,1274.03,1274.03
1,2018-10,377194.28,378468.31
2,2018-11,401125.6,779593.91
3,2018-12,510324.95,1289918.86
4,2019-01,506008.03,1795926.89
5,2019-02,518777.12,2314704.01
6,2019-03,485831.83,2800535.84
7,2019-04,466369.2,3266905.04
8,2019-05,468917.43,3735822.47
9,2019-06,460053.09,4195875.56


## 4.1供应链金融业务情况总表

In [504]:
# Per-borrower (IDNO, IDNAME) x month pivots of the loan and repayment ledgers.
# `unique` is a helper defined elsewhere in this notebook; presumably it returns the
# number of distinct values -- confirm against its definition.
# The 'sum' alias replaces the builtin ``sum``, whose implicit replacement inside
# pandas aggregations is deprecated.
loan_com_month_count = pd.pivot_table(
    bank_loan,
    index=['IDNO', 'IDNAME'],
    columns=['year_month'],
    aggfunc={'LOANAMOUNT': 'sum', 'DUENO': unique},
)

repay_com_month_count = pd.pivot_table(
    bank_repay,
    index=['IDNO', 'IDNAME'],
    columns=['year_month'],
    aggfunc={'CAPITAL': 'sum', 'DUENO': len},
)

In [656]:
# Give the pivot column levels their report labels.
loan_com_month_count = loan_com_month_count.rename(columns={'DUENO':'贷款次数','LOANAMOUNT':'贷款金额'})
# The repayment pivot aggregates 'CAPITAL' (it has no 'LOANAMOUNT' level), so that
# is the key to map; the previous 'LOANAMOUNT' key left the amount level unrenamed.
repay_com_month_count = repay_com_month_count.rename(columns={'DUENO':'还款次数','CAPITAL':'还款金额'})

In [673]:
# Export the merged merchant / loan / repayment table.
# NOTE(review): `a` is assigned by the cell that follows this one (execution counts
# 671 -> 673), so a top-to-bottom run reaches this export before `a` exists.
a.to_excel(
    result_path+'3-中商下游贷款还款信息.xlsx')

In [671]:
# Attach the per-month loan and repayment pivots to the merchant master data.
# Inner step: loan pivot joined to the repayment pivot on their shared
# (IDNO, IDNAME) index with how='right', i.e. every repayment row is kept.
# Outer step: one row per company_id from `company_use`, left-joined by matching
# (company_legal_person_id, company_legal_person) against that index.
# NOTE(review): `a` is a throwaway name that the export and display cells rely on.
a = company_use.drop_duplicates('company_id').merge(loan_com_month_count.merge(
    repay_com_month_count,left_index=True,right_index=True,how='right'),
    left_on=['company_legal_person_id','company_legal_person'],right_index=True,how='left')



In [672]:
a

Unnamed: 0,company_id,company_name,company_nickname,company_type,company_certificate_type,company_certificate_num,city_name,s_name,district_name,company_address,...,"(还款次数, 2018-10)","(还款次数, 2018-11)","(还款次数, 2018-12)","(还款次数, 2019-01)","(还款次数, 2019-02)","(还款次数, 2019-03)","(还款次数, 2019-04)","(还款次数, 2019-05)","(还款次数, 2019-06)","(还款次数, 2019-07)"
0,100420.0,福廉美超市老店（带ps机）,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110112MA007XX567,北京市,北京辖区,通州区,张家湾开发区张家湾开发区,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0
1,100932.0,九九副食,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,510106601605201,四川省,成都市,武侯区,人民南路人民南路三段,...,,,,,,,,,,
2,101044.0,北京金雅德超市,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,92110116L76799675Y,北京市,北京辖区,怀柔区,庙城镇西台下村,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,102128.0,北京城乡·118便利店,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110115MA00E5XK07,北京市,北京辖区,大兴区,地盛西路,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,102510.0,北京嗨家密云宾阳里店NO.0076（8open）,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110228L41129188N,北京市,北京辖区,密云县,行宫前街,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
5,102588.0,京东便利店,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110228764248049X,北京市,北京辖区,密云县,密云城区车站路,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,103558.0,双宝超市（7open）,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,92131082MA09NA0E2A,北京市,北京辖区,廊坊市,燕郊镇燕灵路,...,,,,,,,,,,
7,103832.0,北京茶坞华祥便利超市,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,110116604098894,北京市,北京辖区,怀柔区,桥梓镇茶坞,...,,,,,,,,,,
8,103926.0,远东超市,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,92131082MA080BH7XH,北京市,北京辖区,廊坊市,燕郊镇迎宾路,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
9,105146.0,万家超市,,1.0,http://hmres.huimin100.cn/cms-huimin/picture/1...,91110105MA01AL4M5B,北京市,北京辖区,朝阳区,常营街道像素北区,...,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0


In [666]:
loan_com_month_count.merge(
    repay_com_month_count,left_index=True,right_index=True,how='left')

Unnamed: 0_level_0,Unnamed: 1_level_0,贷款次数,贷款次数,贷款次数,贷款次数,贷款次数,贷款次数,贷款次数,贷款次数,贷款次数,贷款次数,...,还款次数,还款次数,还款次数,还款次数,还款次数,还款次数,还款次数,还款次数,还款次数,还款次数
Unnamed: 0_level_1,year_month,2018-08,2018-09,2018-10,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,...,2018-10,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07
IDNO,IDNAME,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
110103198405210935,刘彬,,,,1.0,,,,,,,...,,,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0
110104196404280509,安长丽,,1.0,,,,,,,,,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
110106196808052732,杨忠利,,,,,,,,,,,...,,,,,,,,,,3.0
110106197609222721,李静,,,,,,,,,,,...,,,,,,,,,,1.0
11010719770803241X,谷宇,,,,,,,,,,1.0,...,,,,,,,,,1.0,1.0
110107198101142115,孙泰祺,,,,1.0,,,,,,,...,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
110108196902126833,杨有文,,1.0,,,,,,,,,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
110109197205093133,石瑞海,,,,1.0,,,,,,,...,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
110109197909053420,杜维,,,,,,,,,1.0,,...,,,,,,,,1.0,1.0,2.0
110111196909016125,安雪莲,,,,,,,,,1.0,,...,,,,,,,,1.0,1.0,1.0


In [335]:
company_use.drop_duplicates('company_id').merge(loan_com_month_count,
                right_on=['company_legal_person_id','company_name'],left_index=True,how='left')






Unnamed: 0,"(DUENO, 2018-08)","(DUENO, 2018-09)","(DUENO, 2018-10)","(DUENO, 2018-11)","(DUENO, 2018-12)","(DUENO, 2019-01)","(DUENO, 2019-02)","(DUENO, 2019-03)","(DUENO, 2019-04)","(DUENO, 2019-05)",...,PBIDNO,STATUS,MODELLIMIT,CREDITLimit,PUSERID,SIGNSTATUS,AUDITTIME,APPLYCOUNT,remark,UPDATETIME
1576,,,,1.0,,,,,,,...,,,,,,,NaT,,,NaT
1576,,1.0,,,,,,,,,...,,,,,,,NaT,,,NaT
1576,,,,,,,,,,,...,,,,,,,NaT,,,NaT
1576,,,,,,,,,,,...,,,,,,,NaT,,,NaT
1576,,,,,,,,,,1.0,...,,,,,,,NaT,,,NaT
1576,,,,1.0,,,,,,,...,,,,,,,NaT,,,NaT
1576,,1.0,,,,,,,,,...,,,,,,,NaT,,,NaT
1576,,,,1.0,,,,,,,...,,,,,,,NaT,,,NaT
1576,,,,,,,,,1.0,,...,,,,,,,NaT,,,NaT
1576,,,,,,,,,1.0,,...,,,,,,,NaT,,,NaT


In [641]:
bank_results['loan_balance_count'].to_excel(result_path+'./每月贷款余额.xlsx')

## 4.2商户贷款次数及金额按月分布

In [638]:
bank_results['bank_loan_count']

Unnamed: 0,月份,贷款次数,贷款企业数,贷款金额,次均贷款金额
0,2018-08,3,3,400000.0,133333.333333
1,2018-09,689,637,75565000.0,109673.439768
2,2018-10,139,136,16145000.0,116151.079137
3,2018-11,223,219,27559810.0,123586.606771
4,2018-12,76,74,6180000.0,81315.789474
5,2019-01,65,64,8955000.0,137769.230769
6,2019-02,16,16,1615000.0,100937.5
7,2019-03,42,40,4840000.0,115238.095238
8,2019-04,63,60,8605000.0,136587.301587
9,2019-05,46,44,6260000.0,136086.956522


In [506]:
bank_results['bank_loan_count'].to_excel(result_path+'./每月贷款.xlsx')

## 4.3商户还款次数及金额按月分布

In [507]:
bank_results['bank_repay_count']

Unnamed: 0,月份,还款金额,还款次数,还款企业数,次均还款金额
0,2018-09,18293.89,3,3,6097.963333
1,2018-10,4220510.0,702,639,6012.121895
2,2018-11,5701956.0,847,755,6731.942916
3,2018-12,7840470.0,1061,948,7389.698294
4,2019-01,8022464.0,1116,1000,7188.588244
5,2019-02,7664375.0,1164,1034,6584.514244
6,2019-03,8834776.0,1171,1032,7544.642186
7,2019-04,7637475.0,1120,1001,6819.174187
8,2019-05,8503882.0,1167,1037,7286.959494
9,2019-06,7988946.0,1156,1037,6910.852777


In [510]:
bank_repay.query("year_month == '2019-07'").CAPITAL.sum()

18259247.49

In [509]:
bank_results['bank_repay_count'].to_excel(result_path+'./每月还款.xlsx')

## 4.4商户账期情况

In [511]:
bank_results['account_period_count']

Unnamed: 0,账期（天）,企业数,借据数,还款金额
0,"[0.0, 5.0)",3.0,3.0,350000.0
1,"[5.0, 10.0)",3.0,3.0,500000.0
2,"[10.0, 15.0)",1.0,1.0,30000.0
3,"[15.0, 20.0)",155.0,164.0,1629913.29
4,"[20.0, 25.0)",176.0,181.0,1233531.91
5,"[25.0, 30.0)",267.0,279.0,2238986.93
6,"[30.0, 35.0)",259.0,272.0,1645376.11
7,"[35.0, 40.0)",320.0,332.0,2182387.43
8,"[40.0, 45.0)",153.0,158.0,1240110.01
9,"[45.0, 50.0)",156.0,163.0,1514447.24


In [512]:
bank_results['bank_loan_repay']

Unnamed: 0,JNLNO,MERCHANTNO,PRODID,DUENO,CONTNO,IDTYPE,IDNO,IDNAME,LOANAMOUNT,LOANDATE,...,REPAYDATE,CAPITAL,REPAYSOURCE,NORMALINTEREST,PENALTYINTEREST,repay_month,repay_loan_days,due_repay_days,due_loan_days,repay_loan_days_bins
0,010120181127000059947201000000,20180001,LN0026,20180010018482,20180828000000001017,ZJ01,371322198605302710,杜从都,50000.0,2018-08-28,...,2018-09-16,1918.61,1.0,188.89,0.0,2018-09,19.0,712.0,731,"[15.0, 20.0)"
1,010120181127000059947201000000,20180001,LN0026,20180010018482,20180828000000001017,ZJ01,371322198605302710,杜从都,50000.0,2018-08-28,...,2018-10-15,1932.20,1.0,340.58,0.0,2018-10,48.0,683.0,731,"[45.0, 50.0)"
2,010120181127000059947201000000,20180001,LN0026,20180010018482,20180828000000001017,ZJ01,371322198605302710,杜从都,50000.0,2018-08-28,...,2018-11-15,1945.89,1.0,326.89,0.0,2018-11,79.0,652.0,731,"[70.0, 80.0)"
3,010120181127000059947201000000,20180001,LN0026,20180010018482,20180828000000001017,ZJ01,371322198605302710,杜从都,50000.0,2018-08-28,...,2018-12-15,1959.67,1.0,313.11,0.0,2018-12,109.0,622.0,731,"[100.0, 110.0)"
4,010120181127000059947201000000,20180001,LN0026,20180010018482,20180828000000001017,ZJ01,371322198605302710,杜从都,50000.0,2018-08-28,...,2019-01-15,1973.55,1.0,299.23,0.0,2019-01,140.0,591.0,731,"[135.0, 150.0)"
5,010120181127000059947201000000,20180001,LN0026,20180010018482,20180828000000001017,ZJ01,371322198605302710,杜从都,50000.0,2018-08-28,...,2019-02-15,1987.53,1.0,285.25,0.0,2019-02,171.0,560.0,731,"[165.0, 180.0)"
6,010120181127000059947201000000,20180001,LN0026,20180010018482,20180828000000001017,ZJ01,371322198605302710,杜从都,50000.0,2018-08-28,...,2019-03-15,2001.61,1.0,271.17,0.0,2019-03,199.0,532.0,731,"[180.0, 210.0)"
7,010120181127000059947201000000,20180001,LN0026,20180010018482,20180828000000001017,ZJ01,371322198605302710,杜从都,50000.0,2018-08-28,...,2019-04-15,2015.79,1.0,256.99,0.0,2019-04,230.0,501.0,731,"[210.0, 240.0)"
8,010120181127000059947201000000,20180001,LN0026,20180010018482,20180828000000001017,ZJ01,371322198605302710,杜从都,50000.0,2018-08-28,...,2019-05-16,2030.07,1.0,242.71,0.0,2019-05,261.0,470.0,731,"[240.0, 270.0)"
9,010120181127000059947201000000,20180001,LN0026,20180010018482,20180828000000001017,ZJ01,371322198605302710,杜从都,50000.0,2018-08-28,...,2019-06-15,2044.45,1.0,228.33,0.0,2019-06,291.0,440.0,731,"[270.0, 300.0)"


In [561]:
## TODO: decide which data set is the better basis for the account-period analysis
repay_diff = diff_days(bank_repay,['IDNO'],'REPAYDATE',prefix='还款')

In [562]:
repay_diff

Unnamed: 0_level_0,还款间隔时间均值（天）,还款间隔时间标准差（天）,还款最长间隔时间（天）,还款最短间隔时间（天）
IDNO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
110103198405210935,27.50,8.30,39.0,12.0
110104196404280509,30.33,1.87,34.0,28.0
110106196808052732,2.50,3.54,5.0,0.0
110106197609222721,,,,
11010719770803241X,30.00,,30.0,30.0
110107198101142115,29.86,1.57,32.0,28.0
110108196902126833,30.33,1.00,31.0,28.0
110109197205093133,30.14,1.07,31.0,28.0
110109197909053420,22.33,16.77,33.0,3.0
110111196909016125,30.50,0.71,31.0,30.0


In [515]:
zs_loan_diff = diff_days_2col(bank_results['bank_loan_repay'],groupby_col=[
                                'IDNO','IDNAME'],prefix='账期')

In [651]:
def loan_repay_days_dist(order_diff_compare,prefix):
    '''Bucket four per-borrower interval statistics and aggregate IDNO per bucket.

    Parameters
    ----------
    order_diff_compare : pandas.DataFrame
        Needs an ``IDNO`` column plus the four statistic columns
        ``prefix+'间隔时间均值（天）'``, ``prefix+'间隔时间标准差（天）'``,
        ``prefix+'最大间隔时间（天）'`` and ``prefix+'最小间隔时间（天）'``.
        The frame is not modified.
    prefix : str
        Leading part of the statistic column names (e.g. ``'账期'``).

    Returns
    -------
    pandas.DataFrame
        One row per day bucket, with the columns ``mean_``, ``std_``, ``max_``
        and ``min_`` holding the ``unique`` aggregate of ``IDNO`` for the rows
        whose respective statistic falls into that bucket.

    Notes
    -----
    ``unique`` is an aggregation helper defined elsewhere in this notebook
    (presumably a distinct count - confirm against its definition).
    Buckets use the ``pd.cut`` defaults, i.e. they are right-closed and the
    lowest edge is excluded, so a value of exactly 0 lands in no bucket.
    '''
    # Bucket edges: 5-day steps below 100, 10-day steps from 100 to 300,
    # then a single open-ended bucket above 300.
    days_bins = list(np.arange(0,100,5))
    days_bins.extend(list(np.arange(100,300,10)))
    days_bins.extend([300,np.inf])
    days_bins = sorted(set(days_bins))

    # (helper column / output column stem, source statistic column)
    stat_columns = (
        ('mean',prefix+'间隔时间均值（天）'),
        ('std',prefix+'间隔时间标准差（天）'),
        ('max',prefix+'最大间隔时间（天）'),
        ('min',prefix+'最小间隔时间（天）'),
    )

    # Work on a copy so the helper bucket columns never leak into the caller's frame.
    binned = order_diff_compare.copy()
    bucket_counts = []
    for stat,source_col in stat_columns:
        binned[stat] = pd.cut(binned[source_col],bins=days_bins)
        counts = binned.groupby(stat).agg({'IDNO':unique}).fillna(0)
        bucket_counts.append(counts.rename(columns={'IDNO':stat+'_'}))

    # All four frames share the same bucket index, so they line up column-wise.
    return pd.concat(bucket_counts,axis=1)



In [652]:
loan_repay_days_dist(zs_loan_diff.reset_index(),'账期')

Unnamed: 0,mean_,std_,max_,min_
"(0.0, 5.0]",1.0,11.0,1,2.0
"(5.0, 10.0]",2.0,4.0,2,2.0
"(10.0, 15.0]",1.0,3.0,1,22.0
"(15.0, 20.0]",11.0,15.0,11,163.0
"(20.0, 25.0]",6.0,30.0,6,166.0
"(25.0, 30.0]",6.0,25.0,5,265.0
"(30.0, 35.0]",6.0,50.0,1,285.0
"(35.0, 40.0]",10.0,32.0,5,230.0
"(40.0, 45.0]",16.0,12.0,8,95.0
"(45.0, 50.0]",20.0,17.0,7,7.0


In [516]:
zs_loan_diff

Unnamed: 0_level_0,Unnamed: 1_level_0,账期间隔时间均值（天）,账期间隔时间标准差（天）,账期最大间隔时间（天）,账期最小间隔时间（天）
IDNO,IDNAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
110103198405210935,刘彬,140.89,75.50,244.0,24.0
110104196404280509,安长丽,176.30,91.85,312.0,39.0
110106196808052732,杨忠利,26.33,2.89,28.0,23.0
110106197609222721,李静,20.00,,20.0,20.0
11010719770803241X,谷宇,53.00,21.21,68.0,38.0
110107198101142115,孙泰祺,124.12,73.29,229.0,20.0
110108196902126833,杨有文,170.00,91.64,307.0,34.0
110109197205093133,石瑞海,137.25,73.78,243.0,32.0
110109197909053420,杜维,58.50,31.54,85.0,18.0
110111196909016125,安雪莲,59.67,30.50,90.0,29.0


## 4.5逾期情况

In [518]:
bank_results['overdue_detail'].to_excel(result_path+'./逾期明细.xlsx')
bank_results['compensation_detail'].to_excel(result_path+'./代偿明细.xlsx')

In [718]:
# Whole days between the repayment date and the due date of each row
# (REPAYDATE - DUEDATE), so a positive value means the repayment came after the due date.
bank_results['bank_loan_repay']['overdue_days'] = (bank_results['bank_loan_repay']['REPAYDATE']-
                                                   bank_results['bank_loan_repay']['DUEDATE']).apply(days)

In [722]:
bank_results['bank_loan_repay'].columns

Index(['JNLNO', 'MERCHANTNO', 'PRODID', 'DUENO', 'CONTNO', 'IDTYPE', 'IDNO',
       'IDNAME', 'LOANAMOUNT', 'LOANDATE', 'DUEDATE', 'NORMALRATES',
       'PENALTYRATES', 'REPAYTYPE', 'CHANNELJNLNO', 'SendRouterJnlNo',
       'CREATETIME', 'UPDATETIME', 'year_month', 'due_month', 'REPAYDATE',
       'CAPITAL', 'REPAYSOURCE', 'NORMALINTEREST', 'PENALTYINTEREST',
       'repay_month', 'repay_loan_days', 'due_repay_days', 'due_loan_days',
       'repay_loan_days_bins', 'loan_month', 'overdue_days', 'C', 'M1', 'M2',
       'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'M10', 'M11', 'M12'],
      dtype='object')

In [726]:
bank_results['bank_loan_repay'][bank_results['bank_loan_repay']['overdue_days']>0][['DUENO', 'IDNO',
       'IDNAME', 'LOANAMOUNT', 'LOANDATE', 'DUEDATE', 'NORMALRATES',
       'PENALTYRATES', 'REPAYTYPE',  'REPAYDATE',
       'CAPITAL', 'REPAYSOURCE', 'overdue_days']]

Unnamed: 0,DUENO,IDNO,IDNAME,LOANAMOUNT,LOANDATE,DUEDATE,NORMALRATES,PENALTYRATES,REPAYTYPE,REPAYDATE,CAPITAL,REPAYSOURCE,overdue_days
895,20180010020724,340111199111202099,朱锡文,30000.0,2018-09-07,2019-03-07,5.65,12.75,1,2019-03-12,5058.89,1.0,5.0
1554,20180010020809,445221198112141015,张永旋,30000.0,2018-09-08,2019-03-08,5.65,12.75,1,2019-03-13,5058.89,1.0,5.0
2174,20180010021283,350124197912132152,陈鑫,30000.0,2018-09-10,2019-03-10,5.65,12.75,1,2019-03-17,5058.89,3.0,7.0
2225,20180010021293,352229197803304516,周道泽,30000.0,2018-09-10,2019-03-10,5.65,12.75,1,2019-03-11,5058.89,1.0,1.0
2790,20180010021400,12010619790404160X,王春燕,30000.0,2018-09-11,2019-03-11,5.65,12.75,1,2019-03-12,5058.89,1.0,1.0
3030,20180010021534,362421197802262619,周胜芳,30000.0,2018-09-11,2019-03-11,5.65,12.75,1,2019-03-12,5058.89,1.0,1.0
3512,20180010021680,350623197104266612,陈立枝,30000.0,2018-09-12,2019-03-12,5.65,12.75,1,2019-03-15,5058.89,1.0,3.0
4700,20180010023163,440881199611105958,黄鑫谟,30000.0,2018-09-17,2019-03-17,5.65,12.75,1,2019-03-18,5058.89,1.0,1.0
6165,20180010027038,342101198007234626,余泽影,30000.0,2018-09-29,2019-03-29,5.65,12.75,1,2019-03-30,5058.89,1.0,1.0
6535,20180010028564,412726199504224915,陈前力,30000.0,2018-10-11,2019-04-11,5.65,18.0,1,2019-04-17,5058.89,1.0,6.0


In [565]:
bank_results['overdue_detail'].shape

(15, 30)

In [633]:
def bank_acount(loan,repay,as_of=None):
    '''Build a per-repayment ledger for every loan, with overdue buckets.

    Parameters
    ----------
    loan : pandas.DataFrame
        Loan records; needs the columns MERCHANTNO, PRODID, IDNO, IDNAME,
        DUENO, LOANAMOUNT, LOANDATE and DUEDATE. Only the first row per DUENO
        is used.
    repay : pandas.DataFrame
        Repayment records; needs DUENO, REPAYDATE and CAPITAL. Repayments of
        the same loan on the same date are summed.
    as_of : datetime-like, optional
        Reference date for loans without any repayment and for the number of
        ``M`` buckets. Defaults to the notebook-level ``today``.

    Returns
    -------
    pandas.DataFrame
        One row per (loan, repayment date); loans without repayments keep a
        single row with CAPITAL 0. Added columns:

        - ``total_capital``: principal repaid up to and including this row
        - ``loan_balance``: LOANAMOUNT minus ``total_capital``
        - ``before_loan_balance``: balance just before this repayment
        - ``overdue_days``: REPAYDATE - DUEDATE in days; for rows without a
          repayment, ``as_of - DUEDATE`` minus one day
        - ``is_overdue``: 1 when ``overdue_days`` is positive, else 0
        - ``C``: LOANAMOUNT minus ``before_loan_balance``
        - ``M1`` .. ``Mn``: ``before_loan_balance`` when ``overdue_days`` lies
          in ``(30*(k-1), 30*k]``, else 0
    '''
    if as_of is None:
        # Existing callers pass two arguments and rely on the global reference date.
        as_of = today

    def _delta_days(delta):
        # Whole days of a timedelta-like value; None when it has no ``.days``.
        return getattr(delta,'days',None)

    loan = (loan[['MERCHANTNO', 'PRODID','IDNO','IDNAME','DUENO','LOANAMOUNT',
                  'LOANDATE', 'DUEDATE']]
            .drop_duplicates('DUENO').sort_values('LOANDATE').reset_index(drop=True))

    # One row per loan and repayment date, in chronological order, so the
    # running total below accumulates in repayment order.
    repay = (repay.groupby(['DUENO', 'REPAYDATE'],as_index=False)['CAPITAL'].sum()
             .sort_values('REPAYDATE').reset_index(drop=True))
    account = loan.merge(repay,on='DUENO',how='left')

    account['CAPITAL'] = account['CAPITAL'].fillna(0)
    # cumsum is a transformation, not an aggregation: take it per loan directly.
    account['total_capital'] = account.groupby('DUENO')['CAPITAL'].cumsum()
    account['loan_balance'] = account['LOANAMOUNT'] - account['total_capital']
    # total_capital already contains this row's instalment, so add it back
    # to get the balance that was outstanding before the repayment.
    account['before_loan_balance'] = account['loan_balance'] + account['CAPITAL']

    account['overdue_days'] = (account['REPAYDATE']-account['DUEDATE']).apply(_delta_days)
    # Rows without a repayment are measured against the reference date instead.
    days_past_due = (as_of-account['DUEDATE']).apply(_delta_days)-1
    account['overdue_days'] = account['overdue_days'].fillna(days_past_due)

    max_days = (as_of - account['LOANDATE'].min()).days
    months = int(np.ceil(max_days/30))

    # Overdue means the repayment (or the reference date) is after the due date.
    account['is_overdue'] = account['overdue_days'].apply(lambda x:1 if x>0 else 0)
    account['C'] = account['LOANAMOUNT']-account['before_loan_balance']
    for i in range(months):
        lower,upper = 30*i,30*(i+1)
        account['M'+str(i+1)] = [balance if lower < overdue <= upper else 0
                                 for overdue,balance in zip(account['overdue_days'],
                                                            account['before_loan_balance'])]
    return account

In [634]:
account = bank_acount(bank_loan,bank_repay)

In [631]:
account.columns

Index(['MERCHANTNO', 'PRODID', 'IDNO', 'IDNAME', 'DUENO', 'LOANAMOUNT',
       'LOANDATE', 'DUEDATE', 'REPAYDATE', 'CAPITAL', 'total_capital',
       'loan_balance', 'overdue_days', 'C', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6',
       'M7', 'M8', 'M9', 'M10', 'M11', 'M12'],
      dtype='object')

In [635]:
account.query('overdue_days > 0')[['IDNO', 'IDNAME', 'DUENO', 'LOANAMOUNT',
       'LOANDATE', 'DUEDATE', 'REPAYDATE', 'CAPITAL', 'total_capital',
       'loan_balance', 'overdue_days', 'C', 'M1', 'M2', 'M3']]

Unnamed: 0,IDNO,IDNAME,DUENO,LOANAMOUNT,LOANDATE,DUEDATE,REPAYDATE,CAPITAL,total_capital,loan_balance,overdue_days,C,M1,M2,M3
1186,340111199111202099,朱锡文,20180010020724,30000.0,2018-09-07,2019-03-07,2019-03-12,5058.89,30000.0,3.637979e-12,5.0,24941.11,5058.89,0,0
1728,445221198112141015,张永旋,20180010020809,30000.0,2018-09-08,2019-03-08,2019-03-13,5058.89,30000.0,3.637979e-12,5.0,24941.11,5058.89,0,0
1950,350124197912132152,陈鑫,20180010021283,30000.0,2018-09-10,2019-03-10,2019-03-17,5058.89,30000.0,3.637979e-12,7.0,24941.11,5058.89,0,0
2001,352229197803304516,周道泽,20180010021293,30000.0,2018-09-10,2019-03-10,2019-03-11,5058.89,30000.0,3.637979e-12,1.0,24941.11,5058.89,0,0
2539,362421197802262619,周胜芳,20180010021534,30000.0,2018-09-11,2019-03-11,2019-03-12,5058.89,30000.0,3.637979e-12,1.0,24941.11,5058.89,0,0
2799,12010619790404160X,王春燕,20180010021400,30000.0,2018-09-11,2019-03-11,2019-03-12,5058.89,30000.0,3.637979e-12,1.0,24941.11,5058.89,0,0
3301,350623197104266612,陈立枝,20180010021680,30000.0,2018-09-12,2019-03-12,2019-03-15,5058.89,30000.0,3.637979e-12,3.0,24941.11,5058.89,0,0
4510,440881199611105958,黄鑫谟,20180010023163,30000.0,2018-09-17,2019-03-17,2019-03-18,5058.89,30000.0,3.637979e-12,1.0,24941.11,5058.89,0,0
6005,342101198007234626,余泽影,20180010027038,30000.0,2018-09-29,2019-03-29,2019-03-30,5058.89,30000.0,3.637979e-12,1.0,24941.11,5058.89,0,0
6488,412726199504224915,陈前力,20180010028564,30000.0,2018-10-11,2019-04-11,2019-04-17,5058.89,30000.0,3.637979e-12,6.0,24941.11,5058.89,0,0


In [624]:
def days(x):
    '''Return the whole-day component of a timedelta-like value.

    Gives ``x.days`` when the attribute exists and ``None`` otherwise (for
    example for ``None`` or plain numbers), which makes it safe to use with
    ``Series.apply`` on columns that mix timedeltas and missing values.
    '''
    return getattr(x,'days',None)
        

def loan_repay_dist(loan_repay):
    '''Cross-tabulate principal repaid per repayment month against loan month.

    Parameters
    ----------
    loan_repay : pandas.DataFrame
        Loan rows joined with their repayments; needs DUENO, LOANAMOUNT,
        LOANDATE, REPAYDATE and CAPITAL. The columns ``loan_month`` and
        ``repay_month`` (first 7 characters of the respective date rendered as
        text) are written onto this frame as a side effect.

    Returns
    -------
    pandas.DataFrame
        Indexed by loan month, with a final ``'合计'`` total row. Columns:
        ``'放款金额'`` (amount lent, each DUENO counted once), one tuple-labelled
        column ``('还款本金', <repay_month>)`` per repayment month plus
        ``('还款本金', '合计')``, and ``'未还本金'`` (lent minus repaid).
    '''
    loan_repay['loan_month'] = loan_repay['LOANDATE'].astype(str).str[:7]
    loan_repay['repay_month'] = loan_repay['REPAYDATE'].astype(str).str[:7]

    # Each loan appears once per repayment row, so de-duplicate before summing amounts.
    loan_count = pd.pivot_table(loan_repay.drop_duplicates('DUENO'),index='loan_month',
                                values=['LOANAMOUNT'],margins=True,margins_name='合计',
                                aggfunc={'LOANAMOUNT':'sum'})
    loan_count = loan_count.rename(columns={'LOANAMOUNT':'放款金额'})

    repay_count = pd.pivot_table(loan_repay,index='loan_month',columns='repay_month',
                                 values=['CAPITAL'],margins=True,margins_name='合计',
                                 aggfunc={'CAPITAL':'sum'})
    repay_count = repay_count.rename(columns={'CAPITAL':'还款本金'})
    # The pivot has two column levels while loan_count has one; pandas refuses
    # to merge frames with different column depths, so flatten to tuple labels first.
    repay_count.columns = pd.Index(repay_count.columns.tolist(),tupleize_cols=False)

    loan_repay_count = loan_count.merge(repay_count,left_index=True,right_index=True,how='left')
    loan_repay_count['未还本金'] = loan_repay_count['放款金额'] - loan_repay_count[('还款本金', '合计')]
    return loan_repay_count

def loan_repay_com_dist(loan_repay):
    '''Per-borrower variant of the loan-month / repayment-month cross-tabulation.

    Writes ``loan_month`` and ``repay_month`` (first 7 characters of LOANDATE /
    REPAYDATE rendered as text) onto ``loan_repay``, pivots the amount lent by
    (loan_month, IDNO, IDNAME) and the principal repaid by loan_month against
    (repay_month, IDNO, IDNAME), and returns the left merge of the two pivots
    on their indexes.

    NOTE(review): the left pivot has a three-level row index and one column
    level, the right pivot a single-level row index and several column levels.
    How pandas joins these depends on the pandas version - confirm the result
    shape before relying on it.
    '''
    loan_repay['loan_month'] = loan_repay['LOANDATE'].astype(str).str[:7]
    loan_repay['repay_month'] = loan_repay['REPAYDATE'].astype(str).str[:7]
    # Amount lent per loan month and borrower; each DUENO is counted once.
    loan_count = pd.pivot_table(loan_repay.drop_duplicates('DUENO'),index=['loan_month','IDNO','IDNAME'],
                                values=['LOANAMOUNT'],margins=True,margins_name='合计',
                                aggfunc={'LOANAMOUNT':sum})
    loan_count = loan_count.rename(columns={'LOANAMOUNT':'放款金额'})
    # Principal repaid per loan month (rows) and repayment month / borrower (columns).
    repay_count = pd.pivot_table(loan_repay,index='loan_month',columns=['repay_month','IDNO','IDNAME'],
                                 values=['CAPITAL'],margins=True,margins_name='合计',
                                aggfunc={'CAPITAL':sum})
    repay_count = repay_count.rename(columns={'CAPITAL':'还款本金'})
    loan_repay_count = loan_count.merge(repay_count,left_index=True,right_index=True,how='left')
    # The outstanding-principal column is disabled here (kept below for reference).
#     loan_repay_count['未还本金'] = loan_repay_count['放款金额'] - loan_repay_count[('还款本金', 'IDNO','IDNAME','合计')]
    return loan_repay_count

In [625]:
bbb = loan_repay_dist(bank_results['bank_loan_repay'])
ccc = loan_repay_com_dist(bank_results['bank_loan_repay'])



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,放款金额,"(还款本金, 2018-09, 371322198605302710, 杜从都)","(还款本金, 2018-09, 372922197910063954, 钟保银)","(还款本金, 2018-09, 430103197708034035, 范旭辉)","(还款本金, 2018-10, 110104196404280509, 安长丽)","(还款本金, 2018-10, 110108196902126833, 杨有文)","(还款本金, 2018-10, 11011119700918403X, 常洪升)","(还款本金, 2018-10, 110111198007121814, 段成绪)","(还款本金, 2018-10, 110111198404193627, 鲁敬美)","(还款本金, 2018-10, 110221196708144225, 朱义荣)",...,"(还款本金, NaT, 130434198201012456, 李雷雷)","(还款本金, NaT, 142623199301162611, 王涛)","(还款本金, NaT, 14273019910709102X, 毛少丹)","(还款本金, NaT, 150404197409077153, 史清杰)","(还款本金, NaT, 362302199112123018, 董樟盛)","(还款本金, NaT, 371322198703302722, 陈学红)","(还款本金, NaT, 411525199110039364, 杨书鸽)","(还款本金, NaT, 412821197810114428, 潘玉红)","(还款本金, NaT, 440882199408132359, 黄炎)","(还款本金, 合计, , )"
loan_month,IDNO,IDNAME,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2018-08,371322198605302710,杜从都,5.000000e+04,1918.61,5918.21,10457.07,,,,,,,...,,,,,,,,,,2.077125e+05
2018-08,372922197910063954,钟保银,1.500000e+05,1918.61,5918.21,10457.07,,,,,,,...,,,,,,,,,,2.077125e+05
2018-08,430103197708034035,范旭辉,2.000000e+05,1918.61,5918.21,10457.07,,,,,,,...,,,,,,,,,,2.077125e+05
2018-09,110104196404280509,安长丽,2.000000e+05,,,,7890.94,7890.94,7890.94,7890.94,7890.94,4059.86,...,,,,,,,,,,4.480123e+07
2018-09,110108196902126833,杨有文,2.000000e+05,,,,7890.94,7890.94,7890.94,7890.94,7890.94,4059.86,...,,,,,,,,,,4.480123e+07
2018-09,11011119700918403X,常洪升,2.000000e+05,,,,7890.94,7890.94,7890.94,7890.94,7890.94,4059.86,...,,,,,,,,,,4.480123e+07
2018-09,110111198007121814,段成绪,2.000000e+05,,,,7890.94,7890.94,7890.94,7890.94,7890.94,4059.86,...,,,,,,,,,,4.480123e+07
2018-09,110111198404193627,鲁敬美,2.000000e+05,,,,7890.94,7890.94,7890.94,7890.94,7890.94,4059.86,...,,,,,,,,,,4.480123e+07
2018-09,110221196708144225,朱义荣,5.000000e+04,,,,7890.94,7890.94,7890.94,7890.94,7890.94,4059.86,...,,,,,,,,,,4.480123e+07
2018-09,110223196504251865,张颖,2.000000e+05,,,,7890.94,7890.94,7890.94,7890.94,7890.94,4059.86,...,,,,,,,,,,4.480123e+07


In [619]:
bbb.to_excel(result_path+'./每月放贷金额的还款情况.xlsx')

In [588]:
aaa = vintage(bank_results['bank_loan_repay'],'LOANDATE','REPAYDATE','DUEDATE','LOANAMOUNT','CAPITAL')

In [591]:
aaa[aaa['M1']>0][['LOANAMOUNT','LOANDATE','DUEDATE','REPAYDATE','CAPITAL','overdue_days','C','M1']]

Unnamed: 0,LOANAMOUNT,LOANDATE,DUEDATE,REPAYDATE,CAPITAL,overdue_days,C,M1
895,30000.0,2018-09-07,2019-03-07,2019-03-12,5058.89,5.0,0.0,30000.0
1554,30000.0,2018-09-08,2019-03-08,2019-03-13,5058.89,5.0,0.0,30000.0
2174,30000.0,2018-09-10,2019-03-10,2019-03-17,5058.89,7.0,0.0,30000.0
2225,30000.0,2018-09-10,2019-03-10,2019-03-11,5058.89,1.0,0.0,30000.0
2790,30000.0,2018-09-11,2019-03-11,2019-03-12,5058.89,1.0,0.0,30000.0
3030,30000.0,2018-09-11,2019-03-11,2019-03-12,5058.89,1.0,0.0,30000.0
3512,30000.0,2018-09-12,2019-03-12,2019-03-15,5058.89,3.0,0.0,30000.0
4700,30000.0,2018-09-17,2019-03-17,2019-03-18,5058.89,1.0,0.0,30000.0
6165,30000.0,2018-09-29,2019-03-29,2019-03-30,5058.89,1.0,0.0,30000.0
6535,30000.0,2018-10-11,2019-04-11,2019-04-17,5058.89,6.0,0.0,30000.0


In [570]:
bank_results['bank_loan_repay'].columns

Index(['JNLNO', 'MERCHANTNO', 'PRODID', 'DUENO', 'CONTNO', 'IDTYPE', 'IDNO',
       'IDNAME', 'LOANAMOUNT', 'LOANDATE', 'DUEDATE', 'NORMALRATES',
       'PENALTYRATES', 'REPAYTYPE', 'CHANNELJNLNO', 'SendRouterJnlNo',
       'CREATETIME', 'UPDATETIME', 'year_month', 'due_month', 'REPAYDATE',
       'CAPITAL', 'REPAYSOURCE', 'NORMALINTEREST', 'PENALTYINTEREST',
       'repay_month', 'repay_loan_days', 'due_repay_days', 'due_loan_days',
       'repay_loan_days_bins'],
      dtype='object')

In [531]:
bank_results['compensation_detail'].DUENO.unique().size

61

In [525]:
bank_results['overdue_detail']

Unnamed: 0,JNLNO,MERCHANTNO,PRODID,DUENO,CONTNO,IDTYPE,IDNO,IDNAME,LOANAMOUNT,LOANDATE,...,REPAYDATE,CAPITAL,REPAYSOURCE,NORMALINTEREST,PENALTYINTEREST,repay_month,repay_loan_days,due_repay_days,due_loan_days,repay_loan_days_bins
895,010120181127000060115501000000,20180001,LN0026,20180010020724,20180906000000001431,ZJ01,340111199111202099,朱锡文,30000.0,2018-09-07,...,2019-03-12,5058.89,1.0,17.47,0.0,2019-03,186.0,-5.0,181,"[180.0, 210.0)"
1554,010120181127000060148801000000,20180001,LN0026,20180010020809,20180908000000001609,ZJ01,445221198112141015,张永旋,30000.0,2018-09-08,...,2019-03-13,5058.89,1.0,18.26,0.0,2019-03,186.0,-5.0,181,"[180.0, 210.0)"
2174,010120181127000060169701000000,20180001,LN0026,20180010021283,20180910000000001806,ZJ01,350124197912132152,陈鑫,30000.0,2018-09-10,...,2019-03-17,5058.89,3.0,19.85,0.0,2019-03,188.0,-7.0,181,"[180.0, 210.0)"
2225,010120181127000060170701000000,20180001,LN0026,20180010021293,20180910000000001823,ZJ01,352229197803304516,周道泽,30000.0,2018-09-10,...,2019-03-11,5058.89,1.0,19.85,0.0,2019-03,182.0,-1.0,181,"[180.0, 210.0)"
2790,010120181127000060273501000000,20180001,LN0026,20180010021400,20180911000000001856,ZJ01,12010619790404160X,王春燕,30000.0,2018-09-11,...,2019-03-12,5058.89,1.0,20.64,0.0,2019-03,182.0,-1.0,181,"[180.0, 210.0)"
3030,010120181127000060277201000000,20180001,LN0026,20180010021534,20180911000000001998,ZJ01,362421197802262619,周胜芳,30000.0,2018-09-11,...,2019-03-12,5058.89,1.0,20.64,0.0,2019-03,182.0,-1.0,181,"[180.0, 210.0)"
3512,010120181127000060296401000000,20180001,LN0026,20180010021680,20180912000000002099,ZJ01,350623197104266612,陈立枝,30000.0,2018-09-12,...,2019-03-15,5058.89,1.0,21.44,0.0,2019-03,184.0,-3.0,181,"[180.0, 210.0)"
4700,010120181127000060389701000000,20180001,LN0026,20180010023163,20180917000000002438,ZJ01,440881199611105958,黄鑫谟,30000.0,2018-09-17,...,2019-03-18,5058.89,1.0,25.41,0.0,2019-03,182.0,-1.0,181,"[180.0, 210.0)"
6165,010120181127000060686801000000,20180001,LN0026,20180010027038,20180911000000001940,ZJ01,342101198007234626,余泽影,30000.0,2018-09-29,...,2019-03-30,5058.89,1.0,34.93,0.0,2019-03,182.0,-1.0,181,"[180.0, 210.0)"
6535,010120181127000060963401000000,20180001,LN0026,20180010028564,20181011000000003941,ZJ01,412726199504224915,陈前力,30000.0,2018-10-11,...,2019-04-17,5058.89,1.0,20.64,0.0,2019-04,188.0,-6.0,182,"[180.0, 210.0)"


In [533]:
pd.pivot_table(bank_results['compensation_detail'],index='repay_month',values=['CAPITAL','DUENO','IDNO'],
               aggfunc={'CAPITAL':sum,'DUENO':len,'IDNO':unique},margins=True,margins_name='合计')

Unnamed: 0_level_0,CAPITAL,DUENO,IDNO
repay_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-10,137393.95,27,25
2018-11,59941.09,11,10
2018-12,44166.67,7,6
2019-01,63407.57,10,10
2019-02,88972.67,15,13
2019-03,85326.27,14,14
2019-04,155620.08,12,9
2019-05,38626.61,6,6
2019-06,56322.01,9,9
2019-07,76220.42,13,12


In [None]:
pd.pivot_table(bank_results['overdue_detail'],index='due_month',values=['CAPITAL','DUENO','IDNO'],
               aggfunc={'CAPITAL':sum,'DUENO':len,'IDNO':unique},margins=True,margins_name='合计')

In [785]:
avg_amt_month


Unnamed: 0,超市ID,最早交易时间,最近交易时间,交易存续天数,交易次数,交易金额,月交易密度,月交易金额,amt_bins
0,201.0,2018-11-01 22:52:48,2019-08-30 23:54:55,304,161,149941.72,15.888158,14796.880263,"(14000.0, 16000.0]"
1,313.0,2019-01-18 15:25:44,2019-08-28 21:22:15,226,157,161755.29,20.840708,21471.941150,"(20000.0, 25000.0]"
2,733.0,2018-11-01 15:55:40,2019-08-29 15:48:41,304,144,148281.07,14.210526,14633.000329,"(14000.0, 16000.0]"
3,1667.0,2018-11-01 12:40:42,2019-08-25 17:39:03,304,54,43967.98,5.328947,4338.945395,"(4000.0, 5000.0]"
4,1773.0,2019-05-22 18:10:34,2019-08-30 18:51:52,102,159,162542.66,46.764706,47806.664706,"(45000.0, 50000.0]"
5,1787.0,2018-11-03 21:13:01,2019-08-26 17:00:00,302,95,127303.55,9.437086,12646.048013,"(12000.0, 14000.0]"
6,1801.0,2018-11-12 11:58:03,2019-08-24 10:11:47,293,41,40377.08,4.197952,4134.172014,"(4000.0, 5000.0]"
7,2231.0,2019-04-04 11:21:47,2019-08-01 23:57:07,150,23,19380.08,4.600000,3876.016000,"(3000.0, 4000.0]"
8,2837.0,2018-11-07 16:27:46,2019-07-28 11:25:46,298,44,42376.85,4.429530,4266.125839,"(4000.0, 5000.0]"
9,3091.0,2018-11-03 08:25:18,2019-08-29 19:50:02,302,100,134396.86,9.933775,13350.681457,"(12000.0, 14000.0]"


# 5. 供应链业务及金融行为比较分析

## 5.1商户授信及订单总金额情况比较分析

In [890]:

# Attach the credit limit of every white-listed merchant with a positive limit
# to its monthly trading summary (inner join on the merchant id).
order_result_credit = avg_amt_month.merge(
    white_list.loc[white_list['CREDITLimit']>0,['PBIDNO','CREDITLimit','BIDNO']],
    left_on='超市ID',right_on='PBIDNO')

# Projected annual amount = 12 x monthly amount; a missing monthly amount counts as 0.
order_result_credit['预计年交易金额'] = (order_result_credit['月交易金额']*12).fillna(0)
order_result_credit['月交易金额/授信额度'] = order_result_credit['月交易金额'].div(order_result_credit['CREDITLimit'])
order_result_credit['预计年交易金额/授信额度'] = order_result_credit['预计年交易金额'].div(order_result_credit['CREDITLimit'])

# Ratio bucket edges: 0.1 steps up to 1.2, 0.2 steps up to 2, 0.5 steps up to 5,
# unit steps up to 10, then one open-ended bucket.
perc_bins = sorted(set(
    list(np.arange(0,1.2,0.1))
    + list(np.arange(1.2,2,0.2))
    + list(np.arange(2,5,0.5))
    + list(np.arange(5,10,1))
    + [10,np.inf]
))

# Buckets are right-closed, so a ratio of exactly 0 falls outside the first bucket.
order_result_credit['perc_bins'] = pd.cut(order_result_credit['预计年交易金额/授信额度'],right=True,bins=perc_bins)
order_result_credit_count = (
    order_result_credit.groupby('perc_bins')
    .agg({'超市ID':unique})
    .fillna(0)
    .reset_index()
    .rename(columns={'超市ID':'商户数','perc_bins':'预计年交易金额/授信额度'})
)
order_result_credit_count['占比'] = order_result_credit_count['商户数'].div(order_result_credit_count['商户数'].sum())
order_result_credit_count['累计占比'] = order_result_credit_count['占比'].cumsum()

In [891]:
order_result_credit_count

Unnamed: 0,预计年交易金额/授信额度,商户数,占比,累计占比
0,"(0.0, 0.1]",24.0,0.019576,0.019576
1,"(0.1, 0.2]",83.0,0.0677,0.087276
2,"(0.2, 0.3]",103.0,0.084013,0.171289
3,"(0.3, 0.4]",72.0,0.058728,0.230016
4,"(0.4, 0.5]",87.0,0.070962,0.300979
5,"(0.5, 0.6]",87.0,0.070962,0.371941
6,"(0.6, 0.7]",102.0,0.083197,0.455139
7,"(0.7, 0.8]",115.0,0.093801,0.54894
8,"(0.8, 0.9]",71.0,0.057912,0.606852
9,"(0.9, 1.0]",96.0,0.078303,0.685155


In [899]:
order_result_credit_count['商户数'].sum()

1226.0

In [788]:
order_result_credit_count

Unnamed: 0,预计年交易金额/授信额度,商户数,占比,累计占比
0,"(0.0, 0.0001]",0.0,0.0,0.0
1,"(0.0001, 0.1]",29.0,0.023654,0.023654
2,"(0.1, 0.2]",94.0,0.076672,0.100326
3,"(0.2, 0.3]",103.0,0.084013,0.184339
4,"(0.3, 0.4]",80.0,0.065253,0.249592
5,"(0.4, 0.5]",92.0,0.075041,0.324633
6,"(0.5, 0.6]",128.0,0.104405,0.429038
7,"(0.6, 0.7]",116.0,0.094617,0.523654
8,"(0.7, 0.8]",81.0,0.066069,0.589723
9,"(0.8, 0.9]",96.0,0.078303,0.668026


In [535]:
order_result_credit_count

Unnamed: 0,预计年交易金额/授信额度,商户数,占比,累计占比
0,"(0.0, 0.0001]",0.0,0.0,0.0
1,"(0.0001, 0.1]",27.0,0.021969,0.021969
2,"(0.1, 0.2]",86.0,0.069976,0.091945
3,"(0.2, 0.3]",108.0,0.087876,0.179821
4,"(0.3, 0.4]",84.0,0.068348,0.248169
5,"(0.4, 0.5]",84.0,0.068348,0.316517
6,"(0.5, 0.6]",130.0,0.105777,0.422295
7,"(0.6, 0.7]",113.0,0.091945,0.514239
8,"(0.7, 0.8]",91.0,0.074044,0.588283
9,"(0.8, 0.9]",90.0,0.07323,0.661513


## 5.2商户单笔贷款及单笔订单情况比较分析

In [895]:
bank_loan

Unnamed: 0,JNLNO,MERCHANTNO,PRODID,DUENO,CONTNO,IDTYPE,IDNO,IDNAME,LOANAMOUNT,LOANDATE,...,NORMALRATES,PENALTYRATES,REPAYTYPE,CHANNELJNLNO,SendRouterJnlNo,CREATETIME,UPDATETIME,year_month,diff,due_month
3525,010120181127000059947201000000,20180001,LN0026,20180010018482,20180828000000001017,ZJ01,371322198605302710,杜从都,50000.0,2018-08-28,...,8.50,12.75,1,32138220180828469355511992020000,32138220180828469355511992020000,2018-11-27 15:54:46,2018-11-27 15:54:46,2018-08,,2020-08
3597,010120181127000059970001000000,20180001,LN0026,20180010018561,20180829000000001029,ZJ01,430103197708034035,范旭辉,200000.0,2018-08-29,...,8.50,12.75,1,32138220180829545918411992020000,32138220180829545918411992020000,2018-11-27 15:55:08,2018-11-27 15:55:08,2018-08,,2020-02
3697,010120181127000059998401000000,20180001,LN0026,20180010018688,20180831000000001074,ZJ01,372922197910063954,钟保银,150000.0,2018-08-31,...,5.65,12.75,1,32138220180830712923659992020000,32138220180830712923659992020000,2018-11-27 15:55:47,2018-11-27 15:55:47,2018-08,,2020-08
3714,010120181127000060017501000000,20180001,LN0026,20180010019006,20180902000000001086,ZJ01,370911198610131672,国伟,150000.0,2018-09-02,...,5.65,12.75,1,32138220180830855331153992020000,32138220180830855331153992020000,2018-11-27 15:57:21,2018-11-27 15:57:21,2018-09,,2020-09
3736,010120181127000060021901000000,20180001,LN0026,20180010020174,20180903000000001164,ZJ01,412702197103206580,王娟,200000.0,2018-09-03,...,5.65,12.75,1,32138220180903980582480992020000,32138220180903980582480992020000,2018-11-27 15:57:43,2018-11-27 15:57:43,2018-09,,2020-09
3737,010120181127000060022001000000,20180001,LN0026,20180010020140,20180903000000001119,ZJ01,352202197807153612,陈将辉,180000.0,2018-09-03,...,5.65,12.75,1,32138220180903978443862992020000,32138220180903978443862992020000,2018-11-27 15:57:43,2018-11-27 15:57:43,2018-09,,2020-09
3755,010120181127000060023801000000,20180001,LN0026,20180010020120,20180903000000001106,ZJ01,120107198010160619,李渊,60000.0,2018-09-03,...,5.65,12.75,1,32138220180903977095884992020000,32138220180903977095884992020000,2018-11-27 15:57:43,2018-11-27 15:57:43,2018-09,,2019-09
3817,010120181127000060045401000000,20180001,LN0026,20180010020249,20180904000000001210,ZJ01,370123197908284740,刘辉,150000.0,2018-09-04,...,5.65,12.75,1,32138220180903030339900992020000,32138220180903030339900992020000,2018-11-27 15:58:15,2018-11-27 15:58:15,2018-09,,2020-09
3819,010120181127000060045601000000,20180001,LN0026,20180010020253,20180904000000001212,ZJ01,372523198408120510,相爱雨,150000.0,2018-09-04,...,5.65,12.75,1,32138220180903033578285992020000,32138220180903033578285992020000,2018-11-27 15:58:15,2018-11-27 15:58:15,2018-09,,2020-09
3820,010120181127000060045701000000,20180001,LN0026,20180010020257,20180904000000001214,ZJ01,342221196501039211,臧浩,100000.0,2018-09-04,...,5.65,12.75,1,32138220180903037789552992020000,32138220180903037789552992020000,2018-11-27 15:58:15,2018-11-27 15:58:15,2018-09,,2020-03


In [902]:
## Projected annual trading amount relative to the annual loan amount
# NOTE(review): loan_analysis is defined elsewhere; period=730 is passed while the
# derived columns are labelled as annual figures - confirm the intended window.
loan_year = loan_analysis(bank_loan, groupby_col="IDNO", period=730)
# Right join: every borrower in loan_year is kept, with or without matching order data.
order_loan_year = order_result_credit.merge(loan_year,how='right',left_on='BIDNO',right_on='IDNO')
order_loan_year['预计年交易金额/年贷款金额'] = order_loan_year['预计年交易金额'].div(order_loan_year['年贷款金额'])

order_loan_year['perc_bins'] = pd.cut(order_loan_year['预计年交易金额/年贷款金额'],right=True,bins=perc_bins)
order_loan_year_count = (
    order_loan_year.groupby('perc_bins')
    .agg({'IDNO':len})
    .fillna(0)
    .reset_index()
    .rename(columns={'IDNO':'商户数','perc_bins':'预计年交易金额/年贷款金额'})
)
order_loan_year_count['占比'] = order_loan_year_count['商户数'].div(order_loan_year_count['商户数'].sum())
order_loan_year_count['累计占比'] = order_loan_year_count['占比'].cumsum()

In [903]:
order_loan_year_count['商户数'].sum(),bank_loan.IDNO.unique().size

(1226, 1245)

In [793]:
order_loan_year[order_loan_year['预计年交易金额/年贷款金额']>10]

Unnamed: 0,超市ID,最早交易时间,最近交易时间,交易存续天数,交易次数,交易金额,月交易密度,月交易金额,amt_bins,PBIDNO,CREDITLimit,BIDNO,预计年交易金额,月交易金额/授信额度,预计年交易金额/授信额度,perc_bins,IDNO,年贷款次数,年贷款金额,预计年交易金额/年贷款金额
66,30501.0,2018-11-01 23:49:59,2019-08-30 23:59:23,304.0,261.0,260599.32,25.756579,25717.038158,"(25000.0, 30000.0]",30501.0,200000.0,412822198011142079,308604.457895,0.128585,1.543022,"(10.0, inf]",412822198011142079,1,30000.0,10.286815
581,269605.0,2018-11-01 15:14:28,2019-08-30 16:04:32,304.0,241.0,389007.5,23.782895,38388.898026,"(35000.0, 40000.0]",269605.0,70000.0,330125197505131614,460666.776316,0.548413,6.580954,"(10.0, inf]",330125197505131614,1,30000.0,15.355559


In [904]:
order_loan_year_count

Unnamed: 0,预计年交易金额/年贷款金额,商户数,占比,累计占比
0,"(0.0, 0.1]",5,0.004078,0.004078
1,"(0.1, 0.2]",8,0.006525,0.010604
2,"(0.2, 0.3]",12,0.009788,0.020392
3,"(0.3, 0.4]",12,0.009788,0.030179
4,"(0.4, 0.5]",12,0.009788,0.039967
5,"(0.5, 0.6]",16,0.013051,0.053018
6,"(0.6, 0.7]",15,0.012235,0.065253
7,"(0.7, 0.8]",14,0.011419,0.076672
8,"(0.8, 0.9]",33,0.026917,0.103589
9,"(0.9, 1.0]",33,0.026917,0.130506


In [790]:
order_loan_year_count

Unnamed: 0,预计年交易金额/年贷款金额,商户数,占比,累计占比
0,"(0.0, 0.0001]",0.0,0.0,0.0
1,"(0.0001, 0.1]",14.0,0.011438,0.011438
2,"(0.1, 0.2]",25.0,0.020425,0.031863
3,"(0.2, 0.3]",31.0,0.025327,0.05719
4,"(0.3, 0.4]",36.0,0.029412,0.086601
5,"(0.4, 0.5]",77.0,0.062908,0.14951
6,"(0.5, 0.6]",105.0,0.085784,0.235294
7,"(0.6, 0.7]",113.0,0.09232,0.327614
8,"(0.7, 0.8]",95.0,0.077614,0.405229
9,"(0.8, 0.9]",117.0,0.095588,0.500817


In [537]:
order_loan_year_count

Unnamed: 0,预计年交易金额/年贷款金额,商户数,占比,累计占比
0,"(0.0, 0.0001]",0.0,0.0,0.0
1,"(0.0001, 0.1]",14.0,0.011391,0.011391
2,"(0.1, 0.2]",19.0,0.01546,0.026851
3,"(0.2, 0.3]",33.0,0.026851,0.053702
4,"(0.3, 0.4]",40.0,0.032547,0.086249
5,"(0.4, 0.5]",68.0,0.05533,0.141579
6,"(0.5, 0.6]",109.0,0.08869,0.230269
7,"(0.6, 0.7]",114.0,0.092758,0.323027
8,"(0.7, 0.8]",96.0,0.078112,0.401139
9,"(0.8, 0.9]",106.0,0.086249,0.487388


## 5.3商户授信及贷款比较分析(新增)

In [769]:
# Left-join the de-duplicated (company_id, city_name) pairs from zgc_customer onto
# order_loan_year, matching 超市ID against company_id.
order_loan_year = pd.merge(
    order_loan_year,
    zgc_customer[['company_id', 'city_name']].drop_duplicates(),
    how='left',
    left_on='超市ID',
    right_on='company_id',
)

In [780]:
# Show the loan amount and credit limit of the rows whose '年贷款金额/授信额度' ratio is +inf.
order_loan_year.loc[
    order_loan_year['年贷款金额/授信额度'] == np.inf,
    ['年贷款金额', 'CREDITLimit'],
]

Unnamed: 0,年贷款金额,CREDITLimit
104,80000.0,
475,80000.0,
627,150000.0,


In [779]:

# Replace a zero CREDITLimit with NaN so that ratios divided by it come out as NaN
# rather than inf. `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
order_loan_year['CREDITLimit'] = np.where(order_loan_year['CREDITLimit']==0,np.nan,order_loan_year['CREDITLimit'])

In [781]:
# Ratio of annual loan amount to credit limit, bucketed with the notebook-level `perc_bins`
# (right-closed intervals).
order_loan_year['年贷款金额/授信额度'] = order_loan_year['年贷款金额'].div(order_loan_year['CREDITLimit'])
order_loan_year['perc_bins'] = pd.cut(
    order_loan_year['年贷款金额/授信额度'],
    bins=perc_bins,
    right=True,
)

# Row count per bucket, then each bucket's share and the running (cumulative) share.
order_loan_year_count = (
    order_loan_year
    .groupby('perc_bins')
    .agg({'IDNO': len})
    .fillna(0)
    .reset_index()
    .rename(columns={'IDNO': '商户数', 'perc_bins': '年贷款金额/授信额度'})
)
order_loan_year_count['占比'] = order_loan_year_count['商户数'].div(order_loan_year_count['商户数'].sum())
order_loan_year_count['累计占比'] = order_loan_year_count['占比'].cumsum()


In [782]:
order_loan_year_count

Unnamed: 0,年贷款金额/授信额度,商户数,占比,累计占比
0,"(0.0, 0.0001]",0.0,0.0,0.0
1,"(0.0001, 0.1]",0.0,0.0,0.0
2,"(0.1, 0.2]",92.0,0.075163,0.075163
3,"(0.2, 0.3]",76.0,0.062092,0.137255
4,"(0.3, 0.4]",61.0,0.049837,0.187092
5,"(0.4, 0.5]",68.0,0.055556,0.242647
6,"(0.5, 0.6]",14.0,0.011438,0.254085
7,"(0.6, 0.7]",52.0,0.042484,0.296569
8,"(0.7, 0.8]",21.0,0.017157,0.313725
9,"(0.8, 0.9]",6.0,0.004902,0.318627


In [539]:
order_loan_year_count

Unnamed: 0,年贷款金额/授信额度,商户数,占比,累计占比
0,"(0.0, 0.0001]",0.0,0.0,0.0
1,"(0.0001, 0.1]",0.0,0.0,0.0
2,"(0.1, 0.2]",92.0,0.074858,0.074858
3,"(0.2, 0.3]",76.0,0.061839,0.136697
4,"(0.3, 0.4]",61.0,0.049634,0.18633
5,"(0.4, 0.5]",68.0,0.05533,0.24166
6,"(0.5, 0.6]",14.0,0.011391,0.253051
7,"(0.6, 0.7]",52.0,0.042311,0.295362
8,"(0.7, 0.8]",20.0,0.016273,0.311635
9,"(0.8, 0.9]",7.0,0.005696,0.317331


## 5.4商户订单间隔天数及账期情况比较分析

In [905]:
# diff_days is defined elsewhere in this notebook; judging by the resulting column names it
# appears to produce mean/std/max/min interval statistics (in days) per group — TODO confirm.
order_diff = diff_days(
    order_all_201909_,
    ['company_id', 'company_name', 'company_certificate_num'],
    'order_buy_time',
    '交易',
)
loan_diff = diff_days(bank_loan, ['IDNO'], 'LOANDATE', prefix='贷款')

# Attach the legal person to every merchant (inner join on the certificate number), then
# attach that person's loan-interval statistics by matching the legal person id against
# loan_diff's index.
order_diff_compare = pd.merge(
    pd.merge(
        order_diff.reset_index(),
        company_use[['company_certificate_num', 'company_legal_person_id', 'company_legal_person']],
        on='company_certificate_num',
    ),
    loan_diff,
    left_on='company_legal_person_id',
    right_index=True,
)



In [907]:
# Run diff_days (defined elsewhere in this notebook) on the orders grouped by
# company_certificate_num only, timed by order_buy_time. The '交易' argument presumably
# sets the prefix of the resulting column names — TODO confirm against diff_days.
order_diff_201909 = diff_days(order_all_201909_,['company_certificate_num'],'order_buy_time','交易')

In [908]:
order_diff_201909

Unnamed: 0_level_0,交易间隔时间均值（天）,交易间隔时间标准差（天）,交易最长间隔时间（天）,交易最短间隔时间（天）
company_certificate_num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
040000200803030011,5.07,4.09,20.02,0.00
040002200907070005,3.41,2.50,18.75,0.01
040004201206280010,3.60,2.36,21.13,0.91
040004201607140025,8.27,5.72,29.83,2.76
040004201611230004,9.28,32.48,160.81,0.00
040004201708240021,5.94,4.28,25.05,0.00
040005201112200015,4.11,3.67,23.04,0.10
040008201706300027,5.06,2.67,15.98,0.92
040009201406090013,4.22,3.59,26.07,0.41
040009201709210021,2.96,1.83,14.95,0.00


In [795]:
order_diff_compare

Unnamed: 0,company_id,company_name,company_certificate_num,交易间隔时间均值（天）,交易间隔时间标准差（天）,交易最长间隔时间（天）,交易最短间隔时间（天）,company_legal_person_id,company_legal_person,贷款间隔时间均值（天）,贷款间隔时间标准差（天）,贷款最长间隔时间（天）,贷款最短间隔时间（天）
0,201.0,喜客超市,911101057999884853,1.89,1.11,9.90,0.00,371427198402174349,陈秀荣,,,,
1,313.0,北京嗨家豪杰店（NO.0093）,91110108MA0020TY0C,1.42,2.69,26.17,0.00,142623198912092013,李豪,,,,
2,733.0,好顺园便民超市（晚上8点前送货）,91110106802233607B,2.10,2.75,18.69,0.00,34262319641210715X,蒋克忠,,,,
3,1667.0,中韩超市（庆伟千辉超市）,92110105MA012HFU6C,5.61,5.27,17.91,0.01,130726198310293927,张喜红,,,,
4,1773.0,京东便利店,91110105MA00BLBD0G,0.63,0.57,2.82,0.00,41082719700910153X,牛长卫,,,,
6,501570.0,金梧桐咖啡便利店（7open）,91110105MA00BLBD0G,0.97,4.24,65.00,0.00,41082719700910153X,牛长卫,,,,
5,1773.0,京东便利店,91110105MA00BLBD0G,0.63,0.57,2.82,0.00,410881197608137524,王岚,,,,
7,501570.0,金梧桐咖啡便利店（7open）,91110105MA00BLBD0G,0.97,4.24,65.00,0.00,410881197608137524,王岚,,,,
8,1787.0,中商惠民（家乐超市）,91110108MA004C4QX9,3.15,2.29,9.82,0.00,131121197204203424,闫春红,,,,
9,1801.0,Mini超市,110105604668051,7.12,4.83,20.80,0.00,220524197001151574,张波,,,,


In [796]:
order_diff

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,交易间隔时间均值（天）,交易间隔时间标准差（天）,交易最长间隔时间（天）,交易最短间隔时间（天）
company_id,company_name,company_certificate_num,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
201.0,喜客超市,911101057999884853,1.89,1.11,9.90,0.00
313.0,北京嗨家豪杰店（NO.0093）,91110108MA0020TY0C,1.42,2.69,26.17,0.00
733.0,好顺园便民超市（晚上8点前送货）,91110106802233607B,2.10,2.75,18.69,0.00
1667.0,中韩超市（庆伟千辉超市）,92110105MA012HFU6C,5.61,5.27,17.91,0.01
1773.0,京东便利店,91110105MA00BLBD0G,0.63,0.57,2.82,0.00
1787.0,中商惠民（家乐超市）,91110108MA004C4QX9,3.15,2.29,9.82,0.00
1801.0,Mini超市,110105604668051,7.12,4.83,20.80,0.00
2231.0,鑫鑫超市（BQ）,91110114MA002WEUXY,5.43,2.84,12.18,1.98
2837.0,青田超市（东三）,92110114MA016J6C9A,6.11,10.20,54.86,0.00
3091.0,浔庐超市,92110105L76126550E,3.03,3.21,21.91,0.00


In [680]:
order_diff

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,交易间隔时间均值（天）,交易间隔时间标准差（天）,交易最长间隔时间（天）,交易最短间隔时间（天）
company_id,company_name,company_certificate_num,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
201.0,喜客超市,911101057999884853,1.90,1.15,9.90,0.00
313.0,北京嗨家豪杰店（NO.0093）,91110108MA0020TY0C,1.46,2.87,26.17,0.00
733.0,好顺园便民超市（晚上8点前送货）,91110106802233607B,2.02,2.59,18.69,0.00
1667.0,中韩超市（庆伟千辉超市）,92110105MA012HFU6C,5.58,5.14,17.91,0.01
1773.0,京东便利店,91110105MA00BLBD0G,0.63,0.63,3.21,0.00
1787.0,中商惠民（家乐超市）,91110108MA004C4QX9,3.16,2.35,9.82,0.00
1801.0,Mini超市,110105604668051,6.73,4.69,20.80,0.00
2231.0,鑫鑫超市（BQ）,91110114MA002WEUXY,5.25,2.77,12.18,1.98
2837.0,青田超市（东三）,92110114MA016J6C9A,6.11,10.20,54.86,0.00
3091.0,浔庐超市,92110105L76126550E,3.12,3.41,21.91,0.00


In [None]:
# NOTE(review): this cell was never executed (In [None]) and looks unfinished. It copies the
# '交易间隔时间均值（天）' column into a column whose name is the empty string.
# TODO: give the target column a real name (and the intended computation) or drop the cell.
order_diff_compare[''] = order_diff_compare['交易间隔时间均值（天）'] 

In [808]:

def loan_repay_days_dist(order_diff_compare,prefix,days_bins=None):
    '''Bucket the four interval statistics into day ranges and aggregate merchants per bucket.

    For each of the columns ``prefix+'间隔时间均值（天）'``, ``prefix+'间隔时间标准差（天）'``,
    ``prefix+'最长间隔时间（天）'`` and ``prefix+'最短间隔时间（天）'`` the values are cut into
    ``days_bins`` and ``company_certificate_num`` is aggregated per bucket with the
    notebook-level ``unique`` function (defined elsewhere; presumably a distinct count —
    TODO confirm).

    Parameters
    ----------
    order_diff_compare : pandas.DataFrame
        Must contain ``company_certificate_num`` and the four ``prefix``-ed columns above.
        Side effect: the bucket columns ``'mean'``, ``'std'``, ``'max'`` and ``'min'`` are
        written onto this frame, so pass a copy if the original must stay untouched.
    prefix : str
        Column-name prefix selecting which statistics to bucket (e.g. ``'交易'`` or ``'贷款'``).
    days_bins : sequence of numbers, optional
        Bin edges in days. Defaults to 5-day steps from 0 to 50, 10-day steps from 50 to
        100 and a final open-ended bucket ``(100, inf]``. Pass a finer or longer grid
        (for example one reaching 300 days) when the default is too coarse.

    Returns
    -------
    pandas.DataFrame
        Indexed by bucket, with one column per statistic: ``mean_``, ``std_``, ``max_``
        and ``min_``.

    Notes
    -----
    ``pd.cut`` is called with its defaults, so buckets are right-closed and the lowest edge
    is excluded: a value of exactly 0 falls outside ``(0, 5]`` and is not counted.
    '''
    if days_bins is None:
        days_bins = list(np.arange(0,50,5))
        days_bins.extend(list(np.arange(50,100+10,10)))
        days_bins.extend([np.inf])
    # De-duplicate and order the edges; pd.cut requires monotonically increasing bins.
    days_bins = sorted(set(days_bins))

    # (bucket column written onto the frame, source statistic column)
    stat_columns = (
        ('mean', prefix+'间隔时间均值（天）'),
        ('std', prefix+'间隔时间标准差（天）'),
        ('max', prefix+'最长间隔时间（天）'),
        ('min', prefix+'最短间隔时间（天）'),
    )

    # Assign every bucket column first so each groupby below sees the same frame.
    for stat, column in stat_columns:
        order_diff_compare[stat] = pd.cut(order_diff_compare[column],bins=days_bins)

    per_stat_counts = [
        order_diff_compare.groupby(stat)
        .agg({'company_certificate_num':unique})
        .fillna(0)
        .rename(columns={'company_certificate_num':stat+'_'})
        for stat, _ in stat_columns
    ]
    # All four results are indexed by bucket, so a column-wise concat lines them up.
    order_diff_bins_count = pd.concat(per_stat_counts,axis=1)
    return order_diff_bins_count



In [909]:
# Bucket the order-interval statistics of order_diff_201909. reset_index() turns the
# company_certificate_num index into a regular column (loan_repay_days_dist aggregates on
# it) and returns a new frame, so the bucket columns the function adds do not end up on
# order_diff_201909 itself.
order_diff_bins_count = loan_repay_days_dist(order_diff_201909.reset_index(),'交易')

In [910]:
order_diff_bins_count

Unnamed: 0,mean_,std_,max_,min_
"(0.0, 5.0]",792.0,796.0,19,488.0
"(5.0, 10.0]",314.0,273.0,79,9.0
"(10.0, 15.0]",69.0,83.0,230,1.0
"(15.0, 20.0]",21.0,26.0,245,1.0
"(20.0, 25.0]",11.0,13.0,178,0.0
"(25.0, 30.0]",5.0,10.0,105,0.0
"(30.0, 35.0]",1.0,3.0,84,0.0
"(35.0, 40.0]",1.0,2.0,48,0.0
"(40.0, 45.0]",3.0,3.0,47,0.0
"(45.0, 50.0]",1.0,2.0,39,0.0


In [812]:
# Loans ordered from the most recent LOANDATE to the oldest.
# NOTE(review): the rendered output of this cell shows ID numbers and names — clear the
# output before sharing the notebook.
bank_loan.sort_values(by='LOANDATE', ascending=False)

Unnamed: 0,JNLNO,MERCHANTNO,PRODID,DUENO,CONTNO,IDTYPE,IDNO,IDNAME,LOANAMOUNT,LOANDATE,DUEDATE,NORMALRATES,PENALTYRATES,REPAYTYPE,CHANNELJNLNO,SendRouterJnlNo,CREATETIME,UPDATETIME,year_month,diff
12659,010120190803000033020701000000,20180001,LN0026,20190010074036,20181011000000003940,ZJ01,350628198903062014,叶添成,20000.0,2019-08-02,2020-02-02,5.65,18.00,1,03321382000000000000000004062283,03321382000000000000000004062283,2019-08-03 07:00:01,2019-08-03 07:00:01,2019-08,106.0
12589,010120190724000032581501000000,20180001,LN0026,20190010072936,20190723000000014322,ZJ01,371322198703302722,陈学红,200000.0,2019-07-23,2021-07-23,5.65,18.00,1,32138220190723860135752992028630,32138220190723860135752992028630,2019-07-24 07:00:01,2019-07-24 07:00:01,2019-07,
12572,010120190723000032535601000000,20180001,LN0026,20190010072860,20190418000000008222,ZJ01,362302199112123018,董樟盛,5000.0,2019-07-22,2020-01-22,5.65,18.00,1,32138220190722753344211992023323,32138220190722753344211992023323,2019-07-23 07:00:01,2019-07-23 07:00:01,2019-07,15.0
12486,010120190717000032168101000000,20180001,LN0026,20190010069714,20190716000000011190,ZJ01,411525199110039364,杨书鸽,200000.0,2019-07-16,2021-07-16,5.65,18.00,1,32138220190716285232049992022049,32138220190716285232049992022049,2019-07-17 07:00:01,2019-07-17 07:00:01,2019-07,
12472,010120190715000031891601000000,20180001,LN0026,20190010069318,20180915000000002336,ZJ01,150404197409077153,史清杰,100000.0,2019-07-14,2021-01-14,5.65,18.00,1,03321382000000000000000003721911,03321382000000000000000003721911,2019-07-15 07:00:01,2019-07-15 07:00:01,2019-07,302.0
12464,010120190713000031811201000000,20180001,LN0026,20190010069070,20190712000000010578,ZJ01,130434198201012456,李雷雷,200000.0,2019-07-12,2021-07-12,5.65,18.00,1,32138220190712925359476992027718,32138220190712925359476992027718,2019-07-13 07:00:01,2019-07-13 07:00:01,2019-07,
12447,010120190711000031717501000000,20180001,LN0026,20190010068825,20190710000000010309,ZJ01,142623199301162611,王涛,200000.0,2019-07-10,2021-01-10,5.65,18.00,1,32138220190710758041933992028213,32138220190710758041933992028213,2019-07-11 07:00:01,2019-07-11 07:00:01,2019-07,
12432,010120190709000031634301000000,20180001,LN0026,20190010068465,20180915000000002326,ZJ01,440882199408132359,黄炎,35000.0,2019-07-08,2020-07-08,5.65,18.00,1,03321382000000000000000003604863,03321382000000000000000003604863,2019-07-09 07:00:01,2019-07-09 07:00:01,2019-07,294.0
12430,010120190708000031592601000000,20180001,LN0026,20190010068455,20190418000000008222,ZJ01,362302199112123018,董樟盛,5000.0,2019-07-07,2020-01-07,5.65,18.00,1,32138220190707508793638992024693,32138220190707508793638992024693,2019-07-08 07:00:01,2019-07-08 07:00:01,2019-07,9.0
12412,010120190704000031420101000000,20180001,LN0026,20190010068207,20190703000000009761,ZJ01,14273019910709102X,毛少丹,200000.0,2019-07-03,2021-07-03,5.65,18.00,1,32138220190703139521084992026095,32138220190703139521084992026095,2019-07-04 07:00:01,2019-07-04 07:00:01,2019-07,


In [560]:
order_diff_bins_count

Unnamed: 0,mean_,std_,max_,min_
"(0.0, 5.0]",799.0,836.0,35.0,552.0
"(5.0, 10.0]",319.0,265.0,106.0,10.0
"(10.0, 15.0]",73.0,69.0,256.0,1.0
"(15.0, 20.0]",15.0,26.0,263.0,3.0
"(20.0, 25.0]",9.0,10.0,184.0,0.0
"(25.0, 30.0]",6.0,7.0,110.0,0.0
"(30.0, 35.0]",1.0,3.0,85.0,0.0
"(35.0, 40.0]",1.0,2.0,46.0,0.0
"(40.0, 45.0]",3.0,1.0,37.0,0.0
"(45.0, 50.0]",1.0,2.0,28.0,0.0


In [None]:
# Export the order-vs-loan interval comparison to Excel. os.path.join builds a valid path
# whether or not result_path ends with a path separator (plain string concatenation with
# './' only worked when it did).
order_diff_compare.to_excel(os.path.join(result_path, '商户订单间隔天数及贷款间隔天数情况比较.xlsx'))

# 6.银行收益分析及其他

## 6.1放款及还款总金额按月分布

In [914]:
bank_results['loan_balance_count']

Unnamed: 0,月份,贷款次数,贷款企业数,贷款金额,次均贷款金额,还款金额,还款次数,还款企业数,次均还款金额,贷款余额
0,2018-08,3.0,3.0,400000.0,133333.333333,0.0,0.0,0.0,0.0,400000.0
1,2018-09,689.0,637.0,75565000.0,109673.439768,18293.89,3.0,3.0,6097.963333,75946710.0
2,2018-10,139.0,136.0,16145000.0,116151.079137,4220510.0,702.0,639.0,6012.121895,87871200.0
3,2018-11,223.0,219.0,27559810.0,123586.606771,5701956.0,847.0,755.0,6731.942916,109729100.0
4,2018-12,76.0,74.0,6180000.0,81315.789474,7840470.0,1061.0,948.0,7389.698294,108068600.0
5,2019-01,65.0,64.0,8955000.0,137769.230769,8022464.0,1116.0,1000.0,7188.588244,109001100.0
6,2019-02,16.0,16.0,1615000.0,100937.5,7664375.0,1164.0,1034.0,6584.514244,102951700.0
7,2019-03,42.0,40.0,4840000.0,115238.095238,8834776.0,1171.0,1032.0,7544.642186,98956970.0
8,2019-04,63.0,60.0,8605000.0,136587.301587,7637475.0,1120.0,1001.0,6819.174187,99924490.0
9,2019-05,46.0,44.0,6260000.0,136086.956522,8503882.0,1167.0,1037.0,7286.959494,97680610.0


In [554]:
# Export the monthly loan/repayment balance table to Excel. os.path.join builds a valid
# path whether or not result_path ends with a path separator (plain string concatenation
# with './' only worked when it did).
bank_results['loan_balance_count'].to_excel(os.path.join(result_path, 'balance.xlsx'))

## 6.2银行利息收入按月分布

In [555]:
bank_results['interest_income']

Unnamed: 0,月份,利息收入,累计利息收入
0,2018-09,1274.03,1274.03
1,2018-10,377194.28,378468.31
2,2018-11,401125.6,779593.91
3,2018-12,510324.95,1289918.86
4,2019-01,506008.03,1795926.89
5,2019-02,518777.12,2314704.01
6,2019-03,485831.83,2800535.84
7,2019-04,466369.2,3266905.04
8,2019-05,468917.43,3735822.47
9,2019-06,460053.09,4195875.56
