In [29]:
import pandas as pd
import pymysql
from pyhive import hive
import pandas as pd
from pyspark import SparkContext,SQLContext
from pyspark.sql import SQLContext
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf, col
from pyspark.sql.types import StringType
import datetime


In [30]:
spark=SparkSession \
        .builder \
        .config("spark.eventLog.enabled", "false") \
        .config("spark.executor.memory", "4g")\
        .config("spark.driver.memory", "8g")\
        .config("spark.cores.max", "10")\
        .config("spark.task.maxFailures", "1000")\
        .config("spark.default.parallelism", "500")\
        .config("spark.sql.shuffle.partitions",50)\
        .appName('renhang_etl') \
        .master('yarn')\
        .getOrCreate()

In [31]:
class hiveParse(object):
    """
    该方法用于读取hive 数据，并转化成pandas dataframe
    """
    def __init__(self, host='', port=10000, user='', passwd='', dbName=''):
        self.host = host
        self.port = port
        self.user = user
        self.passwd = passwd
        self.dbName = dbName

    def conn_hive(self):
        self.connection = hive.Connection(host=self.host,port=self.port,username=self.user)

    def hive_connect(self,sql_select):
        cur = self.connection.cursor()
        cur.execute(sql_select)
        columns = [col[0] for col in cur.description]
        data = cur.fetchall()
        query_df = pd.DataFrame(data)
        query_df.columns = columns
        return query_df

class mysqldbParse(object):
    """
    该方法用于连接mysql，并执行相应操作
    """

    def __init__(self, host='', port=3306, user='', passwd='', dbName='', charset='utf8', connect_timeout=31536000):
        self.host = host
        self.port = port
        self.user = user
        self.passwd = passwd
        self.dbName = dbName
        self.charset = charset
        self.connect_timeout = connect_timeout

    def conn_mysql(self):
        self.conn = pymysql.connect(host=self.host,
                                    port=self.port,
                                    user=self.user,
                                    password=self.passwd,
                                    db=self.dbName,
                                    charset=self.charset,
                                    connect_timeout=self.connect_timeout,
                                    cursorclass=pymysql.cursors.DictCursor)

    def insert_sql(self, insertSql):
        with self.conn.cursor() as cursor:
            cursor.execute(insertSql)
            self.conn.commit()

    def select_Sql(self, selectSql):
        df = pd.read_sql(selectSql, self.conn)
        return df

    def close(self):
        self.conn.close()

In [32]:
def hive_df(sql):
    host = "58.59.18.61"
    port = 10000
    user = "hdfs"
    dbName = "ods"
    hive_parse = hiveParse(host,port,user,dbName)
    hive_parse.conn_hive()
    hive_pd = hive_parse.hive_connect(sql)
    return hive_pd

def mysql_df(selectSql):
    host = "58.59.11.86"
    port = 3306
    user = "tangdinghai"
    passwd = "Tangdinghai2019!"
    dbName = "reportpublic"
    mysql_parse = mysqldbParse(host,port,user,passwd,dbName)
    mysql_parse.conn_mysql()
    mysql_pd = mysql_parse.select_Sql(selectSql)
    mysql_parse.close()
    return mysql_pd



In [33]:
#### 头条分发模型：有授信额度相关客户级和订单级汇总监控报表---日报

In [75]:
base_dict = {'score_creditcard':['409-560','561-583','584-597','598-608','609-618','619-628','629-638','639-649','650-664','665-752','汇总'],\
             'toutiao_develop':['2.80%','8.09%','10.02%','10.57%','10.63%','11.82%','11.28%','11.31%','11.67%','11.81%','-']}
dairy_report = pd.DataFrame(base_dict)
dairy_report['product_no'] = "PN00000053"
dairy_report['product_name'] = '今日头条放心借'

dairy_report = dairy_report[['product_name','product_no','score_creditcard','toutiao_develop']]
dairy_report


Unnamed: 0,product_name,product_no,score_creditcard,toutiao_develop
0,今日头条放心借,PN00000053,409-560,2.80%
1,今日头条放心借,PN00000053,561-583,8.09%
2,今日头条放心借,PN00000053,584-597,10.02%
3,今日头条放心借,PN00000053,598-608,10.57%
4,今日头条放心借,PN00000053,609-618,10.63%
5,今日头条放心借,PN00000053,619-628,11.82%
6,今日头条放心借,PN00000053,629-638,11.28%
7,今日头条放心借,PN00000053,639-649,11.31%
8,今日头条放心借,PN00000053,650-664,11.67%
9,今日头条放心借,PN00000053,665-752,11.81%


In [35]:
table1 = spark.read \
    .format("jdbc") \
    .option("url", "jdbc:mysql://58.59.11.86:3306") \
    .option("dbtable", "reportpublic.ods_credit_credit_info") \
    .option("user", "tangdinghai") \
    .option("password", "Tangdinghai2019!") \
    .load()
table1.createOrReplaceTempView("ods_credit_credit_info")

table1 = spark.read \
    .format("jdbc") \
    .option("url", "jdbc:mysql://58.59.11.86:3306") \
    .option("dbtable", "reportpublic.ods_credit_credit_apply") \
    .option("user", "tangdinghai") \
    .option("password", "Tangdinghai2019!") \
    .load()
table1.createOrReplaceTempView("ods_credit_credit_apply")

table1 = spark.read \
    .format("jdbc") \
    .option("url", "jdbc:mysql://58.59.11.86:3306") \
    .option("dbtable", "reportpublic.ods_loan_invoice") \
    .option("user", "tangdinghai") \
    .option("password", "Tangdinghai2019!") \
    .load()
table1.createOrReplaceTempView("ods_loan_invoice")

In [36]:
hive_sql = """


select
	score_creditcard,
	count(distinct(user_id)) as apply_uid_%s,
    count(distinct(session_id)) as apply_sessionId_%s

from
(
	select
		user_id,
        session_id,
		case
			when cast(t.out_credit_score as int) >= 409 and cast(t.out_credit_score as int) <= 560 then "409-560"
			when cast(t.out_credit_score as int) >= 561 and cast(t.out_credit_score as int) <= 583 then "561-583"
			when cast(t.out_credit_score as int) >= 584 and cast(t.out_credit_score as int) <= 597 then "584-597"
			when cast(t.out_credit_score as int) >= 598 and cast(t.out_credit_score as int) <= 608 then "598-608"
			when cast(t.out_credit_score as int) >= 609 and cast(t.out_credit_score as int) <= 618 then "609-618"
			when cast(t.out_credit_score as int) >= 619 and cast(t.out_credit_score as int) <= 628 then "619-628"
			when cast(t.out_credit_score as int) >= 629 and cast(t.out_credit_score as int) <= 638 then "629-638"			
			when cast(t.out_credit_score as int) >= 639 and cast(t.out_credit_score as int) <= 649 then "639-649"
			when cast(t.out_credit_score as int) >= 650 and cast(t.out_credit_score as int) <= 664 then "650-664"
			when cast(t.out_credit_score as int) >= 665 and cast(t.out_credit_score as int) <= 752 then "665-752"
		end as score_creditcard
	from 
			(
				SELECT
     				a.session_id as session_id,
     				a.user_id as user_id,
     				a.t02_apply_time as apply_time,
     				b.out_credit_score as out_credit_score,
                    b.OUT_FRONT_RISK_DEGREE as OUT_FRONT_RISK_DEGREE
				FROM 
				(
					SELECT 
						session_id,
       					user_id,
       					t02_apply_time
					FROM ods.ods_toutiao_shouxin_input 
				) AS a

                left JOIN
                (
                    SELECT 
                        session_id,
                        user_id,
                        out_credit_score,
                        OUT_FRONT_RISK_DEGREE
                    FROM ods.ods_toutiao_shouxin_output
                ) AS b ON a.session_id = b.session_id
                

                
        ) as t  where from_unixtime(unix_timestamp(substr(t.apply_time,0,8),'yyyymmdd'),'yyyy-mm-dd') 
            between %s and %s  
) as a group by a.score_creditcard



"""

dairly_apply_day = dairly_monitor(hive_sql,'today',0,0)
dairly_apply_day



Unnamed: 0,score_creditcard,apply_uid_today,apply_sessionId_today
0,629-638,1,1
1,409-560,20,20
2,561-583,12,12
3,598-608,4,4
4,584-597,3,3
5,650-664,1,1
6,609-618,2,2


In [37]:
def dairly_monitor(sql,flag,daydiff1=0,daydiff2=0):
    
    if(flag == 'today'):
        time1 = """date_add(CURRENT_DATE,-1)"""
        time2 = """date_add(CURRENT_DATE,-1)"""
        flag = 'today'
        excute_sql = sql%(flag,flag,time1,time2)
#         print(excute_sql)
    if(flag == 'week'):
        time1 = """date_add(CURRENT_DATE,-%d)"""%daydiff1
        time2 = """date_add(CURRENT_DATE,-%d)"""%daydiff2
        excute_sql = sql%(flag,flag,time1,time2)
        
    if(flag == 'month'):
        time1 = """date_add(add_months(CURRENT_DATE, -%d ),-%d)"""%(daydiff1,daydiff2)
        time2 = """date_add(CURRENT_DATE,-%d)"""%daydiff2
        excute_sql = sql%(flag,flag,time1,time2)
    #获取每个月的时间
    credit_apply = spark.sql(excute_sql).toPandas()
    return credit_apply

def monthly_monitor(sql,flag,monthdiff):
    
    if(flag == 'online'):
        time1 = """date_add(last_day(add_months('2019-01-14', -1 )),1)"""
        time2 = """last_day('2019-01-14')"""
        flag = 'online'
        excute_sql = sql%(flag,flag,time1,time2)
#         print(excute_sql)
    else:
        time1 = """date_add(last_day(add_months(CURRENT_DATE, -%d )),1)"""%(monthdiff+1)    #当月第一天：上月最后一天+1天
        time2 = """last_day(add_months(CURRENT_DATE,-%d))"""%monthdiff                  #当月最后一天
        
        date_sql = """select last_day(add_months(CURRENT_DATE,-%d)) as dt"""%monthdiff
        date =  hive_df(date_sql)['dt'].astype('str')
        flag = date[0][0:7].replace('-','_')
        excute_sql = sql%(flag,flag,time1,time2)
#         print(excute_sql)

    #获取每个月的时间
    credit_apply = spark.sql(excute_sql).toPandas()
    return credit_apply

In [38]:
dairly_apply_day = dairly_monitor(hive_sql,'today',0,0)
dairly_apply_week = dairly_monitor(hive_sql,'week',8,2)
dairly_apply_month = dairly_monitor(hive_sql,'month',1,2)


join_key = 'score_creditcard'

apply_monitor_dairly = dairy_report.merge(dairly_apply_month,on = join_key ,how='left')\
                                  .merge(dairly_apply_week,on = join_key, how='left')\
                                  .merge(dairly_apply_day,on = join_key, how='left').fillna(0)

col_keep = list(apply_monitor_dairly.columns)
col_keep.remove(join_key)
col_keep.remove('toutiao_develop')

for col in col_keep:
    col_total = sum(apply_monitor_dairly[col])
    idx = len(dairy_report['toutiao_develop']) - 1
    apply_monitor_dairly[col][idx] = col_total
    apply_monitor_dairly[col+"_percent"] = apply_monitor_dairly[col].map(lambda x: x/(col_total+0.000001))
    apply_monitor_dairly[col+"_percent"][idx] = col_total 

    
apply_monitor_dairly


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,score_creditcard,toutiao_develop,apply_uid_month,apply_sessionId_month,apply_uid_week,apply_sessionId_week,apply_uid_today,apply_sessionId_today,apply_uid_month_percent,apply_sessionId_month_percent,apply_uid_week_percent,apply_sessionId_week_percent,apply_uid_today_percent,apply_sessionId_today_percent
0,409-560,2.80%,289.0,289.0,235.0,235.0,20.0,20.0,0.227559,0.22738,0.236657,0.236419,0.465116,0.465116
1,561-583,8.09%,311.0,312.0,267.0,268.0,12.0,12.0,0.244882,0.245476,0.268882,0.269618,0.27907,0.27907
2,584-597,10.02%,222.0,222.0,177.0,177.0,3.0,3.0,0.174803,0.174666,0.178248,0.178068,0.069767,0.069767
3,598-608,10.57%,134.0,134.0,105.0,105.0,4.0,4.0,0.105512,0.105429,0.10574,0.105634,0.093023,0.093023
4,609-618,10.63%,78.0,78.0,52.0,52.0,2.0,2.0,0.061417,0.061369,0.052367,0.052314,0.046512,0.046512
5,619-628,11.82%,72.0,72.0,45.0,45.0,0.0,0.0,0.056693,0.056648,0.045317,0.045272,0.0,0.0
6,629-638,11.28%,62.0,62.0,39.0,39.0,1.0,1.0,0.048819,0.04878,0.039275,0.039235,0.023256,0.023256
7,639-649,11.31%,27.0,27.0,19.0,19.0,0.0,0.0,0.02126,0.021243,0.019134,0.019115,0.0,0.0
8,650-664,11.67%,41.0,41.0,27.0,27.0,1.0,1.0,0.032283,0.032258,0.02719,0.027163,0.023256,0.023256
9,665-752,11.81%,34.0,34.0,27.0,27.0,0.0,0.0,0.026772,0.026751,0.02719,0.027163,0.0,0.0


In [39]:
#上线月
apply_month_online = monthly_monitor(hive_sql,'online',-1)
#当月
apply_month_now = monthly_monitor(hive_sql,'not_online',0)
#上月
apply_month_last1 = monthly_monitor(hive_sql,'not_online',1)
#上两月
apply_month_last2 = monthly_monitor(hive_sql,'not_online',2)
#上3月
apply_month_last3 = monthly_monitor(hive_sql,'not_online',3)
#上4月
apply_month_last4 = monthly_monitor(hive_sql,'not_online',4)
#上5月
apply_month_last5 = monthly_monitor(hive_sql,'not_online',5)
#上6月
apply_month_last6 = monthly_monitor(hive_sql,'not_online',6)



join_key = 'score_creditcard'

apply_monitor_monthly = dairy_report.merge(apply_month_online,on = join_key,how='left')\
                                  .merge(apply_month_last6,on = join_key,how='left')\
                                  .merge(apply_month_last5,on = join_key,how='left')\
                                  .merge(apply_month_last4,on = join_key,how='left')\
                                  .merge(apply_month_last3,on = join_key,how='left')\
                                  .merge(apply_month_last2,on = join_key,how='left')\
                                  .merge(apply_month_last1,on = join_key,how='left')\
                                  .merge(apply_month_now,on = join_key,how='left').fillna(0)

col_keep = list(apply_monitor_monthly.columns)
col_keep.remove(join_key)
col_keep.remove('toutiao_develop')

for col in col_keep:
    col_total = sum(apply_monitor_monthly[col])
    idx = len(dairy_report['toutiao_develop']) - 1
    apply_monitor_monthly[col][idx] = col_total
    apply_monitor_monthly[col+"_percent"] = apply_monitor_monthly[col].map(lambda x: x/(col_total+0.000001))
    apply_monitor_monthly[col+"_percent"][idx] = col_total 

    
apply_monitor_monthly

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,score_creditcard,toutiao_develop,apply_uid_online,apply_sessionId_online,apply_uid_2018_07,apply_sessionId_2018_07,apply_uid_2018_08,apply_sessionId_2018_08,apply_uid_2018_09,apply_sessionId_2018_09,...,apply_uid_2018_09_percent,apply_sessionId_2018_09_percent,apply_uid_2018_10_percent,apply_sessionId_2018_10_percent,apply_uid_2018_11_percent,apply_sessionId_2018_11_percent,apply_uid_2018_12_percent,apply_sessionId_2018_12_percent,apply_uid_2019_01_percent,apply_sessionId_2019_01_percent
0,409-560,2.80%,309.0,309.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.233737,0.23356
1,561-583,8.09%,325.0,326.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.24584,0.24641
2,584-597,10.02%,226.0,226.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.170953,0.170824
3,598-608,10.57%,139.0,139.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105144,0.105064
4,609-618,10.63%,83.0,83.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.062784,0.062736
5,619-628,11.82%,72.0,72.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054463,0.054422
6,629-638,11.28%,64.0,64.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048411,0.048375
7,639-649,11.31%,28.0,28.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02118,0.021164
8,650-664,11.67%,42.0,42.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03177,0.031746
9,665-752,11.81%,34.0,34.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025719,0.025699


In [40]:
##### 授信相关日报和月报

In [44]:
shouxin_hive_sql = """


select
	score_creditcard,
	count(distinct(user_id)) as shouxin_uid_%s,
    count(distinct(session_id)) as shouxin_sessionId_%s

from
(
	select
		user_id,
        session_id,
		case
			when cast(t.out_credit_score as int) >= 409 and cast(t.out_credit_score as int) <= 560 then "409-560"
			when cast(t.out_credit_score as int) >= 561 and cast(t.out_credit_score as int) <= 583 then "561-583"
			when cast(t.out_credit_score as int) >= 584 and cast(t.out_credit_score as int) <= 597 then "584-597"
			when cast(t.out_credit_score as int) >= 598 and cast(t.out_credit_score as int) <= 608 then "598-608"
			when cast(t.out_credit_score as int) >= 609 and cast(t.out_credit_score as int) <= 618 then "609-618"
			when cast(t.out_credit_score as int) >= 619 and cast(t.out_credit_score as int) <= 628 then "619-628"
			when cast(t.out_credit_score as int) >= 629 and cast(t.out_credit_score as int) <= 638 then "629-638"			
			when cast(t.out_credit_score as int) >= 639 and cast(t.out_credit_score as int) <= 649 then "639-649"
			when cast(t.out_credit_score as int) >= 650 and cast(t.out_credit_score as int) <= 664 then "650-664"
			when cast(t.out_credit_score as int) >= 665 and cast(t.out_credit_score as int) <= 752 then "665-752"
		end as score_creditcard
	from 
			(
				SELECT
     				a.session_id as session_id,
     				a.user_id as user_id,
     				a.t02_apply_time as apply_time,
     				b.out_credit_score as out_credit_score,
                    b.OUT_FRONT_RISK_DEGREE as OUT_FRONT_RISK_DEGREE
				FROM 
				(
					SELECT 
						session_id,
       					user_id,
       					t02_apply_time
					FROM ods.ods_toutiao_shouxin_input 
				) AS a

                left JOIN
                (
                    SELECT 
                        session_id,
                        user_id,
                        out_credit_score,
                        OUT_FRONT_RISK_DEGREE
                    FROM ods.ods_toutiao_shouxin_output
                ) AS b ON a.session_id = b.session_id
                
                right join 
                (
                
                    select 
                        user_id
                    from ods_credit_credit_info
                
                ) as c on a.user_id = c.user_id
                
        ) as t  where from_unixtime(unix_timestamp(substr(t.apply_time,0,8),'yyyymmdd'),'yyyy-mm-dd') 
            between %s and %s   
) as a group by a.score_creditcard



"""

# dairly_shouxin_day = dairly_monitor(shouxin_hive_sql,'today',0,0)


In [45]:
dairly_shouxin_day = dairly_monitor(shouxin_hive_sql,'today',0,0)
dairly_shouxin_week = dairly_monitor(shouxin_hive_sql,'week',8,2)
dairly_shouxin_month = dairly_monitor(shouxin_hive_sql,'month',1,2)


join_key = 'score_creditcard'

shouxin_monitor_dairly = dairy_report.merge(dairly_shouxin_month,on = join_key ,how='left')\
                                  .merge(dairly_shouxin_week,on = join_key, how='left')\
                                  .merge(dairly_shouxin_day,on = join_key, how='left').fillna(0)

col_keep = list(shouxin_monitor_dairly.columns)
col_keep.remove(join_key)
col_keep.remove('toutiao_develop')

for col in col_keep:
    col_total = sum(shouxin_monitor_dairly[col])
    idx = len(dairy_report['toutiao_develop']) - 1
    shouxin_monitor_dairly[col][idx] = col_total
    shouxin_monitor_dairly[col+"_percent"] = shouxin_monitor_dairly[col].map(lambda x: x/(col_total+0.000001))
    shouxin_monitor_dairly[col+"_percent"][idx] = col_total 

    
shouxin_monitor_dairly


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,score_creditcard,toutiao_develop,shouxin_uid_month,shouxin_sessionId_month,shouxin_uid_week,shouxin_sessionId_week,shouxin_uid_today,shouxin_sessionId_today,shouxin_uid_month_percent,shouxin_sessionId_month_percent,shouxin_uid_week_percent,shouxin_sessionId_week_percent,shouxin_uid_today_percent,shouxin_sessionId_today_percent
0,409-560,2.80%,10.0,10.0,7.0,7.0,1.0,1.0,0.052356,0.052356,0.047297,0.047297,0.333333,0.333333
1,561-583,8.09%,21.0,21.0,18.0,18.0,1.0,1.0,0.109948,0.109948,0.121622,0.121622,0.333333,0.333333
2,584-597,10.02%,13.0,13.0,10.0,10.0,0.0,0.0,0.068063,0.068063,0.067568,0.067568,0.0,0.0
3,598-608,10.57%,20.0,20.0,19.0,19.0,1.0,1.0,0.104712,0.104712,0.128378,0.128378,0.333333,0.333333
4,609-618,10.63%,21.0,21.0,14.0,14.0,0.0,0.0,0.109948,0.109948,0.094595,0.094595,0.0,0.0
5,619-628,11.82%,26.0,26.0,18.0,18.0,0.0,0.0,0.136126,0.136126,0.121622,0.121622,0.0,0.0
6,629-638,11.28%,26.0,26.0,21.0,21.0,0.0,0.0,0.136126,0.136126,0.141892,0.141892,0.0,0.0
7,639-649,11.31%,10.0,10.0,10.0,10.0,0.0,0.0,0.052356,0.052356,0.067568,0.067568,0.0,0.0
8,650-664,11.67%,26.0,26.0,16.0,16.0,0.0,0.0,0.136126,0.136126,0.108108,0.108108,0.0,0.0
9,665-752,11.81%,18.0,18.0,15.0,15.0,0.0,0.0,0.094241,0.094241,0.101351,0.101351,0.0,0.0


In [21]:
#上线月
shouxin_month_online = monthly_monitor(hive_sql,'online',-1)
#当月
shouxin_month_now = monthly_monitor(hive_sql,'not_online',0)
#上月
shouxin_month_last1 = monthly_monitor(hive_sql,'not_online',1)
#上两月
shouxin_month_last2 = monthly_monitor(hive_sql,'not_online',2)
#上3月
shouxin_month_last3 = monthly_monitor(hive_sql,'not_online',3)
#上4月
shouxin_month_last4 = monthly_monitor(hive_sql,'not_online',4)
#上5月
shouxin_month_last5 = monthly_monitor(hive_sql,'not_online',5)
#上6月
shouxin_month_last6 = monthly_monitor(hive_sql,'not_online',6)



join_key = 'score_creditcard'

shouxin_monitor_monthly = dairy_report.merge(shouxin_month_online,on = join_key,how='left')\
                                  .merge(shouxin_month_last6,on = join_key,how='left')\
                                  .merge(shouxin_month_last5,on = join_key,how='left')\
                                  .merge(shouxin_month_last4,on = join_key,how='left')\
                                  .merge(shouxin_month_last3,on = join_key,how='left')\
                                  .merge(shouxin_month_last2,on = join_key,how='left')\
                                  .merge(shouxin_month_last1,on = join_key,how='left')\
                                  .merge(shouxin_month_now,on = join_key,how='left').fillna(0)

col_keep = list(shouxin_monitor_monthly.columns)
col_keep.remove(join_key)
col_keep.remove('toutiao_develop')

for col in col_keep:
    col_total = sum(shouxin_monitor_monthly[col])
    idx = len(dairy_report['toutiao_develop']) - 1
    shouxin_monitor_monthly[col][idx] = col_total
    shouxin_monitor_monthly[col+"_percent"] = shouxin_monitor_monthly[col].map(lambda x: x/(col_total+0.000001))
    shouxin_monitor_monthly[col+"_percent"][idx] = col_total 

    
shouxin_monitor_monthly

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,score_creditcard,toutiao_develop,apply_uid_online,apply_sessionId_online,apply_uid_2018_07,apply_sessionId_2018_07,apply_uid_2018_08,apply_sessionId_2018_08,apply_uid_2018_09,apply_sessionId_2018_09,...,apply_uid_2018_09_percent,apply_sessionId_2018_09_percent,apply_uid_2018_10_percent,apply_sessionId_2018_10_percent,apply_uid_2018_11_percent,apply_sessionId_2018_11_percent,apply_uid_2018_12_percent,apply_sessionId_2018_12_percent,apply_uid_2019_01_percent,apply_sessionId_2019_01_percent
0,409-560,2.80%,309.0,309.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.233737,0.23356
1,561-583,8.09%,325.0,326.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.24584,0.24641
2,584-597,10.02%,226.0,226.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.170953,0.170824
3,598-608,10.57%,139.0,139.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105144,0.105064
4,609-618,10.63%,83.0,83.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.062784,0.062736
5,619-628,11.82%,72.0,72.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054463,0.054422
6,629-638,11.28%,64.0,64.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048411,0.048375
7,639-649,11.31%,28.0,28.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02118,0.021164
8,650-664,11.67%,42.0,42.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03177,0.031746
9,665-752,11.81%,34.0,34.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025719,0.025699


In [22]:
###########################################
#### 有授信额度相关客户级和订单级汇总监控报表---月报

In [23]:
#### 申请表通过率

In [24]:
accept_rate_dairly = shouxin_monitor_dairly[['score_creditcard']]
idx = len(dairy_report['toutiao_develop']) - 1

for i in range(len(shouxin_monitor_dairly.columns)):
    if(i>=2 and i <=7):
        shouxin_col = shouxin_monitor_dairly.columns[i]
        apply_col = apply_monitor_dairly.columns[i]
#         print(shouxin_col,apply_col)

        accept_col = shouxin_col.split("_")[1]+"_"+shouxin_col.split("_")[2]
        accept_rate_dairly[accept_col+"_rate"] = shouxin_monitor_dairly[shouxin_col]/(apply_monitor_dairly[apply_col])
        accept_rate_dairly[accept_col+"_rate"][idx] = apply_monitor_dairly[apply_col][idx]

accept_rate_dairly

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,score_creditcard,uid_month_rate,sessionId_month_rate,uid_week_rate,sessionId_week_rate,uid_today_rate,sessionId_today_rate
0,409-560,0.034602,0.034602,0.029787,0.029787,0.05,0.05
1,561-583,0.067524,0.067308,0.067416,0.067164,0.083333,0.083333
2,584-597,0.058559,0.058559,0.056497,0.056497,0.0,0.0
3,598-608,0.149254,0.149254,0.180952,0.180952,0.25,0.25
4,609-618,0.269231,0.269231,0.269231,0.269231,0.0,0.0
5,619-628,0.361111,0.361111,0.4,0.4,,
6,629-638,0.419355,0.419355,0.538462,0.538462,0.0,0.0
7,639-649,0.37037,0.37037,0.526316,0.526316,,
8,650-664,0.634146,0.634146,0.592593,0.592593,0.0,0.0
9,665-752,0.529412,0.529412,0.555556,0.555556,,


In [26]:
accept_rate_monthly = shouxin_monitor_monthly[['score_creditcard']]
idx = len(dairy_report['toutiao_develop']) - 1

for i in range(len(shouxin_monitor_monthly.columns)):
    if(i>=2 and i <=16):
        shouxin_col = shouxin_monitor_monthly.columns[i]
        apply_col = apply_monitor_monthly.columns[i]
#       print(shouxin_col,apply_col)

        accept_col = shouxin_col[6:]
        accept_rate_monthly[accept_col+"_rate"] = shouxin_monitor_monthly[shouxin_col]/(apply_monitor_monthly[apply_col])
        accept_rate_monthly[accept_col+"_rate"][idx] = apply_monitor_monthly[apply_col][idx]

accept_rate_monthly = accept_rate_monthly.fillna("0")
accept_rate_monthly


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


Unnamed: 0,score_creditcard,uid_online_rate,sessionId_online_rate,uid_2018_07_rate,sessionId_2018_07_rate,uid_2018_08_rate,sessionId_2018_08_rate,uid_2018_09_rate,sessionId_2018_09_rate,uid_2018_10_rate,sessionId_2018_10_rate,uid_2018_11_rate,sessionId_2018_11_rate,uid_2018_12_rate,sessionId_2018_12_rate,uid_2019_01_rate
0,409-560,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,1.0
1,561-583,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,1.0
2,584-597,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,1.0
3,598-608,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,1.0
4,609-618,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,1.0
5,619-628,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,1.0
6,629-638,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,1.0
7,639-649,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,1.0
8,650-664,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,1.0
9,665-752,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,1.0


In [65]:
test_Sel = """

select
	score_creditcard,
	count(distinct(user_id)) as shouxin_uid_%s,
    count(distinct(session_id)) as shouxin_sessionId_%s

from
(
	select
		user_id,
        session_id,
		case
			when cast(t.out_credit_score as int) >= 409 and cast(t.out_credit_score as int) <= 560 then "409-560"
			when cast(t.out_credit_score as int) >= 561 and cast(t.out_credit_score as int) <= 583 then "561-583"
			when cast(t.out_credit_score as int) >= 584 and cast(t.out_credit_score as int) <= 597 then "584-597"
			when cast(t.out_credit_score as int) >= 598 and cast(t.out_credit_score as int) <= 608 then "598-608"
			when cast(t.out_credit_score as int) >= 609 and cast(t.out_credit_score as int) <= 618 then "609-618"
			when cast(t.out_credit_score as int) >= 619 and cast(t.out_credit_score as int) <= 628 then "619-628"
			when cast(t.out_credit_score as int) >= 629 and cast(t.out_credit_score as int) <= 638 then "629-638"			
			when cast(t.out_credit_score as int) >= 639 and cast(t.out_credit_score as int) <= 649 then "639-649"
			when cast(t.out_credit_score as int) >= 650 and cast(t.out_credit_score as int) <= 664 then "650-664"
			when cast(t.out_credit_score as int) >= 665 and cast(t.out_credit_score as int) <= 752 then "665-752"
		end as score_creditcard
	from 
			(
				SELECT
     				a.session_id as session_id,
     				a.user_id as user_id,
     				a.t02_apply_time as apply_time,
     				b.out_credit_score as out_credit_score,
                    b.OUT_FRONT_RISK_DEGREE as OUT_FRONT_RISK_DEGREE
				FROM 
				(
					SELECT 
						session_id,
       					user_id,
       					t02_apply_time
					FROM ods.ods_toutiao_shouxin_input 
				) AS a

                left JOIN
                (
                    SELECT 
                        session_id,
                        user_id,
                        out_credit_score,
                        OUT_FRONT_RISK_DEGREE
                    FROM ods.ods_toutiao_shouxin_output
                ) AS b ON a.session_id = b.session_id
                
                right join 
                (
                
                    select 
                        user_id,
                        APPLY_STATUS
                    from ods_credit_credit_apply
                    where product_no like 'PN00000053%%' and APPLY_STATUS = 12

                
                ) as c on a.user_id = c.user_id
                
        ) as t  where from_unixtime(unix_timestamp(substr(t.apply_time,0,8),'yyyymmdd'),'yyyy-mm-dd') 
            between %s and %s   
) as a group by a.score_creditcard
                

        

"""


In [66]:
dairly_shouxin_month = dairly_monitor(test_Sel,'month',1,2)
dairly_shouxin_month

Unnamed: 0,score_creditcard,shouxin_uid_month,shouxin_sessionId_month
0,629-638,26,26
1,639-649,10,10
2,665-752,18,18
3,619-628,22,22
4,598-608,15,15
5,584-597,3,3
6,650-664,26,26
7,609-618,21,21


In [67]:
dairly_shouxin_day = dairly_monitor(test_Sel,'today',0,0)
dairly_shouxin_week = dairly_monitor(test_Sel,'week',8,2)
dairly_shouxin_month = dairly_monitor(test_Sel,'month',1,2)


join_key = 'score_creditcard'

shouxin_monitor_dairly = dairy_report.merge(dairly_shouxin_month,on = join_key ,how='left')\
                                  .merge(dairly_shouxin_week,on = join_key, how='left')\
                                  .merge(dairly_shouxin_day,on = join_key, how='left').fillna(0)

col_keep = list(shouxin_monitor_dairly.columns)
col_keep.remove(join_key)
col_keep.remove('toutiao_develop')

for col in col_keep:
    col_total = sum(shouxin_monitor_dairly[col])
    idx = len(dairy_report['toutiao_develop']) - 1
    shouxin_monitor_dairly[col][idx] = col_total
    shouxin_monitor_dairly[col+"_percent"] = shouxin_monitor_dairly[col].map(lambda x: x/(col_total+0.000001))
    shouxin_monitor_dairly[col+"_percent"][idx] = col_total 

    
shouxin_monitor_dairly


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,score_creditcard,toutiao_develop,shouxin_uid_month,shouxin_sessionId_month,shouxin_uid_week,shouxin_sessionId_week,shouxin_uid_today,shouxin_sessionId_today,shouxin_uid_month_percent,shouxin_sessionId_month_percent,shouxin_uid_week_percent,shouxin_sessionId_week_percent,shouxin_uid_today_percent,shouxin_sessionId_today_percent
0,409-560,2.80%,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,561-583,8.09%,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,584-597,10.02%,3.0,3.0,2.0,2.0,0,0,0.021277,0.021277,0.018349,0.018349,0.0,0.0
3,598-608,10.57%,15.0,15.0,14.0,14.0,0,0,0.106383,0.106383,0.12844,0.12844,0.0,0.0
4,609-618,10.63%,21.0,21.0,14.0,14.0,0,0,0.148936,0.148936,0.12844,0.12844,0.0,0.0
5,619-628,11.82%,22.0,22.0,17.0,17.0,0,0,0.156028,0.156028,0.155963,0.155963,0.0,0.0
6,629-638,11.28%,26.0,26.0,21.0,21.0,0,0,0.184397,0.184397,0.192661,0.192661,0.0,0.0
7,639-649,11.31%,10.0,10.0,10.0,10.0,0,0,0.070922,0.070922,0.091743,0.091743,0.0,0.0
8,650-664,11.67%,26.0,26.0,16.0,16.0,0,0,0.184397,0.184397,0.146789,0.146789,0.0,0.0
9,665-752,11.81%,18.0,18.0,15.0,15.0,0,0,0.12766,0.12766,0.137615,0.137615,0.0,0.0
