In [28]:
# Environment bootstrap for the Spark/Hive side of the analysis.
# Statement order matters in this cell: findspark must be initialised before
# any `pyspark` import, and the star import at the end rebinds names.
import pandas as pd
# None disables truncation of long cell values when frames are displayed.
pd.set_option('display.max_colwidth', None)
import numpy as np
import findspark
# Arguments are the Spark home of the CDH parcel and the Python interpreter path.
findspark.init("/opt/cloudera/parcels/CDH-6.3.1-1.cdh6.3.1.p0.1470567/lib/spark","/usr/bin/python2.7")

import os
# JDK location exported through the process environment.
os.environ["JAVA_HOME"] = "/usr/java/jdk1.8.0_181-cloudera"

from pyspark.sql import functions as F
from pyspark import SparkContext
from pyspark.sql import SparkSession,HiveContext,Window
from pyspark.sql.types import IntegerType, FloatType, DoubleType, ArrayType, StringType, DecimalType
# NOTE(review): this wildcard import rebinds Python builtins such as `sum`,
# `min`, `max`, `abs` and `round` to their Spark column versions for every
# later cell; prefer the `F.` alias imported above.
from pyspark.sql.functions import *
import re
import datetime
from pychattr.channel_attribution import MarkovModel

# Hive-enabled Spark session; getOrCreate() returns an already-running session
# if one exists, otherwise builds one with the settings below.
spark_session = SparkSession.builder.enableHiveSupport().appName("artefact_attribution_analysis") \
    .config("spark.driver.memory","10g") \
    .config("spark.pyspark.driver.python","/usr/bin/python2.7")\
    .config("spark.pyspark.python","/usr/bin/python2.7") \
    .config("spark.yarn.executor.memoryOverhead","8G") \
    .getOrCreate()
# Hive SQL entry point built on the session's SparkContext. In the cells
# visible here it is only referenced by a commented-out query.
hc = HiveContext(spark_session.sparkContext)

In [1]:
import pandas as pd
import numpy as np
import datetime
from datetime import datetime
import itertools

In [3]:
# Column layout of the header-less, tab-separated touchpoint export.
names = [
    'mobile',
    'touchpoint_id',
    'action_time',
    'row_number',
    'time_lag',
    'tp_id_through',
    'row_number_lag',
    'conv_period',
]

# Load the event log and keep the customer key as text so joins on `mobile`
# compare strings.
df = pd.read_csv('data/tp_analysis_base.csv', sep='\t', names=names).astype({'mobile': str})

In [5]:
# fetch id mapping system
#id_mapping = hc.sql('select * from marketing_modeling.dw_touchpoints_id_system').toPandas()

In [6]:
# Touchpoint reference table (local CSV).
id_mapping = pd.read_csv('data/id_mapping.csv')

# Lookup dictionaries keyed by touchpoint_id.
tp_name = dict(zip(id_mapping['touchpoint_id'], id_mapping['touchpoint_name']))
inbound_map = dict(zip(id_mapping['touchpoint_id'], id_mapping['is_inbound']))

In [None]:
# Number of distinct customers in the event log (missing values, if any, count once).
df['mobile'].nunique(dropna=False)

##### 5.6.1 试驾后成交时长

In [9]:
# Touchpoints that count as a test drive: completed test drive, reviewed test drive.
trial_tid_list = ['007003000000_tp', '007004000000_tp']

# Earliest test-drive timestamp per customer.
trial_df = (
    df[df.touchpoint_id.isin(trial_tid_list)]
    .groupby(by='mobile', as_index=False)
    .agg({'action_time': 'min'})
    .rename(columns={'action_time': 'trial_time'})
)

# NOTE(review): `visit_df` and `deal_df` are not defined in any cell visible
# in this notebook; they must provide `mobile` plus `visit_time` / `deal_time`
# respectively for this cell to run on a fresh kernel - confirm where they come from.
# The name `trail_ana` (sic) is kept because later cells reference it.
trail_ana = (
    visit_df
    .merge(trial_df, on='mobile', how='left')
    .merge(deal_df, on='mobile', how='left')
)

# Gaps in hours measured from the visit; NaN when either timestamp is missing.
# Vectorised `.dt.total_seconds()` replaces the per-row lambda.
trail_ana['visit_trial_diff'] = (
    pd.to_datetime(trail_ana['trial_time']) - pd.to_datetime(trail_ana['visit_time'])
).dt.total_seconds() / 3600
trail_ana['visit_deal_diff'] = (
    pd.to_datetime(trail_ana['deal_time']) - pd.to_datetime(trail_ana['visit_time'])
).dt.total_seconds() / 3600

In [None]:
# Time-to-deal distribution, in hours.
# NOTE(review): the section heading refers to time after the test drive, but the
# column printed here is the visit-to-deal gap - confirm which one is intended.
print(trail_ana.visit_deal_diff.value_counts())

##### 5.6.2 试驾转化漏斗

In [None]:
def _first_touch(events, touchpoint_ids, time_col):
    """Return one row per mobile with its earliest action_time among touchpoint_ids.

    The resulting frame has the columns ``mobile`` and ``time_col``.
    """
    hits = events.loc[events.touchpoint_id.isin(touchpoint_ids), ['mobile', 'action_time']]
    return (
        hits.rename(columns={'action_time': time_col})
        .groupby(by='mobile', as_index=False)
        .agg({time_col: 'min'})
    )


def _hours_between(later, earlier):
    """Signed gap ``later - earlier`` in hours; NaN where either timestamp is missing."""
    return (pd.to_datetime(later) - pd.to_datetime(earlier)).dt.total_seconds() / 3600


# Funnel base: customers with an intent card created.
base = _first_touch(df, ['004000000000_tp', '005000000000_tp'], 'cust_time')

# First showroom visit, first test drive and first deal per customer.
visit_df_g = _first_touch(df, ['006000000000_tp'], 'visit_time')
trial_df_g = _first_touch(df, ['007003000000_tp', '007004000000_tp'], 'trial_time')
deal_df_g = _first_touch(df, ['011000000000_tp'], 'deal_time')

# Left joins keep every base customer; stages never reached stay NaN.
base_visit = (
    base
    .merge(visit_df_g, on='mobile', how='left')
    .merge(trial_df_g, on='mobile', how='left')
    .merge(deal_df_g, on='mobile', how='left')
)

# Stage-to-stage gaps in hours (positive when the first-named stage precedes the second).
base_visit['trial_deal_diff'] = _hours_between(base_visit.deal_time, base_visit.trial_time)
base_visit['visit_deal_diff'] = _hours_between(base_visit.deal_time, base_visit.visit_time)
base_visit['visit_trial_diff'] = _hours_between(base_visit.trial_time, base_visit.visit_time)
base_visit['trial_cust_diff'] = _hours_between(base_visit.trial_time, base_visit.cust_time)
base_visit['visit_cust_diff'] = _hours_between(base_visit.visit_time, base_visit.cust_time)

def _count_customers(mask):
    """Number of distinct mobiles among the base_visit rows selected by mask."""
    return len(base_visit[mask].mobile.unique())


# Shared funnel conditions. All gaps are in hours; the -24 lower bound
# tolerates stages logged up to a day out of order.
_visited = base_visit.visit_cust_diff >= -24
_trial_after_visit = base_visit.visit_trial_diff >= -24
_no_trial = base_visit.trial_time.isna()

# Visited the showroom
print('visit: ', _count_customers(_visited))

# Test drive after the visit
print('trial: ', _count_customers(_trial_after_visit & _visited))

# Test drive after the visit, deal after the test drive (not same day)
print('trial_deal: ', _count_customers(
    (base_visit.trial_deal_diff > 24) & _trial_after_visit & _visited))
# Test drive after the visit, deal after the test drive (same day)
print('trial_deal: ', _count_customers(
    (base_visit.trial_deal_diff >= -24) & (base_visit.trial_deal_diff <= 24)
    & _trial_after_visit & _visited))

# Deal directly after the visit, no test drive (not same day)
print('direct_deal: ', _count_customers(
    _no_trial & _visited & (base_visit.visit_deal_diff > 24)))
# Deal directly after the visit, no test drive (same day).
# The upper bound must be on visit_deal_diff: trial_deal_diff is NaN whenever
# trial_time is missing, so bounding it would always yield an empty selection.
print('direct_deal: ', _count_customers(
    _no_trial & _visited
    & (base_visit.visit_deal_diff >= -24) & (base_visit.visit_deal_diff <= 24)))

##### 5.6.3 三类用户 (到店试驾、到店未试驾当天成交、到店未试驾非当天成交) 的触点覆盖度

In [None]:
# Cohort membership lists (arrays of mobile) taken from the funnel table.
# Gaps are in hours; the -24 lower bound tolerates stages logged up to a day
# out of order.

# Test drive after the visit
trial_cust = base_visit[
    (base_visit.visit_trial_diff >= -24)
    & (base_visit.visit_cust_diff >= -24)
].mobile.unique()

# Deal directly after the visit, no test drive (not same day)
deal_not_sd = base_visit[
    base_visit.trial_time.isna()
    & (base_visit.visit_cust_diff >= -24)
    & (base_visit.visit_deal_diff > 24)
].mobile.unique()

# Deal directly after the visit, no test drive (same day).
# The same-day window is applied to visit_deal_diff: trial_deal_diff is NaN for
# customers without a test drive, so filtering on it would select nobody.
deal_sd = base_visit[
    base_visit.trial_time.isna()
    & (base_visit.visit_cust_diff >= -24)
    & (base_visit.visit_deal_diff >= -24)
    & (base_visit.visit_deal_diff <= 24)
].mobile.unique()

In [None]:
def _touchpoint_coverage(events, members):
    """Count distinct customers from members per touchpoint, before their deal.

    ``events`` must carry ``mobile``, ``touchpoint_id``, ``action_time`` and
    ``deal_time``. Events of customers without a deal are all kept.
    """
    before_deal = (events.action_time < events.deal_time) | events.deal_time.isna()
    reached = events.loc[
        events.mobile.isin(members) & before_deal, ['mobile', 'touchpoint_id']
    ].drop_duplicates()
    return reached.groupby(by='touchpoint_id', as_index=False).agg({'mobile': 'count'})


# Attach each customer's first deal time to every event. The filters must read
# this merged frame: `df` itself has no `deal_time` column.
base = df.merge(deal_df_g, on='mobile', how='left')

# One coverage table per cohort, stacked under a `cohort` index level so all
# three are displayed rather than only the last expression of the cell.
coverage_by_cohort = pd.concat(
    {
        'trial': _touchpoint_coverage(base, trial_cust),
        'deal_not_same_day': _touchpoint_coverage(base, deal_not_sd),
        'deal_same_day': _touchpoint_coverage(base, deal_sd),
    },
    names=['cohort'],
)
coverage_by_cohort