**분석개요**

    A. Wallpaper 로그인 전환 이탈 리포트
        1. KPI
            - PV
            - 다운로드
            - 전환율

In [1]:
# ---- basic / runtime configuration ----
import gc
import os
import sys
import warnings
# NOTE: this silences every warning category for the whole notebook session.
warnings.filterwarnings(action='ignore') 
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

#--------------------#
# data handling
#--------------------#
import math
import time
import random
# import openpyxl
import importlib
import xlsxwriter
import numpy as np
import pandas as pd
from collections import Counter
from datetime import datetime, timedelta
from scipy.stats import pearsonr
import difflib

#--------------------#
# visualisation
#--------------------#
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
## notebook plot theme (jupyterthemes)
from IPython.display import Image
from jupyterthemes import jtplot
jtplot.style(theme= 'grade3', context='notebook', ticks=True, grid=False) ## apply the jupyterthemes 'grade3' plot style
# plt.style.use(['seaborn-white']) ## alternative: white matplotlib style
plt.style.use(['dark_background']) ## matplotlib dark-background style (swap for 'seaborn-white' for a white style)
## Korean text in figures: make the Nanum font the default matplotlib font family
import matplotlib.font_manager as fm
font_location = '/usr/share/fonts/truetype/nanum/NanumSquareRoundB.ttf'
fprop = fm.FontProperties(fname=font_location)
font_name = fprop.get_name()
matplotlib.rc('font', family=font_name)

In [2]:
#--------------------#
# sphere package
#--------------------#
# Each module is reloaded BEFORE its star import: names bound by
# `from X import *` are not refreshed by a later importlib.reload(X), so the
# original order left the notebook pointing at the pre-reload objects.
sys.path.append("/home/das_share/sphere_class/")
import SpherePackage
for pkg in [SpherePackage] :
    _ = importlib.reload(pkg)
from SpherePackage import *

#--------------------#
# kto package
#--------------------#
sys.path.append("../src/")
import kto_config
import kto_util
import kto_prep

for pkg in [kto_config, kto_prep, kto_util] :
    _ = importlib.reload(pkg)

from kto_config import *
from kto_prep import *
from kto_util import *

#-------------------#
# pickle
#-------------------#
# sys.path entries have to be directories (or zip archives); a path ending in
# "return_pickle.py" is never searched, so register the containing directory.
# It is appended at the end, so directories already on sys.path keep priority.
sys.path.append("/home/minkyung62/analysis_report/00_custom_analysis/kto_custom/notebook_git/")
import return_pickle
for pkg in [return_pickle] :
    _ = importlib.reload(pkg)
from return_pickle import *

#-------------------#
# customer decision journey (SphereCDJ)
#-------------------#
# Same fix as above: register the directory that contains SphereCDJ.py.
sys.path.append("/home/das_share/analysis/cdj/src/")
import SphereCDJ
for pkg in [SphereCDJ] :
    _ = importlib.reload(pkg)
from SphereCDJ import *

# 1. Data Import

In [4]:
#------------------------------------------------#
# 1. log data import
#------------------------------------------------#
## target period (strings in YYYYMMDD form)
s_date, e_date = '20221020', '20221130'
_DATE_FMT = '%Y%m%d'
today = datetime.strptime(e_date, _DATE_FMT) # today = datetime(2021,5,6)
# number of calendar days in the target period, both ends included
dates = (today - datetime.strptime(s_date, _DATE_FMT)).days + 1
# first day of the "after" period; the before/after log splits below are
# derived from it so the three values cannot drift apart
change_date = datetime(2022,11,3)

CheckDate.check_date_return_pickle(e_date, s_date = s_date)

_before_end = (change_date - timedelta(days=1)).strftime(_DATE_FMT)   # '20221102'
_after_start = change_date.strftime(_DATE_FMT)                        # '20221103'

df_app_log = ReadFile.read_pickle(e_date=e_date, s_date = s_date)
df_app_log_before = ReadFile.read_pickle(e_date=_before_end, s_date=s_date)
df_app_log_after = ReadFile.read_pickle(e_date=e_date, s_date=_after_start)

# 2. Preprocess

In [5]:
#------------------------------------------------#
# 1. prep
#------------------------------------------------#
## 1) log
df_app_log = SpherePrep.BasicPrep.basic_prep(df_app_log)                # log data

## 2) user prop
df_prop = SpherePrep.Prop.df_prop_pipe(df_app_log, KEY_ID = [KEY_ID_DEVICE, KEY_ID_USER], drop_none_prop=False)       # user data

## 3) param - 1
# _, df_param_all = DataImport.json_to_dataframe_nodeN(
#     df_app_log, [KEY_ID_DEVICE, KEY_ID_USER]
#     )

#------------------------------------------------#
# 2. param detail data
#------------------------------------------------#
## 1) build the wallpaper-related subsets of the log
EVENT_MAIN_WALLPAPER = 'wallPaperMain'
EVENT_DOWNLOAD_WALLPAPER = 'wallPaperDownload'

# Rows whose 'abs_events' value contains the event name.
_has_main_event = df_app_log['abs_events'].apply(lambda x: EVENT_MAIN_WALLPAPER in x)
_has_download_event = df_app_log['abs_events'].apply(lambda x: EVENT_DOWNLOAD_WALLPAPER in x)

# Explicit copies: the 'day' column is overwritten below, and assigning onto
# a filtered slice is a chained-assignment hazard (SettingWithCopyWarning,
# which the notebook-wide warning filter would hide).
df_app_log_wallpaper = df_app_log[_has_main_event].copy()
df_app_log_download = df_app_log[_has_download_event].copy()

## 2) convert 'day' to datetime
df_app_log_wallpaper['day'] = pd.to_datetime(df_app_log_wallpaper['day'], format = '%Y-%m-%d')
df_app_log_download['day'] = pd.to_datetime(df_app_log_download['day'], format = '%Y-%m-%d')

## 1) Utils

In [6]:
def date_setting(date):
    """Parse a ``YYYY-MM-DD`` string and return it as a ``datetime``.

    Raises ``ValueError`` (from ``datetime.strptime``) when *date* does not
    match that format.
    """
    return datetime.strptime(date, "%Y-%m-%d")

def return_df(_df_target1, _df_target2, end_date, start_date = None):
    """Restrict two frames to the same half-open ``day`` window.

    Both frames must have a ``day`` column comparable with the given bounds.

    Args:
        _df_target1: first frame to filter.
        _df_target2: second frame to filter.
        end_date: exclusive upper bound; rows with ``day < end_date`` are kept.
        start_date: optional inclusive lower bound; when given, only rows
            with ``day >= start_date`` are kept.

    Returns:
        Tuple ``(filtered_1, filtered_2)`` in the same order as the inputs.
        The inputs themselves are not modified.
    """
    def _window(df):
        # One mask per frame: [start_date, end_date) or (-inf, end_date).
        mask = df['day'] < end_date
        # Identity check: `!= None` would dispatch to the bound's own __ne__.
        if start_date is not None:
            mask = mask & (df['day'] >= start_date)
        return df[mask]

    return _window(_df_target1), _window(_df_target2)

def _count_event(events, event_name):
    """Return how many times *event_name* occurs in *events* (0 if absent)."""
    return Counter(events)[event_name] if event_name in events else 0


def calculate_kpi(df_pv, df_download, text, per_user_kpi = False):
    """Compute wallpaper page-view / download KPIs for one period.

    Args:
        df_pv: log rows for the wallpaper main page; needs the columns
            ``abs_events``, ``day`` and ``KEY_ID_DEVICE``.
        df_download: log rows for wallpaper downloads; same columns.
        text: label used as the index of the single-row period summary.
        per_user_kpi: when truthy, also add the conversion rate based on the
            number of distinct downloading devices (repeat downloads by the
            same device count once).

    Returns:
        ``(df_week_kpi, df_daily_kpi)``: a one-row frame for the whole period
        and a frame indexed by ``day``. In the daily frame, a day that is
        missing from one of the two inputs has NaN in the columns that come
        from that input and in the ratios built on them.

    The input frames are not modified; helper columns are added to copies.
    """
    # Work on copies so 'pv_cnt' / 'download_cnt' never leak into (or are
    # ambiguously written to slices of) the caller's frames.
    df_pv = df_pv.copy()
    df_download = df_download.copy()

    df_pv['pv_cnt'] = df_pv['abs_events'].apply(
        lambda events: _count_event(events, EVENT_MAIN_WALLPAPER))
    df_download['download_cnt'] = df_download['abs_events'].apply(
        lambda events: _count_event(events, EVENT_DOWNLOAD_WALLPAPER))

    ## 1) period-level KPI (insertion order defines the output column order)
    dict_kpi = {}
    dict_kpi['pv_cnt'] = df_pv['pv_cnt'].sum()
    dict_kpi['pv_device_cnt'] = df_pv[KEY_ID_DEVICE].nunique()
    dict_kpi['dw_device_cnt'] = df_download[KEY_ID_DEVICE].nunique()
    dict_kpi['download_cnt'] = df_download['download_cnt'].sum()

    # Guard against an empty period: report 0 instead of dividing by zero.
    if (dict_kpi['pv_cnt'] != 0 and dict_kpi['pv_device_cnt'] != 0):
        dict_kpi['페이지뷰 대비 전환율'] = dict_kpi['download_cnt'] / dict_kpi['pv_cnt']
        dict_kpi['사용자 대비 전환율'] = dict_kpi['download_cnt'] / dict_kpi['pv_device_cnt']
    else:
        dict_kpi['페이지뷰 대비 전환율'] = 0
        dict_kpi['사용자 대비 전환율'] = 0

    df_week_kpi = pd.DataFrame([dict_kpi], index=[text])

    ## 2) daily KPI
    pv_by_day = df_pv.groupby('day')
    dw_by_day = df_download.groupby('day')
    df_daily_kpi = pd.concat(
        [
            pv_by_day['pv_cnt'].sum().to_frame(name = 'pv_cnt'),
            pv_by_day[KEY_ID_DEVICE].nunique().to_frame(name = 'pv_device_cnt'),
            dw_by_day['download_cnt'].sum().to_frame(name = 'download_cnt'),
            dw_by_day[KEY_ID_DEVICE].nunique().to_frame(name = 'dw_device_cnt'),
        ],
        axis = 1,
    )
    df_daily_kpi['페이지뷰 대비 전환율'] = df_daily_kpi['download_cnt'] / df_daily_kpi['pv_cnt']
    df_daily_kpi['사용자 대비 전환율'] = df_daily_kpi['download_cnt'] / df_daily_kpi['pv_device_cnt']

    # TODO: a per-day *average* KPI variant was sketched here but never enabled.

    ## 3) conversion rate counting each downloading device once
    if per_user_kpi:
        # Same distinct-device counts as 'dw_device_cnt' above, so reuse them.
        if dict_kpi['pv_device_cnt'] != 0:
            df_week_kpi['사용자 대비 전환율(유저 기준 중복 집계 제외)'] =\
                dict_kpi['dw_device_cnt'] / dict_kpi['pv_device_cnt']
        else:
            df_week_kpi['사용자 대비 전환율(유저 기준 중복 집계 제외)'] = 0

        df_daily_kpi['사용자 대비 전환율(유저 기준 중복 집계 제외)'] =\
            df_daily_kpi['dw_device_cnt'] / df_daily_kpi['pv_device_cnt']

    return df_week_kpi, df_daily_kpi

# 3. Analysis

## 1) KPI
- PV
- 다운로드
- 전환율
    - 페이지뷰 대비
    - 사용자 대비

In [7]:
#########################################
# 0. Overall KPI
## ** KPI for the period before vs. after the service change
#########################################
_date = date_setting("2022-11-03")

## 1) before the change: every row with day < 2022-11-03
(_df_event_wallpaper_before,
 _df_event_download_before) = return_df(
    df_app_log_wallpaper, df_app_log_download, end_date=_date)

_df_output_before, _ = calculate_kpi(
    _df_event_wallpaper_before, _df_event_download_before,
    text = '전환 전', per_user_kpi=True)

## 2) after the change: 2022-11-03 <= day < 2022-11-28
(_df_event_wallpaper_after,
 _df_event_download_after) = return_df(
    df_app_log_wallpaper, df_app_log_download,
    end_date= date_setting("2022-11-28"), start_date=_date)

_df_output_after, _ = calculate_kpi(
    _df_event_wallpaper_after, _df_event_download_after,
    text = '전환 후', per_user_kpi=True)

## 3) stack the two one-row summaries and display them
_df_output_mean = pd.concat([_df_output_before, _df_output_after], axis=0)
_df_output_mean

Unnamed: 0,pv_cnt,pv_device_cnt,dw_device_cnt,download_cnt,페이지뷰 대비 전환율,사용자 대비 전환율,사용자 대비 전환율(유저 기준 중복 집계 제외)
전환 전,3237,2273,798,4262,1.316651,1.875055,0.351078
전환 후,4750,2591,370,2737,0.576211,1.056349,0.142802


In [8]:
#########################################
# 1. Weekly / daily KPI
## ** Weeks start on Monday: the tagging went live on a Monday, so there is
## ** no data for the preceding Sunday and Monday is used as the week start.
#########################################
## date setting (bucket boundaries)
_date_1 = date_setting("2022-10-31")
_date_2 = date_setting("2022-11-07")
_date_3 = date_setting("2022-11-14")
_date_4 = date_setting("2022-11-21")
_date_5 = date_setting("2022-11-28")
_date_6 = date_setting("2022-12-01")

lst_text = ['10월_4주차', '11월_1주차', '11월_2주차', '11월_3주차', '11월_4주차', '11월_5주차']

## data setting
# Bucket i covers [start, end). The first bucket has no lower bound
# (everything before 2022-10-31) and the last one stops at 2022-12-01.
_bucket_starts = [None, _date_1, _date_2, _date_3, _date_4, _date_5]
_bucket_ends = [_date_1, _date_2, _date_3, _date_4, _date_5, _date_6]

lst_output_1 = []   # wallpaper page-view rows per bucket
lst_output_2 = []   # wallpaper download rows per bucket
for _start, _end in zip(_bucket_starts, _bucket_ends):
    _df_wallpaper, _df_download = return_df(
        df_app_log_wallpaper, df_app_log_download, end_date=_end, start_date=_start)
    lst_output_1.append(_df_wallpaper)
    lst_output_2.append(_df_download)

## calculate KPI
# Collect the per-bucket results and concatenate once instead of growing a
# DataFrame from an empty seed inside the loop.
_lst_week_kpi = []
_lst_daily_kpi = []
for _df1, _df2, _t in zip(lst_output_1, lst_output_2, lst_text):
    _df_output1, _df_output2 =\
        calculate_kpi(_df1, _df2, text = _t, per_user_kpi=True)

    _lst_week_kpi.append(_df_output1)
    _lst_daily_kpi.append(_df_output2)

df_output_week = pd.concat(_lst_week_kpi)
df_output_daily = pd.concat(_lst_daily_kpi)

df_output_week
df_output_daily

Unnamed: 0,pv_cnt,pv_device_cnt,dw_device_cnt,download_cnt,페이지뷰 대비 전환율,사용자 대비 전환율,사용자 대비 전환율(유저 기준 중복 집계 제외)
10월_4주차,1975,1448,459,2302,1.16557,1.589779,0.316989
11월_1주차,2247,1449,442,2397,1.066756,1.654244,0.305038
11월_2주차,1438,849,114,977,0.679416,1.150766,0.134276
11월_3주차,1222,707,95,681,0.557283,0.963225,0.134371
11월_4주차,1105,624,84,642,0.580995,1.028846,0.134615
11월_5주차,617,331,69,573,0.928687,1.731118,0.208459


Unnamed: 0_level_0,pv_cnt,pv_device_cnt,download_cnt,dw_device_cnt,페이지뷰 대비 전환율,사용자 대비 전환율,사용자 대비 전환율(유저 기준 중복 집계 제외)
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-10-24,114,64,77,13,0.675439,1.203125,0.203125
2022-10-25,149,96,190,31,1.275168,1.979167,0.322917
2022-10-26,116,89,154,29,1.327586,1.730337,0.325843
2022-10-27,94,84,93,19,0.989362,1.107143,0.22619
2022-10-28,290,237,371,82,1.27931,1.565401,0.345992
2022-10-29,762,577,944,177,1.238845,1.636049,0.306759
2022-10-30,450,353,473,117,1.051111,1.339943,0.331445
2022-10-31,324,257,376,92,1.160494,1.463035,0.357977
2022-11-01,664,453,1390,192,2.093373,3.068433,0.423841
2022-11-02,274,207,194,66,0.708029,0.937198,0.318841


In [9]:
## Average number of distinct downloading devices per day, before vs. after the change
_, _df_event_download_before = return_df(
    df_app_log_wallpaper,
    df_app_log_download,
    end_date = date_setting("2022-11-03"),
)

_, _df_event_download_after = return_df(
    df_app_log_wallpaper,
    df_app_log_download,
    end_date = date_setting("2022-11-28"),
    start_date = date_setting("2022-11-03"),
)

# Distinct devices per day first, then the mean over the days that have downloads.
_df_event_download_before.groupby('day')[KEY_ID_DEVICE].nunique().mean()
_df_event_download_after.groupby('day')[KEY_ID_DEVICE].nunique().mean()

81.8

16.28

## 2) 2차 피드백
- 신규 사용자 비율

In [10]:
#########################################
# 2. New-user (sign-up) ratio
#########################################
# 0. segment setting
df_prop_after = SpherePrep.Prop.df_prop_pipe(
                df_app_log_after, KEY_ID = [KEY_ID_DEVICE, KEY_ID_USER], drop_none_prop=False)

df_prop_before = SpherePrep.Prop.df_prop_pipe(
                df_app_log_before, KEY_ID = [KEY_ID_DEVICE, KEY_ID_USER], drop_none_prop=False)

# 1. Devices that never logged in before the change
## 1) split the pre-change property rows by whether a user id is present
_user_missing_before = df_prop_before[KEY_ID_USER].isna()
df_prop_notlogin_before = df_prop_before[_user_missing_before]
df_prop_login_before = df_prop_before[~_user_missing_before]

## 2) (devices seen without login) minus (devices seen with login)
### (1) devices that appear both with and without a user id before the change
lst_target_device_remove = df_prop_login_before[KEY_ID_DEVICE][
                            df_prop_login_before[KEY_ID_DEVICE].isin(
                                df_prop_notlogin_before[KEY_ID_DEVICE])].unique()

### (2) not-logged-in devices minus that overlap = devices with no login at all before the change
lst_target = list(set(df_prop_notlogin_before[KEY_ID_DEVICE]) - set(lst_target_device_remove))           # base population for the KPI

##############################################################################################

# 2. Devices that opened the wallpaper page after the change
lst_device_af_conn_wall = df_app_log_wallpaper[KEY_ID_DEVICE][df_app_log_wallpaper['day'] > '20221102'].unique()

# 3. Never logged in before the change AND opened the wallpaper page after it
lst_device_bf_loginx_af_conn_wall = list(
                                        set(lst_target) & set(lst_device_af_conn_wall)
                                    )

# 4. Of those, devices that were logged in when they opened the wallpaper page
#    after the change, and the resulting ratio
df_target_wallpaper = df_app_log_wallpaper[df_app_log_wallpaper[KEY_ID_DEVICE].isin(lst_device_bf_loginx_af_conn_wall)]

# df_target_wallpaper still holds these devices' pre-change rows, so the
# numerator is explicitly limited to post-change rows, as step 4 describes.
_logged_in_after_change = (
    df_target_wallpaper[KEY_ID_USER].notna()
    & (df_target_wallpaper['day'] > '20221102')
)
_x = df_target_wallpaper[KEY_ID_DEVICE][_logged_in_after_change].nunique()

# Denominator: never-logged-in devices that opened the wallpaper page before the change.
_df1 = df_app_log_wallpaper[
        df_app_log_wallpaper['day'] < "20221103"]

_y = _df1[KEY_ID_DEVICE][
        _df1[KEY_ID_DEVICE].isin(lst_target)].nunique()

signup_conversion_rate = _x/_y

signup_conversion_rate*100
_x
_y

4.970087436723423

108

2173

In [11]:
# Number of lst_target devices that have at least one row with a user id in the post-change log.
_devices_with_user_after = df_app_log_after.loc[
    df_app_log_after[KEY_ID_USER].notna(), KEY_ID_DEVICE].unique()
len(set(lst_target).intersection(_devices_with_user_after))

1979