**분석개요**

    A. Wallpaper 로그인 전환 이탈 리포트
        1. KPI
            - PV
            - 다운로드
            - 전환율

In [1]:
# basic
import gc
import os
import sys
import warnings
warnings.filterwarnings(action='ignore') 
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

#--------------------#
# handling
#--------------------#
import math
import time
import random
# import openpyxl
import importlib
import xlsxwriter
import numpy as np
import pandas as pd
from collections import Counter
from datetime import datetime, timedelta
from scipy.stats import pearsonr
import difflib

#--------------------#
# Vis
#--------------------#
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
## Vis for jupyter theme
from IPython.display import Image
from jupyterthemes import jtplot
jtplot.style(theme= 'grade3', context='notebook', ticks=True, grid=False) ## dark backgroud jupyter notebook theme
# plt.style.use(['seaborn-white']) ## dark background style ## white style : 'seaborn-white'
plt.style.use(['dark_background']) ## dark background style ## white style : 'seaborn-white'
## Vis for korean 
import matplotlib.font_manager as fm
font_location = '/usr/share/fonts/truetype/nanum/NanumSquareRoundB.ttf'
fprop = fm.FontProperties(fname=font_location)
font_name = fprop.get_name()
matplotlib.rc('font', family=font_name)

In [2]:
#--------------------#
# sphere package
#--------------------#
sys.path.append("/home/das_share/sphere_class/")
import SpherePackage
from SpherePackage import *
for pkg in [SpherePackage] :
    _ = importlib.reload(pkg)

#--------------------#
# kto package
#--------------------#
sys.path.append("../src/")
import kto_config
import kto_util
import kto_prep
from kto_config import *
from kto_prep import *
from kto_util import *

for pkg in [kto_config, kto_prep, kto_util] :
    _ = importlib.reload(pkg)

#-------------------#
# pickle
#-------------------#
sys.path.append("/home/minkyung62/analysis_report/00_custom_analysis/kto_custom/notebook_git/return_pickle.py")
import return_pickle
from return_pickle import *
for pkg in [return_pickle] :
    _ = importlib.reload(pkg)

# 1.Data Import

In [11]:
#------------------------------------------------#
# 1. log Data import
#------------------------------------------------#
## target period
s_date, e_date = '20221020', '20221113' 
today = datetime.strptime(e_date, '%Y%m%d') # today = datetime(2021,5,6)
dates = (datetime.strptime(e_date, '%Y%m%d') - datetime.strptime(s_date, '%Y%m%d')).days + 1 
change_date = datetime(2022,11,3)    

df_app_log = ReadFile.read_pickle(e_date=e_date, s_date = s_date)
df_app_log_before = ReadFile.read_pickle(e_date='20221102', s_date='20221020')
df_app_log_after = ReadFile.read_pickle(e_date='20221113', s_date='20221103')

# 2.Preprocess

In [4]:
#------------------------------------------------#
# 1. prep
#------------------------------------------------#
## 1) log
df_app_log = SpherePrep.BasicPrep.basic_prep(df_app_log)                # log data

## 2) user prop
df_prop = SpherePrep.Prop.df_prop_pipe(df_app_log, KEY_ID = [KEY_ID_DEVICE, KEY_ID_USER], drop_none_prop=False)       # user data

## 3) param - 1
# _, df_param_all = DataImport.json_to_dataframe_nodeN(
#     df_app_log, [KEY_ID_DEVICE, KEY_ID_USER]
#     )

#------------------------------------------------#
# 2. param detail data
#------------------------------------------------#
## 1) wallpaper 관련 param data 생성
EVENT_MAIN_WALLPAPER = 'wallPaperMain'
EVENT_DOWNLOAD_WALLPAPER = 'wallPaperDownload'

df_app_log_wallpaper = df_app_log[df_app_log['abs_events'].apply(lambda x: True if EVENT_MAIN_WALLPAPER in x else False)]
df_app_log_download = df_app_log[df_app_log['abs_events'].apply(lambda x: True if EVENT_DOWNLOAD_WALLPAPER in x else False)]

## 2) datetime으로 type 변환
df_app_log_wallpaper['day'] = pd.to_datetime(df_app_log_wallpaper['day'], format = '%Y-%m-%d')
df_app_log_download['day'] = pd.to_datetime(df_app_log_download['day'], format = '%Y-%m-%d')

## 1)Utils

In [5]:
def date_setting(date):
    _date = datetime.strptime(date, "%Y-%m-%d")
    return _date

def return_df(_df_target1, _df_target2, end_date, start_date = None):
    ## 특정 주차 데이터 구하기
    if start_date != None:
        _df_target1 = _df_target1[_df_target1['day'] >= start_date]
        _df_target2 = _df_target2[_df_target2['day'] >= start_date]

    _df_output1 = _df_target1[_df_target1['day'] < end_date]
    _df_output2 = _df_target2[_df_target2['day'] < end_date]


    return _df_output1, _df_output2

def calculate_kpi(df_pv, df_download, text, per_user_kpi = False):
    dict_kpi= {}

    ## 1) 주차별 KPI 
    df_pv['pv_cnt'] = df_pv['abs_events'].apply(lambda x : Counter(x)[EVENT_MAIN_WALLPAPER] if EVENT_MAIN_WALLPAPER in x else 0)
    dict_kpi['pv_cnt'] = df_pv['pv_cnt'].sum()

    dict_kpi['pv_device_cnt'] = df_pv[KEY_ID_DEVICE].nunique()
    dict_kpi['dw_device_cnt'] = df_download[KEY_ID_DEVICE].nunique()

    df_download['download_cnt'] = df_download['abs_events'].apply(lambda x: Counter(x)[EVENT_DOWNLOAD_WALLPAPER] if EVENT_DOWNLOAD_WALLPAPER in x else 0)
    dict_kpi['download_cnt']  = df_download['download_cnt'].sum()

    if (dict_kpi['pv_cnt'] != 0 and dict_kpi['pv_device_cnt']!= 0):
        dict_kpi['페이지뷰 대비 전환율']  = dict_kpi['download_cnt'] / dict_kpi['pv_cnt']
        dict_kpi['사용자 대비 전환율']  = dict_kpi['download_cnt'] / dict_kpi['pv_device_cnt']
    
    else:
        dict_kpi['페이지뷰 대비 전환율']  = 0
        dict_kpi['사용자 대비 전환율']  = 0
    
    df_week_kpi = pd.DataFrame.from_dict([dict_kpi]).rename(index={0:text})

    ## 2) 일별 KPI
    _df_output_1_1 = df_pv.groupby('day')['pv_cnt'].sum().to_frame(name = 'pv_cnt')
    _df_output_1_2 = df_pv.groupby('day')[KEY_ID_DEVICE].nunique().to_frame(name = 'pv_device_cnt')
    _df_output_1_3 = df_download.groupby('day')['download_cnt'].sum().to_frame(name = 'download_cnt')
    df_daily_kpi = pd.concat([_df_output_1_1, _df_output_1_2, _df_output_1_3], axis = 1)
    df_daily_kpi['페이지뷰 대비 전환율'] = df_daily_kpi['download_cnt'] / df_daily_kpi['pv_cnt']
    df_daily_kpi['사용자 대비 전환율'] = df_daily_kpi['download_cnt'] / df_daily_kpi['pv_device_cnt']

    ## 3) 평균 KPI
    # if get_mean == True:
    #     _df_output_mean = df_pv.groupby('day')['pv_cnt'].mean().to_frame(name = 'mean_pv_cnt')
    #     _df_ouput_mean2 = df_pv.groupby('day')[KEY_ID_DEVICE].nunique().mean().to_frame(name = 'mean_pv_device_cnt')
    #     _df_ouput_mean3 = df_download.groupby('day')['download_cnt'].mean().to_frame(name = 'mean_download_cnt')
    #     df_mean_kpi = pd.concat([_df_output_1_1, _df_output_1_2, _df_output_1_3], axis = 1)
    #     df_mean_kpi['평균 페이지뷰 대비 전환율'] = df_mean_kpi['mean_download_cnt'] / df_mean_kpi['mean_pv_cnt']
    #     df_mean_kpi['평균 사용자 대비 전환율'] = df_mean_kpi['mean_download_cnt'] / df_mean_kpi['mean_pv_device_cnt']

    if per_user_kpi == True:
        ## 한 유저당 중복 집계된 download 수를 제외한 전환율 도출 !!!!!!!!!!!!! 평균 전환율!!!!!!!!
        ### 3) 주차별 전환율
        _download_device_cnt = df_download[KEY_ID_DEVICE].nunique()
        
        if dict_kpi['pv_device_cnt']!= 0:
            df_week_kpi['사용자 대비 전환율(유저 기준 중복 집계 제외)'] =\
                _download_device_cnt / dict_kpi['pv_device_cnt']
        else:
            df_week_kpi['사용자 대비 전환율(유저 기준 중복 집계 제외)'] = 0

        ### 4) 일별 전환율
        _download_device_cnt = df_download.groupby('day')[KEY_ID_DEVICE].nunique().to_frame()[KEY_ID_DEVICE]

        df_daily_kpi['사용자 대비 전환율(유저 기준 중복 집계 제외)'] =\
            _download_device_cnt / df_daily_kpi['pv_device_cnt']


    return df_week_kpi, df_daily_kpi

# 3.Analysis

## 1)KPI
- PV
- 다운로드
- 전환율
    - 페이지뷰 대비
    - 사용자 대비

In [92]:
#########################################
# 0. 전체 KPI
## ** 서비스 전환 전후 평균 KPI 도출
#########################################
## 1) 전환 전
_date = date_setting("2022-11-03")

_df_event_wallpaper_0, _df_event_download_0 =\
    return_df(df_app_log_wallpaper, df_app_log_download, end_date=_date)

_df_event_wallpaper_0_1, _df_event_download_0_1 =\
    return_df(df_app_log_wallpaper, df_app_log_download, end_date= date_setting("2022-11-13"), start_date=_date)

_df_output_0_1, _ =\
    calculate_kpi(_df_event_wallpaper_0, _df_event_download_0, text = '전환 전', per_user_kpi=True)

_df_output_0_2, _ =\
    calculate_kpi(_df_event_wallpaper_0_1, _df_event_download_0_1, text = '전환 후', per_user_kpi=True)

_df_output_mean = pd.concat([_df_output_0_1, _df_output_0_2], axis=0)

_df_output_mean

Unnamed: 0,pv_cnt,pv_device_cnt,dw_device_cnt,download_cnt,페이지뷰 대비 전환율,사용자 대비 전환율,사용자 대비 전환율(유저 기준 중복 집계 제외)
전환 전,2851,2019,754,3617,1.268678,1.791481,0.373452
전환 후,1815,1086,179,872,0.480441,0.802947,0.164825


In [94]:
_df_output_1_2

Unnamed: 0_level_0,pv_cnt,pv_device_cnt,download_cnt,페이지뷰 대비 전환율,사용자 대비 전환율,사용자 대비 전환율(유저 기준 중복 집계 제외)
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-10-24,76,48,32,0.421053,0.666667,0.25
2022-10-25,95,63,78,0.821053,1.238095,0.380952
2022-10-26,88,67,97,1.102273,1.447761,0.343284
2022-10-27,72,62,72,1.0,1.16129,0.258065
2022-10-28,255,212,262,1.027451,1.235849,0.34434
2022-10-29,731,550,904,1.236662,1.643636,0.318182
2022-10-30,435,339,460,1.057471,1.356932,0.342183


In [93]:
#########################################
# 1. 주차별/일별 KPI
## ** 한 주 시작일 = 월요일 -> 태깅이 월요일에 반영되어 일요일 데이터가 없어서 월요일로 시작점을 설정함
#########################################
## date setting
_date_1 = date_setting("2022-10-31")
_date_2 = date_setting("2022-11-07")
_date_3 = date_setting("2022-11-14")

## data setting
_df_event_wallpaper_1, _df_event_download_1 =\
    return_df(df_app_log_wallpaper, df_app_log_download, end_date=_date_1)

_df_event_wallpaper_2, _df_event_download_2 =\
    return_df(df_app_log_wallpaper, df_app_log_download, end_date=_date_2, start_date=_date_1)

_df_event_wallpaper_3,_df_event_download_3 =\
    return_df(df_app_log_wallpaper, df_app_log_download, end_date=_date_3, start_date=_date_2)

## calculate KPI
_df_output_1_1, _df_output_1_2 =\
    calculate_kpi(_df_event_wallpaper_1, _df_event_download_1, text = '10월_4주차', per_user_kpi=True)

_df_output_2_1, _df_output_2_2 =\
    calculate_kpi(_df_event_wallpaper_2, _df_event_download_2, text = '11월_1주차', per_user_kpi=True)

_df_output_3_1, _df_output_3_2 =\
    calculate_kpi(_df_event_wallpaper_3, _df_event_download_3, text = '11월_2주차', per_user_kpi=True)

## output
### prep
df_ouput_week = pd.concat([_df_output_1_1, _df_output_2_1, _df_output_3_1])
df_output_daily = pd.concat([_df_output_1_2, _df_output_2_2, _df_output_3_2])

df_ouput_week
df_output_daily

Unnamed: 0,pv_cnt,pv_device_cnt,dw_device_cnt,download_cnt,페이지뷰 대비 전환율,사용자 대비 전환율,사용자 대비 전환율(유저 기준 중복 집계 제외)
10월_4주차,1752,1300,432,1905,1.087329,1.465385,0.332308
11월_1주차,1929,1249,417,2101,1.089165,1.682146,0.333867
11월_2주차,1139,677,102,525,0.460931,0.77548,0.150665


Unnamed: 0_level_0,pv_cnt,pv_device_cnt,download_cnt,페이지뷰 대비 전환율,사용자 대비 전환율,사용자 대비 전환율(유저 기준 중복 집계 제외)
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-10-24,76,48,32,0.421053,0.666667,0.25
2022-10-25,95,63,78,0.821053,1.238095,0.380952
2022-10-26,88,67,97,1.102273,1.447761,0.343284
2022-10-27,72,62,72,1.0,1.16129,0.258065
2022-10-28,255,212,262,1.027451,1.235849,0.34434
2022-10-29,731,550,904,1.236662,1.643636,0.318182
2022-10-30,435,339,460,1.057471,1.356932,0.342183
2022-10-31,286,224,333,1.164336,1.486607,0.392857
2022-11-01,590,403,1215,2.059322,3.014888,0.459057
2022-11-02,223,171,164,0.735426,0.959064,0.345029


In [58]:
## 일평균 다운로드 완료 기기 수 추출
_, _df_event_download_before =\
    return_df(df_app_log_wallpaper, df_app_log_download, end_date = date_setting("2022-11-03"))

_, _df_event_download_after =\
    return_df(df_app_log_wallpaper, df_app_log_download, start_date = date_setting("2022-11-03"),\
        end_date = date_setting("2022-11-14"))

_df_event_download_before.groupby('day')[KEY_ID_DEVICE].nunique().mean()
_df_event_download_after.groupby('day')[KEY_ID_DEVICE].nunique().mean()

81.8

19.636363636363637

## 2)추가사항
- 신규 사용자 비율


**11/24일 TODO: 아래 데이터 기존에 반환했던 데이터랑 수치 일치 여부 확인!!!!!!!!**

In [91]:
#########################################
# 2. 신규 사용자 비율
#########################################
## data setting
# df_app_log_wallpaper_before = df_app_log_before[df_app_log_before['abs_events'].apply(lambda x: True if EVENT_MAIN_WALLPAPER in x else False)]
# df_app_log_wallpaper_after = df_app_log_after[df_app_log_after['abs_events'].apply(lambda x: True if EVENT_MAIN_WALLPAPER in x else False)]

df_prop_after = SpherePrep.Prop.df_prop_pipe(df_app_log_after, KEY_ID = [KEY_ID_DEVICE, KEY_ID_USER], drop_none_prop=False)  
df_prop_before = SpherePrep.Prop.df_prop_pipe(df_app_log_before, KEY_ID = [KEY_ID_DEVICE, KEY_ID_USER], drop_none_prop=False)  

## 11/3일 이전 데이터에서 로그인 여부를 담은 테이블 생성
df_signup_boolen = pd.DataFrame()
df_signup_boolen_after = pd.DataFrame()

df_signup_boolen[KEY_ID_DEVICE] = df_prop_before[KEY_ID_DEVICE]
df_signup_boolen['signup_boolen'] = df_prop_before[KEY_ID_USER].apply(lambda x: True if pd.isna(x)==True else False)
lst_not_login_before =\
    list(
        df_signup_boolen[KEY_ID_DEVICE][
            df_signup_boolen['signup_boolen'].apply(
                lambda x: True if x == False else False)])              # 11/3일 이전에 한 번도 로그인 하지 않은 기기 id


## 11/3일 이후 데이터에서 로그인 여부 담은 테이블
df_signup_boolen_after[KEY_ID_DEVICE] = df_prop_after[KEY_ID_DEVICE]
df_signup_boolen_after['signup_boolen'] = df_prop_after[KEY_ID_USER].apply(lambda x: True if pd.isna(x)==True else False)
lst_login_after =\
    list(
        df_signup_boolen_after[KEY_ID_DEVICE][
            df_signup_boolen_after['signup_boolen'].apply(
                lambda x: True if x == True else False)])              # 11/3일 이전에 한 번이라도도 로그인한 기기 id


## 11/3일 이후 데이터에서 단 한 번이라도 접속한 기기 id 
lst_connect_device_after = list(df_app_log_after[KEY_ID_DEVICE][df_app_log_after[KEY_ID_DEVICE].isin(lst_not_login_before)])

## 11/3일 이전에 접속 o & 로그인 x한 기기이면서 11/3일 이후에 회원가입한 기기 id
lst_target = list(
    set(lst_connect_device_after) & set(lst_login_after)
)

## 위 기기 id 중에서 wallpaper에 도달한 기기 id
_df_target = df_app_log[df_app_log[KEY_ID_DEVICE].isin(lst_target)]
_df_target_wallpaper = _df_target[_df_target['abs_events'].apply(lambda x: True if EVENT_MAIN_WALLPAPER in x else False)]

## 11/3일 이전에는 로그인하지 않았지만 이후에 회원가입하여 로그인한 월페이퍼 유저의 일별 수
print('total_after_signup_cnt: ', _df_target_wallpaper[KEY_ID_DEVICE].nunique())
_df_output_daily = _df_target_wallpaper.groupby('day')[KEY_ID_DEVICE].nunique().to_frame(name = 'after_signup_cnt')
_df_output_daily['total_conn_cnt'] = df_app_log_wallpaper.groupby('day')[KEY_ID_DEVICE].nunique().to_frame(name = 'cnt')        # 월페이퍼 전체 일별 접속 유저 수
_df_output_daily

total_after_signup_cnt:  51


Unnamed: 0_level_0,after_signup_cnt,total_conn_cnt
day,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-10-24,4,48
2022-10-25,1,63
2022-10-26,2,67
2022-10-27,3,62
2022-10-28,2,212
2022-10-31,4,224
2022-11-01,4,403
2022-11-02,11,171
2022-11-03,2,170
2022-11-04,4,148


In [None]:
df_pv.groupby('day')[KEY_ID_DEVICE].nunique().to_frame(name = 'pv_device_cnt')