# 퍼널 분석(Funnel Analysis)
서비스를 개선하려할 때 ‘뭐부터 개선해야 하지?’ 의문점이 들 때 사용하면 좋다. 일반적으로 사용자가 서비스를 이용할 때 서비스 진입 후 최종 핵심 기능을 사용하기까지 각 단계를 거치면서 점점 이탈하게 되는데 이 단계를 구분해 서서히 좁아지는 형태구조를 퍼널(funnel)이라 칭한다. 퍼널 분석을 이용하면 각 단계에 대한 고객 분석이 가능하다. 단계별로 고객의 이탈률을 확인하여 이에 대한 조치를 취할 수 있다. 

## Library Import & Install

In [3]:
!pip install chart-studio

Collecting chart-studio
  Downloading chart_studio-1.1.0-py3-none-any.whl (64 kB)
     ---------------------------------------- 64.4/64.4 kB 1.7 MB/s eta 0:00:00
Collecting retrying>=1.3.3
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Installing collected packages: retrying, chart-studio
Successfully installed chart-studio-1.1.0 retrying-1.3.4



[notice] A new release of pip available: 22.2.2 -> 22.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import numpy as np
import pandas as pd
import os
import plotly
import plotly.express as px
import chart_studio.plotly as py
from plotly import graph_objs as go
from __future__ import division
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
%matplotlib inline

## Data Load

In [2]:
dfHome = pd.read_csv('C:/Users/USER/Desktop/Data/home_page_table.csv')
dfSearch = pd.read_csv('C:/Users/USER/Desktop/Data/search_page_table.csv')
dfPaymentC1 = pd.read_csv('C:/Users/USER/Desktop/Data/payment_confirmation_table.csv')
dfPaymentP2 = pd.read_csv('C:/Users/USER/Desktop/Data/payment_page_table.csv')
UserTable = pd.read_csv('C:/Users/USER/Desktop/Data/user_table.csv')

In [3]:
print(dfHome.shape)
print(dfSearch.shape)
print(dfPaymentC1.shape)
print(dfPaymentP2.shape)
print(UserTable.shape)

(90400, 2)
(45200, 2)
(452, 2)
(6030, 2)
(90400, 4)


In [6]:
dfHome.head()

Unnamed: 0,user_id,page
0,313593,home_page
1,468315,home_page
2,264005,home_page
3,290784,home_page
4,639104,home_page


In [7]:
dfSearch.head()

Unnamed: 0,user_id,page
0,15866,search_page
1,347058,search_page
2,577020,search_page
3,780347,search_page
4,383739,search_page


In [8]:
dfPaymentC1.head()

Unnamed: 0,user_id,page
0,123100,payment_confirmation_page
1,704999,payment_confirmation_page
2,407188,payment_confirmation_page
3,538348,payment_confirmation_page
4,841681,payment_confirmation_page


In [9]:
dfPaymentP2.head()

Unnamed: 0,user_id,page
0,253019,payment_page
1,310478,payment_page
2,304081,payment_page
3,901286,payment_page
4,195052,payment_page


In [10]:
UserTable.head()

Unnamed: 0,user_id,date,device,sex
0,450007,2015-02-28,Desktop,Female
1,756838,2015-01-13,Desktop,Male
2,568983,2015-04-09,Desktop,Male
3,190794,2015-02-18,Desktop,Female
4,537909,2015-01-15,Desktop,Male


## Data Peparation

In [4]:
data_table = [
    ['Phases', 'Values'],
    ['dfHome', dfHome['user_id'].count()],
    ['dfSearch', dfSearch['user_id'].count()],
    ['dfPaymentP2', dfPaymentP2['user_id'].count()],
    ['dfPaymentC1', dfPaymentC1['user_id'].count()],
    ['UserTable', UserTable['user_id'].count()]
]

data_table

[['Phases', 'Values'],
 ['dfHome', 90400],
 ['dfSearch', 45200],
 ['dfPaymentP2', 6030],
 ['dfPaymentC1', 452],
 ['UserTable', 90400]]

In [5]:
table = ff.create_table(data_table)
iplot(table)

## Basic conversion funnel

In [6]:
data = dict(
    values = [i[1] for i in data_table[1:]][:-1],
    phases = ['Home', 'Search', 'Payment', 'Confirm']
)

fig = px.funnel(data, x='values', y='phases')
fig.show()

## 분할된 Funnel 대한 데이터 준비 및 병합

In [14]:
UserTable.head()

Unnamed: 0,user_id,date,device,sex
0,450007,2015-02-28,Desktop,Female
1,756838,2015-01-13,Desktop,Male
2,568983,2015-04-09,Desktop,Male
3,190794,2015-02-18,Desktop,Female
4,537909,2015-01-15,Desktop,Male


In [7]:
# 컬럼명 변경
dfHome = dfHome.rename(columns={'page' : 'Step One'})
dfSearch = dfSearch.rename(columns={'page' : 'Step Two'})
dfPaymentP2 = dfPaymentP2.rename(columns={'page' : 'Step Tree'})
dfPaymentC1 = dfPaymentC1.rename(columns={'page' : 'Step Four'})

### By gender

In [8]:
# 데이터 병합
dfT = UserTable.merge(dfHome, how='outer', on='user_id')\
    .merge(dfSearch, how='outer', on='user_id')\
    .merge(dfPaymentP2, how='outer', on='user_id')\
    .merge(dfPaymentC1, how='outer', on='user_id')

dfT.head()

Unnamed: 0,user_id,date,device,sex,Step One,Step Two,Step Tree,Step Four
0,450007,2015-02-28,Desktop,Female,home_page,,,
1,756838,2015-01-13,Desktop,Male,home_page,,,
2,568983,2015-04-09,Desktop,Male,home_page,search_page,,
3,190794,2015-02-18,Desktop,Female,home_page,search_page,,
4,537909,2015-01-15,Desktop,Male,home_page,,,


In [9]:
# Step One
Step_One_Male = (dfT['sex'] == 'Male') & (dfT['Step One'] == 'home_page')
Step_One_Female = (dfT['sex'] == 'Female') & (dfT['Step One'] == 'home_page')

# Step Two
Step_Two_Male = (dfT['sex'] == 'Male') & (dfT['Step Two'] == 'search_page')
Step_Two_Female = (dfT['sex'] == 'Female') & (dfT['Step Two'] == 'search_page')

# Step Tree
Step_Tree_Male = (dfT['sex'] == 'Male') & (dfT['Step Tree'] == 'payment_page')
Step_Tree_Female = (dfT['sex'] == 'Female') & (dfT['Step Tree'] == 'payment_page')

# Step Four
Step_Four_Male = (dfT['sex'] == 'Male') & (dfT['Step Four'] == 'payment_confirmation_page')
Step_Four_Female = (dfT['sex'] == 'Female') & (dfT['Step Four'] == 'payment_confirmation_page')

In [10]:
data_table2 = [
    ['Phases', 'Man', 'Woman'],
    ['Home', Step_One_Male.sum(), Step_One_Female.sum()],
    ['Search', Step_Two_Male.sum(), Step_Two_Female.sum()],
    ['Payment', Step_Tree_Male.sum(), Step_Tree_Female.sum()],
    ['Confirmation', Step_Four_Male.sum(), Step_Four_Female.sum()]
]

data_table2

[['Phases', 'Man', 'Woman'],
 ['Home', 45325, 45075],
 ['Search', 22524, 22676],
 ['Payment', 2930, 3100],
 ['Confirmation', 211, 241]]

In [11]:
table = ff.create_table(data_table2)
iplot(table)

In [12]:
df = pd.DataFrame(np.array([['Home', 45325, 45075], ['Search', 22524, 22676],
                            ['Payment', 2930, 3100], ['Confirmation', 211, 241]]),
                            columns=['', 'Man', 'Woman'])

df = df.set_index('')
df['Man'] = df['Man'].astype('int')
df['Woman'] = df['Woman'].astype('int')

In [45]:
df

Unnamed: 0,Man,Woman
,,
Home,45325.0,45075.0
Search,22524.0,22676.0
Payment,2930.0,3100.0
Confirmation,211.0,241.0


In [13]:
df.dtypes

Man      int32
Woman    int32
dtype: object

In [14]:
stages = ['Home', 'Search', 'Payment', 'Confirmation']

df_man = pd.DataFrame(dict(values=df['Man'].values, stage=stages))
df_man['Sex'] = 'Man'

df_woman = pd.DataFrame(dict(values=df['Woman'].values, stage=stages))
df_woman['Sex'] = 'Woman'

data = pd.concat([df_man, df_woman], axis=0)
data

Unnamed: 0,values,stage,Sex
0,45325,Home,Man
1,22524,Search,Man
2,2930,Payment,Man
3,211,Confirmation,Man
0,45075,Home,Woman
1,22676,Search,Woman
2,3100,Payment,Woman
3,241,Confirmation,Woman


In [15]:
fig = px.funnel(data, x='values', y='stage', color='Sex')
fig.show()

### By gender and device

In [18]:
print(dfT['sex'].unique())
print(dfT['device'].unique())

['Female' 'Male']
['Desktop' 'Mobile']


In [22]:
# Step One
Step_One_Desktop_Male = (dfT['sex'] == 'Male') & (dfT['device'] == 'Desktop') & (dfT['Step One'] == 'home_page')
Step_One_Mobile_Male = (dfT['sex'] == 'Male') & (dfT['device'] == 'Mobile') & (dfT['Step One'] == 'home_page')
Step_One_Desktop_Female = (dfT['sex'] == 'Female') & (dfT['device'] == 'Desktop') & (dfT['Step One'] == 'home_page')
Step_One_Mobile_Female = (dfT['sex'] == 'Female') & (dfT['device'] == 'Mobile') & (dfT['Step One'] == 'home_page')

# Step Two
Step_Two_Desktop_Male = (dfT['sex'] == 'Male') & (dfT['device'] == 'Desktop') & (dfT['Step Two'] == 'search_page')
Step_Two_Mobile_Male = (dfT['sex'] == 'Male') & (dfT['device'] == 'Mobile') & (dfT['Step Two'] == 'search_page')
Step_Two_Desktop_Female = (dfT['sex'] == 'Female') & (dfT['device'] == 'Desktop') & (dfT['Step Two'] == 'search_page')
Step_Two_Mobile_Female = (dfT['sex'] == 'Female') & (dfT['device'] == 'Mobile') & (dfT['Step Two'] == 'search_page')

# Step Tree
Step_Three_Desktop_Male = (dfT['sex'] == 'Male') & (dfT['device'] == 'Desktop') & (dfT['Step Tree'] == 'payment_page')
Step_Three_Mobile_Male = (dfT['sex'] == 'Male') & (dfT['device'] == 'Mobile') & (dfT['Step Tree'] == 'payment_page')
Step_Three_Desktop_Female = (dfT['sex'] == 'Female') & (dfT['device'] == 'Desktop') & (dfT['Step Tree'] == 'payment_page')
Step_Three_Mobile_Female = (dfT['sex'] == 'Female') & (dfT['device'] == 'Mobile') & (dfT['Step Tree'] == 'payment_page')

# Step Four
Step_Four_Desktop_Male = (dfT['sex'] == 'Male') & (dfT['device'] == 'Desktop') & (dfT['Step Four'] == 'payment_confirmation_page')
Step_Four_Mobile_Male = (dfT['sex'] == 'Male') & (dfT['device'] == 'Mobile') & (dfT['Step Four'] == 'payment_confirmation_page')
Step_Four_Desktop_Female = (dfT['sex'] == 'Female') & (dfT['device'] == 'Desktop') & (dfT['Step Four'] == 'payment_confirmation_page')
Step_Four_Mobile_Female = (dfT['sex'] == 'Female') & (dfT['device'] == 'Mobile') & (dfT['Step Four'] == 'payment_confirmation_page')

In [23]:
data_tableTop = [
    ['Phases', 'Desktop Male', 'Mobile Male', 'Desktop Female', 'Mobile Female'],
    ['Home', Step_One_Desktop_Male.sum(), Step_One_Mobile_Male.sum(), Step_One_Desktop_Female.sum(), Step_One_Mobile_Female.sum()],
    ['Search', Step_Two_Desktop_Male.sum(), Step_Two_Mobile_Male.sum(), Step_Two_Desktop_Female.sum(), Step_Two_Mobile_Female.sum()],
    ['Payment', Step_Three_Desktop_Male.sum(), Step_Three_Mobile_Male.sum(), Step_Three_Desktop_Female.sum(), Step_Three_Mobile_Female.sum()],
    ['Confirmation', Step_Four_Desktop_Male.sum(), Step_Four_Mobile_Male.sum(), Step_Four_Desktop_Female.sum(), Step_Four_Mobile_Female.sum()]
]

data_tableTop

[['Phases', 'Desktop Male', 'Mobile Male', 'Desktop Female', 'Mobile Female'],
 ['Home', 30203, 15122, 29997, 15078],
 ['Search', 15009, 7515, 15091, 7585],
 ['Payment', 1480, 1450, 1530, 1570],
 ['Confirmation', 76, 135, 74, 167]]

In [25]:
table = ff.create_table(data_tableTop)
iplot(table)

In [None]:
dfFull = pd.DataFrame(np.array([['Home', 30203, 15122, 29997, 15078], ['Search', 15009, 7515, 15091, 7585], ['Payment', 1480, 1450, 1530, 1570], ['Confirmation', 76, 135, 74, 167]]),
                            columns=['','Desktop Male', 'Mobile Male', 'Desktop Female', 'Mobile Female'])

dfFull = dfFull.set_index('')

In [31]:
dfFull = pd.DataFrame(np.array([['Home', 30203, 15122, 29997, 15078], ['Search', 15009, 7515, 15091, 7585],
                            ['Payment', 1480, 1450, 1560, 1570], ['Comfirmation', 76, 135, 74, 167]]),
                            columns=['', 'Desktop Male', 'Mobile Male', 'Desktop Female', 'Mobile Female'])
dfFull = dfFull.set_index('')

In [32]:
dfFull

Unnamed: 0,Desktop Male,Mobile Male,Desktop Female,Mobile Female
,,,,
Home,30203.0,15122.0,29997.0,15078.0
Search,15009.0,7515.0,15091.0,7585.0
Payment,1480.0,1450.0,1560.0,1570.0
Comfirmation,76.0,135.0,74.0,167.0


In [34]:
from plotly import graph_objects as go

In [54]:
dfFull

Unnamed: 0,Desktop Male,Mobile Male,Desktop Female,Mobile Female
,,,,
Home,30203.0,15122.0,29997.0,15078.0
Search,15009.0,7515.0,15091.0,7585.0
Payment,1480.0,1450.0,1560.0,1570.0
Comfirmation,76.0,135.0,74.0,167.0


In [67]:
fig = go.Figure()

fig.add_trace(go.Funnel(
    name = 'Desktop Male',
    y = ['Home', 'Search', 'Payment', 'Confirmation'],
    x = [30203, 15009, 1480, 76],
    textinfo = 'value+percent initial'
))

fig.add_trace(go.Funnel(
    name = 'Mobile Male',
    orientation = 'h',
    y = ['Home', 'Search', 'Payment', 'Confirmation'],
    x = [15122, 7515, 1450, 135],
    textposition = 'inside',
    textinfo = 'value+percent previous'
))

fig.add_trace(go.Funnel(
    name = 'Desktop Female',
    orientation = 'h',
    y = ['Home', 'Search', 'Payment', 'Confirmation'],
    x = [29997, 15091, 1560, 74],
    textposition = 'inside',
    textinfo = 'value+percent previous'
))

fig.add_trace(go.Funnel(
    name = 'Mobile Female',
    orientation = 'h',
    y = ['Home', 'Search', 'Payment', 'Confirmation'],
    x = [15078, 7585, 1570, 167],
    textposition = 'inside',
    textinfo = 'value+percent total'
))

fig.show()

In [68]:
labels = ['Desktop Male', 'Mobile Male', 'Desktop Female', 'Mobile Female']
values = [76, 135, 74, 167]
colors = ['rgb(63,92,128)', 'rgb(90,131,182)', 'rgb(255,255,255)']

trace = go.Pie(labels=labels, values=values,
                textfont=dict(size=25),
                marker=dict(colors=colors, line=dict(color='#000000', width=1)))

iplot([trace], filename='styled_pie_chart')