In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Render every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

# Local environment: the CSV has to be downloaded first, then its path pasted here.
# NOTE(review): this is an absolute, machine-specific path; adjust it to wherever
# the file lives before running on another computer.
csv_path = "C:/Users/user/Downloads/user_data.csv"
df = pd.read_csv(csv_path)

df

Unnamed: 0,user_id,stage,conversion
0,user_0,homepage,True
1,user_1,homepage,True
2,user_2,homepage,True
3,user_3,homepage,True
4,user_4,homepage,True
...,...,...,...
17170,user_17170,purchase,False
17171,user_17171,purchase,False
17172,user_17172,purchase,True
17173,user_17173,purchase,False


In [None]:
# Distinct values of the 'stage' column, in order of first appearance.
stages = df['stage'].unique()
total_rows = len(df)

# Slice the frame once per stage so both metrics below reuse the same subset.
rows_by_stage = {name: df.loc[df['stage'] == name] for name in stages}

# Conversion rate: mean of the 'conversion' column within the stage
# (for True/False values this is the fraction that are True).
conversion_rates = {
    name: subset['conversion'].mean()
    for name, subset in rows_by_stage.items()
}

# Retention rate: the stage's row count divided by the row count of the whole frame.
retention_rates = {
    name: len(subset) / total_rows
    for name, subset in rows_by_stage.items()
}

# Turn each {stage: rate} mapping into a two-column DataFrame for display.
conversion_rates_df = pd.DataFrame(
    list(conversion_rates.items()),
    columns=['Stage', 'Conversion Rate'],
)
# NOTE(review): this header is 'Stages' while the conversion frame uses 'Stage';
# kept as-is here, but consider unifying the two names.
retention_rates_df = pd.DataFrame(
    list(retention_rates.items()),
    columns=['Stages', 'Retention Rate'],
)

In [7]:
# Display the per-stage conversion-rate table built from the conversion_rates dict.
conversion_rates_df

Unnamed: 0,Stage,Conversion Rate
0,homepage,1.0
1,product_page,0.503
2,cart,0.299333
3,checkout,0.08
4,purchase,0.062222


In [8]:
# Display the per-stage retention-rate table built from the retention_rates dict.
retention_rates_df

Unnamed: 0,Stages,Retention Rate
0,homepage,0.582242
1,product_page,0.291121
2,cart,0.087336
3,checkout,0.026201
4,purchase,0.0131


In [9]:
import plotly.graph_objects as go

In [None]:
# Build 'funnel_data': one row per stage, combining the number of rows in that
# stage with the conversion and retention rates computed earlier.

# Count rows per stage by summing the boolean match mask.
user_counts = [int((df['stage'] == name).sum()) for name in stages]

funnel_data = pd.DataFrame({
    'Stage': stages,
    'Users': user_counts,
    'Conversion Rate': [conversion_rates[name] for name in stages],
    'Retention Rate': [retention_rates[name] for name in stages],
})
funnel_data

Unnamed: 0,Stage,Users,Conversion Rate,Retention Rate
0,homepage,10000,1.0,0.582242
1,product_page,5000,0.503,0.291121
2,cart,1500,0.299333,0.087336
3,checkout,450,0.08,0.026201
4,purchase,225,0.062222,0.0131


In [None]:
# Visualize the funnel with plotly: one bar per stage, sized by its 'Users' value.

# Per-stage label carrying the user count plus the conversion (CR) and
# retention (RR) rates; <br> puts each item on its own line.
stage_labels = funnel_data.apply(
    lambda row: f"Users : {row['Users'] :,}<br>CR: {row['Conversion Rate']:.2%}<br>RR : {row['Retention Rate']:.2%}",
    axis=1,
)

# Cycle through the palette so every stage receives an explicit color,
# regardless of how many stages funnel_data contains.
palette = ["#FF5733", "#FFC300", "#C70039", "#900C3F", "#581845"]
stage_colors = [palette[i % len(palette)] for i in range(len(funnel_data))]

fig = go.Figure(go.Funnel(
    y=funnel_data['Stage'],
    x=funnel_data['Users'],
    text=stage_labels,
    # The "text" flag is required for the labels above to be drawn on the bars;
    # they already contain the user count, so "value" is not repeated.
    textinfo="text+percent initial",
    hoverinfo="x+percent initial+percent total",
    marker=dict(color=stage_colors),
))

fig.update_layout(
    title='User Funnel with Conversion and Retention Rates',
    xaxis_title='Number of Users',
    yaxis_title='Stage',
    # Order the stage categories by their total value, largest first.
    yaxis=dict(categoryorder='total descending'),
)

fig.show()