In [1]:
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import plotly.express as px
from matplotlib import rc
# Plot with the AppleGothic font family (presumably chosen so Korean chart
# titles render — confirm the font exists on the target machine) and draw the
# minus sign as a plain hyphen instead of the Unicode minus glyph.
plt.rcParams['font.family'] = 'AppleGothic'
plt.rcParams['axes.unicode_minus'] = False

# Show every column of wide frames instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

In [2]:
# Load the product table (GoodsCode, Category, OriginalPrice, Inventory).
# NOTE(review): absolute, machine-specific path — the notebook will not run
# elsewhere without editing it.
inventory = pd.read_parquet(
    "/Users/hj/Documents/Four-A/2nd dataset/khj/data/inventory_optimized.parquet"
)
inventory

Unnamed: 0,GoodsCode,Category,OriginalPrice,Inventory
0,1000,Books,12580,4452
1,1001,Health & Beauty,21530,2308
2,1002,Home & Garden,22800,5177
3,1003,Health & Beauty,22910,9705
4,1004,Health & Beauty,12780,7852
...,...,...,...,...
495,1495,Electronics,137560,4714
496,1496,Home & Garden,10520,1529
497,1497,Electronics,40260,3958
498,1498,Electronics,54950,8904


In [3]:
# Load the raw customer action log (one row per view/share/basket/purchase/ad_click event).
# NOTE(review): absolute, machine-specific path.
log = pd.read_parquet(
    "/Users/hj/Documents/Four-A/2nd dataset/khj/data/real_log_for_spartan_optimized.parquet"
)

log

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp
0,74377,view,1484,108330,,,2022-02-17 19:07:19.511967
1,74377,purchase,1484,108330,,,2022-03-20 04:26:25.511967
2,74377,view,1063,14370,,,2022-03-02 04:43:47.511967
3,74377,purchase,1063,14370,,,2022-03-22 00:42:08.511967
4,74377,ad_click,1203,85290,ad_google,175.38,2022-04-16 22:19:28.511967
...,...,...,...,...,...,...,...
3096926,454879,view,1250,100420,,,2022-05-27 08:18:40.554370
3096927,454879,view,1250,100420,,,2022-04-25 00:41:09.554370
3096928,454879,share,1250,100420,,,2022-06-14 03:44:21.554370
3096929,454879,basket,1250,100420,,,2022-05-13 11:09:41.554370


In [4]:
# Enrich every log event with its product attributes. The inner join keeps
# only events whose GoodsCode exists in the inventory table.
df = log.merge(inventory, how='inner', on='GoodsCode')
df

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,Category,OriginalPrice,Inventory
0,74377,view,1484,108330,,,2022-02-17 19:07:19.511967,Electronics,108330,9595
1,74377,purchase,1484,108330,,,2022-03-20 04:26:25.511967,Electronics,108330,9595
2,74377,view,1063,14370,,,2022-03-02 04:43:47.511967,Clothing,14370,2386
3,74377,purchase,1063,14370,,,2022-03-22 00:42:08.511967,Clothing,14370,2386
4,74377,ad_click,1203,85290,ad_google,175.38,2022-04-16 22:19:28.511967,Electronics,85290,9173
...,...,...,...,...,...,...,...,...,...,...
3096926,454879,view,1250,100420,,,2022-05-27 08:18:40.554370,Electronics,100420,8291
3096927,454879,view,1250,100420,,,2022-04-25 00:41:09.554370,Electronics,100420,8291
3096928,454879,share,1250,100420,,,2022-06-14 03:44:21.554370,Electronics,100420,8291
3096929,454879,basket,1250,100420,,,2022-05-13 11:09:41.554370,Electronics,100420,8291


In [5]:
# Fold 'share' events into 'view' for the funnel analysis.
#
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the df['ActionType'] selection: pandas warns
# that the chained in-place form acts on a temporary copy and will no longer
# modify df from pandas 3.0 on.
#
# ActionType is a categorical column, so the replacement goes through object
# dtype and is converted back to 'category' afterwards; this leaves 'share'
# out of the category list without relying on replace() semantics for
# categoricals.
df['ActionType'] = (
    df['ActionType']
    .astype(object)
    .replace('share', 'view')
    .astype('category')
)
df

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['ActionType'].replace('share', 'view', inplace=True)
  df['ActionType'].replace('share', 'view', inplace=True)


Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,Category,OriginalPrice,Inventory
0,74377,view,1484,108330,,,2022-02-17 19:07:19.511967,Electronics,108330,9595
1,74377,purchase,1484,108330,,,2022-03-20 04:26:25.511967,Electronics,108330,9595
2,74377,view,1063,14370,,,2022-03-02 04:43:47.511967,Clothing,14370,2386
3,74377,purchase,1063,14370,,,2022-03-22 00:42:08.511967,Clothing,14370,2386
4,74377,ad_click,1203,85290,ad_google,175.38,2022-04-16 22:19:28.511967,Electronics,85290,9173
...,...,...,...,...,...,...,...,...,...,...
3096926,454879,view,1250,100420,,,2022-05-27 08:18:40.554370,Electronics,100420,8291
3096927,454879,view,1250,100420,,,2022-04-25 00:41:09.554370,Electronics,100420,8291
3096928,454879,view,1250,100420,,,2022-06-14 03:44:21.554370,Electronics,100420,8291
3096929,454879,basket,1250,100420,,,2022-05-13 11:09:41.554370,Electronics,100420,8291


In [6]:
# Inspect column dtypes and memory usage of the merged frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3096931 entries, 0 to 3096930
Data columns (total 10 columns):
 #   Column         Dtype         
---  ------         -----         
 0   CustomerID     int32         
 1   ActionType     category      
 2   GoodsCode      int16         
 3   Price          int32         
 4   AdID           category      
 5   CPC            float64       
 6   Timestamp      datetime64[ns]
 7   Category       category      
 8   OriginalPrice  int32         
 9   Inventory      int32         
dtypes: category(3), datetime64[ns](1), float64(1), int16(1), int32(4)
memory usage: 109.3 MB


In [7]:
# Summary statistics for all columns, including the non-numeric ones.
df.describe(include='all')

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,Category,OriginalPrice,Inventory
count,3096931.0,3096931,3096931.0,3096931.0,172028,172028.0,3096931,3096931,3096931.0,3096931.0
unique,,4,,,3,,,5,,
top,,view,,,ad_google,,,Books,,
freq,,2076942,,,58025,,,693899,,
mean,424046.3,,1249.51,32196.87,,193.679474,2022-04-20 17:14:39.187857664,,32196.87,5471.431
min,4.0,,1000.0,5070.0,,10.52,2022-02-01 18:46:02.707955,,5070.0,1001.0
25%,199328.0,,1124.0,13020.0,,63.8,2022-03-27 21:43:52.397296896,,13020.0,3056.0
50%,406663.0,,1250.0,19290.0,,116.07,2022-04-20 08:27:21.511966976,,19290.0,5263.0
75%,646393.0,,1375.0,35450.0,,216.68,2022-05-14 00:06:58.253411584,,35450.0,8005.0
max,899999.0,,1499.0,147600.0,,1462.6,2023-06-11 10:58:15.127184,,147600.0,9996.0


In [8]:
# Derive calendar features from the event timestamp.
# to_datetime is a cheap guard in case Timestamp was not loaded as datetime.
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

timestamp_parts = df['Timestamp'].dt
df['year'] = timestamp_parts.year
df['month'] = timestamp_parts.month
df['day'] = timestamp_parts.day
df['date'] = timestamp_parts.date
# YYYYMM integer key (e.g. 202203). Computed with vectorized arithmetic rather
# than a per-row Python lambda; cast to int64 to keep the dtype the
# map()-based version produced.
df['YearMonth'] = (100 * timestamp_parts.year + timestamp_parts.month).astype('int64')
df.head()

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,Category,OriginalPrice,Inventory,year,month,day,date,YearMonth
0,74377,view,1484,108330,,,2022-02-17 19:07:19.511967,Electronics,108330,9595,2022,2,17,2022-02-17,202202
1,74377,purchase,1484,108330,,,2022-03-20 04:26:25.511967,Electronics,108330,9595,2022,3,20,2022-03-20,202203
2,74377,view,1063,14370,,,2022-03-02 04:43:47.511967,Clothing,14370,2386,2022,3,2,2022-03-02,202203
3,74377,purchase,1063,14370,,,2022-03-22 00:42:08.511967,Clothing,14370,2386,2022,3,22,2022-03-22,202203
4,74377,ad_click,1203,85290,ad_google,175.38,2022-04-16 22:19:28.511967,Electronics,85290,9173,2022,4,16,2022-04-16,202204


In [9]:
# Chronological event history of customer 8405.
df.loc[df['CustomerID'].eq(8405)].sort_values(by='Timestamp')

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,Category,OriginalPrice,Inventory,year,month,day,date,YearMonth


- 고객이 하루에 구매를 몇건 했는지 확인

In [10]:
# Number of purchase events per customer per calendar day.
purchase_count_per_day = (
    df.loc[df['ActionType'].eq('purchase')]
    .groupby(['CustomerID', 'date'])
    .size()
    .reset_index(name='PurchaseCount')
)

# Plain-text dump of the result.
print(purchase_count_per_day)

# Same table ordered by daily purchase count (largest counts at the bottom).
purchase_count_per_day.sort_values(by='PurchaseCount')

        CustomerID        date  PurchaseCount
0                6  2022-04-26              1
1                6  2022-06-02              1
2                6  2022-07-05              1
3                8  2022-03-08              1
4                8  2022-04-04              1
...            ...         ...            ...
454012      899999  2022-03-18              1
454013      899999  2022-03-21              1
454014      899999  2022-04-04              1
454015      899999  2022-04-29              1
454016      899999  2022-06-20              1

[454017 rows x 3 columns]


Unnamed: 0,CustomerID,date,PurchaseCount
0,6,2022-04-26,1
301922,565739,2022-05-16,1
301921,565739,2022-04-01,1
301920,565739,2022-02-13,1
301919,565737,2022-05-10,1
...,...,...,...
131529,230803,2022-03-17,3
111467,195868,2022-05-12,3
83800,147617,2022-04-21,3
114905,201515,2022-04-30,4


In [11]:
# Revenue = sum of Price over purchase events, aggregated per year and per YearMonth.
yearly_revenue = (
    df.loc[df['ActionType'].eq('purchase')]
    .groupby('year')['Price']
    .sum()
)
monthly_revenue = (
    df.loc[df['ActionType'].eq('purchase')]
    .groupby('YearMonth')['Price']
    .sum()
    .reset_index()
)
print(yearly_revenue)
monthly_revenue

year
2022    14748266310
2023        5813120
Name: Price, dtype: int64


Unnamed: 0,YearMonth,Price
0,202202,923406730
1,202203,3372937530
2,202204,4827429880
3,202205,4011574160
4,202206,1505817790
5,202207,83833400
6,202208,3633580
7,202209,5297610
8,202210,5253430
9,202211,3706090


In [12]:
# Store YearMonth as text in both frames (presumably so the chart axis treats
# each month as a label rather than a number — confirm).
df['YearMonth'] = df['YearMonth'].astype(str)
monthly_revenue["YearMonth"] = monthly_revenue["YearMonth"].astype(str)

# Monthly revenue bar chart.
fig = px.bar(
    monthly_revenue,
    x='YearMonth',
    y='Price',
    title='Monthly Revenue',
    width=700,
    height=500,
)
fig.show()

In [13]:
# Month-over-month revenue change as a fraction, rounded to two decimals.
# The first month has no predecessor, so its value is NaN.
monthly_revenue['MonthlyGrowth'] = monthly_revenue['Price'].pct_change().round(2)
monthly_revenue.head()

Unnamed: 0,YearMonth,Price,MonthlyGrowth
0,202202,923406730,
1,202203,3372937530,2.65
2,202204,4827429880,0.43
3,202205,4011574160,-0.17
4,202206,1505817790,-0.62


In [14]:
# Bar chart of the month-over-month revenue growth rate.
fig = px.bar(
    monthly_revenue,
    x='YearMonth',
    y='MonthlyGrowth',
    title='Monthly Growth Rate',
    width=700,
    height=500,
)
fig.show()

# DAU(daily active user) 추이

In [15]:
# Daily active users: number of distinct customers with at least one event per date.
dau = (
    df.groupby('date')['CustomerID']
    .nunique()
    .rename('dau')
    .reset_index()
)
dau

Unnamed: 0,date,dau
0,2022-02-01,96
1,2022-02-02,624
2,2022-02-03,1044
3,2022-02-04,1602
4,2022-02-05,2067
...,...,...
456,2023-05-18,1
457,2023-05-20,1
458,2023-05-23,1
459,2023-05-28,1


In [16]:
# Line chart of DAU over time.
fig = px.line(dau, x='date', y='dau', title='DAU 추이')
fig.show()

In [17]:
# Convert the plain date objects to datetimes so the .dt accessor is available.
dau = dau.assign(date=pd.to_datetime(dau['date']))

# Weekday name for display plus its numeric code (Monday=0 ... Sunday=6) for ordering.
dau = dau.assign(
    day_of_week=dau['date'].dt.day_name(),
    day_of_week1=dau['date'].dt.dayofweek,
)
dau

Unnamed: 0,date,dau,day_of_week,day_of_week1
0,2022-02-01,96,Tuesday,1
1,2022-02-02,624,Wednesday,2
2,2022-02-03,1044,Thursday,3
3,2022-02-04,1602,Friday,4
4,2022-02-05,2067,Saturday,5
...,...,...,...,...
456,2023-05-18,1,Thursday,3
457,2023-05-20,1,Saturday,5
458,2023-05-23,1,Tuesday,1
459,2023-05-28,1,Sunday,6


## 요일별 DAU

In [18]:
# Average DAU per weekday, ordered Monday -> Sunday via the numeric weekday code.
avg_dau_by_dow = (
    dau.groupby(['day_of_week', 'day_of_week1'])[['dau']]
    .mean()
    .reset_index()
    .sort_values(by='day_of_week1')
)
avg_dau_by_dow

Unnamed: 0,day_of_week,day_of_week1,dau
1,Monday,0,6340.640625
5,Tuesday,1,6140.30303
6,Wednesday,2,6143.181818
4,Thursday,3,5967.176471
0,Friday,4,6526.370968
2,Saturday,5,5960.970588
3,Sunday,6,6057.179104


In [19]:
# Bar chart of the average DAU per weekday.
fig = px.bar(
    avg_dau_by_dow,
    x='day_of_week',
    y='dau',
    title='요일별 DAU 평균',
    width=700,
    height=500,
)
fig.show()

# MAU(Monthly Active User)

In [20]:
# Monthly active users: number of distinct customers with at least one event per YearMonth.
mau = (
    df.groupby('YearMonth')['CustomerID']
    .nunique()
    .rename('mau')
    .reset_index()
)
mau

Unnamed: 0,YearMonth,mau
0,202202,97988
1,202203,176220
2,202204,199875
3,202205,200058
4,202206,145777
5,202207,14304
6,202208,604
7,202209,633
8,202210,645
9,202211,611


In [21]:
# Bar chart of MAU per month.
fig = px.bar(
    mau,
    x='YearMonth',
    y='mau',
    title='월별 MAU',
    width=700,
    height=500,
)
fig.show()

## Monthly orders

In [22]:
# Number of purchase events per YearMonth.
purchase_count_per_month = (
    df.loc[df['ActionType'].eq('purchase')]
    .groupby('YearMonth')
    .size()
    .reset_index(name='PurchaseCount')
)
purchase_count_per_month

Unnamed: 0,YearMonth,PurchaseCount
0,202202,28555
1,202203,105227
2,202204,149809
3,202205,125092
4,202206,46530
5,202207,2688
6,202208,124
7,202209,130
8,202210,156
9,202211,131


In [23]:
# Bar chart of the number of orders per month.
fig = px.bar(
    purchase_count_per_month,
    x='YearMonth',
    y='PurchaseCount',
    title='Monthly Orders',
    width=700,
    height=500,
)
fig.show()

## Monthly revenue

In [24]:
# Recompute revenue (sum of Price over purchase events) per year and per YearMonth.
yearly_revenue = (
    df.loc[df['ActionType'].eq('purchase')]
    .groupby('year')['Price']
    .sum()
)
monthly_revenue = (
    df.loc[df['ActionType'].eq('purchase')]
    .groupby('YearMonth')['Price']
    .sum()
    .reset_index()
)

monthly_revenue

Unnamed: 0,YearMonth,Price
0,202202,923406730
1,202203,3372937530
2,202204,4827429880
3,202205,4011574160
4,202206,1505817790
5,202207,83833400
6,202208,3633580
7,202209,5297610
8,202210,5253430
9,202211,3706090


In [25]:
# Make sure YearMonth is text in both frames before plotting.
df['YearMonth'] = df['YearMonth'].astype(str)
monthly_revenue["YearMonth"] = monthly_revenue["YearMonth"].astype(str)

# Monthly revenue bar chart.
fig = px.bar(
    monthly_revenue,
    x='YearMonth',
    y='Price',
    title='Monthly Revenue',
    width=700,
    height=500,
)
fig.show()

#

# 광고 매체

In [26]:
# All events of customer 8405 (in log order).
df.loc[df['CustomerID'].eq(8405)]

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,Category,OriginalPrice,Inventory,year,month,day,date,YearMonth


In [27]:
# Number of rows per ad channel (rows without an AdID are not counted).
df['AdID'].value_counts()

AdID
ad_google    58025
ad_meta      57324
ad_line      56679
Name: count, dtype: int64

In [28]:
# Re-check dtypes and memory usage now that the date-derived columns exist.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3096931 entries, 0 to 3096930
Data columns (total 15 columns):
 #   Column         Dtype         
---  ------         -----         
 0   CustomerID     int32         
 1   ActionType     category      
 2   GoodsCode      int16         
 3   Price          int32         
 4   AdID           category      
 5   CPC            float64       
 6   Timestamp      datetime64[ns]
 7   Category       category      
 8   OriginalPrice  int32         
 9   Inventory      int32         
 10  year           int32         
 11  month          int32         
 12  day            int32         
 13  date           object        
 14  YearMonth      object        
dtypes: category(3), datetime64[ns](1), float64(1), int16(1), int32(7), object(2)
memory usage: 192.0+ MB


In [29]:
# Minimal projection needed for the funnel: who did what, and when.
funnel_total = df.loc[:, ['CustomerID', 'ActionType', 'Timestamp']]
funnel_total

Unnamed: 0,CustomerID,ActionType,Timestamp
0,74377,view,2022-02-17 19:07:19.511967
1,74377,purchase,2022-03-20 04:26:25.511967
2,74377,view,2022-03-02 04:43:47.511967
3,74377,purchase,2022-03-22 00:42:08.511967
4,74377,ad_click,2022-04-16 22:19:28.511967
...,...,...,...
3096926,454879,view,2022-05-27 08:18:40.554370
3096927,454879,view,2022-04-25 00:41:09.554370
3096928,454879,view,2022-06-14 03:44:21.554370
3096929,454879,basket,2022-05-13 11:09:41.554370


In [30]:
# First time each customer performed each action type.
#
# ActionType is categorical, so the result contains one row per
# (customer, category) pair; pairs that never occurred get NaT.
# observed=False is passed explicitly to pin that behaviour: the implicit
# default is deprecated in pandas and scheduled to switch to observed=True,
# which would silently drop the NaT rows and change the row count.
grouped = (
    funnel_total
    .groupby(['CustomerID', 'ActionType'], observed=False)['Timestamp']
    .min()
)
grouped





CustomerID  ActionType
4           ad_click                            NaT
            basket                              NaT
            purchase                            NaT
            view         2022-07-04 15:11:42.511967
6           ad_click     2022-05-07 21:00:59.166991
                                    ...            
899993      view         2022-02-27 02:51:35.372529
899999      ad_click     2022-04-06 21:09:47.722389
            basket       2022-04-01 05:54:44.722389
            purchase     2022-03-18 23:44:47.722389
            view         2022-02-09 14:12:23.722389
Name: Timestamp, Length: 914700, dtype: datetime64[ns]

In [31]:
# Ordinal position of each funnel stage, indexed by action type.
funnel_steps = pd.Series(
    [1, 2, 3],
    index=['view', 'basket', 'purchase'],
    name='steps',
).to_frame()
funnel_steps

Unnamed: 0,steps
view,1
basket,2
purchase,3


In [32]:
# Attach the funnel step number (view=1, basket=2, purchase=3) to every
# (CustomerID, ActionType) row by matching the ActionType index level against
# the index of funnel_steps. merge() defaults to an inner join, so 'ad_click'
# rows, which have no entry in funnel_steps, are dropped.
# NOTE(review): `grouped` is rebound from a Series to a DataFrame here, so
# running this cell a second time would merge `steps` in again — confirm the
# cell is only ever run once per kernel session.
grouped = pd.DataFrame(grouped).merge(funnel_steps, left_on='ActionType', right_index=True)
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Timestamp,steps
CustomerID,ActionType,Unnamed: 2_level_1,Unnamed: 3_level_1
4,basket,NaT,2
4,purchase,NaT,3
4,view,2022-07-04 15:11:42.511967,1
6,basket,2022-05-18 03:08:13.811783,2
6,purchase,2022-04-26 21:46:00.166991,3
...,...,...,...
899993,purchase,2022-03-10 05:30:24.372529,3
899993,view,2022-02-27 02:51:35.372529,1
899999,basket,2022-04-01 05:54:44.722389,2
899999,purchase,2022-03-18 23:44:47.722389,3


## ad-meta funnel

In [33]:
# Rows attributed to the ad_meta channel.
meta = df.loc[df['AdID'].eq('ad_meta')]
meta

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,Category,OriginalPrice,Inventory,year,month,day,date,YearMonth
23,8620,ad_click,1169,16750,ad_meta,118.03,2022-02-08 19:54:39.511967,Books,16750,7640,2022,2,8,2022-02-08,202202
53,120663,ad_click,1050,11360,ad_meta,45.21,2022-02-25 07:56:58.511967,Books,11360,1645,2022,2,25,2022-02-25,202202
85,147563,ad_click,1122,21770,ad_meta,193.36,2022-12-19 04:03:42.511967,Health & Beauty,21770,2750,2022,12,19,2022-12-19,202212
90,147563,ad_click,1122,21770,ad_meta,193.36,2022-12-24 03:21:22.511967,Health & Beauty,21770,2750,2022,12,24,2022-12-24,202212
119,47804,ad_click,1236,62310,ad_meta,538.95,2022-03-04 20:29:51.511967,Electronics,62310,8906,2022,3,4,2022-03-04,202203
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3096731,828017,ad_click,1330,36610,ad_meta,217.95,2022-05-17 21:52:10.388118,Home & Garden,36610,8813,2022,5,17,2022-05-17,202205
3096795,616218,ad_click,1213,28020,ad_meta,92.16,2022-03-31 07:25:21.392190,Clothing,28020,2868,2022,3,31,2022-03-31,202203
3096804,616218,ad_click,1199,9180,ad_meta,83.35,2022-05-03 07:20:49.392190,Books,9180,5388,2022,5,3,2022-05-03,202205
3096826,733390,ad_click,1492,15020,ad_meta,49.34,2022-06-06 19:44:45.393408,Books,15020,6631,2022,6,6,2022-06-06,202206


In [34]:
# Chronological event history of customer 207149.
df.loc[df['CustomerID'].eq(207149)].sort_values(by='Timestamp')

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,Category,OriginalPrice,Inventory,year,month,day,date,YearMonth
2587883,207149,purchase,1030,34730,,,2022-02-15 23:12:02.190202,Home & Garden,34730,8368,2022,2,15,2022-02-15,202202
2587879,207149,view,1030,34730,,,2022-03-01 01:22:12.190202,Home & Garden,34730,8368,2022,3,1,2022-03-01,202203
2587887,207149,purchase,1072,15550,,,2022-03-04 17:46:45.190202,Health & Beauty,15550,7819,2022,3,4,2022-03-04,202203
2587901,207149,purchase,1130,49870,,,2022-03-12 11:02:54.190202,Home & Garden,49870,4993,2022,3,12,2022-03-12,202203
2587882,207149,ad_click,1030,34730,ad_line,140.48,2022-03-14 08:05:06.190202,Home & Garden,34730,8368,2022,3,14,2022-03-14,202203
335,207149,ad_click,1063,14370,ad_meta,98.1,2022-03-15 04:40:52.511967,Clothing,14370,2386,2022,3,15,2022-03-15,202203
2587892,207149,view,1334,10890,,,2022-03-18 09:34:28.190202,Books,10890,7540,2022,3,18,2022-03-18,202203
2587890,207149,basket,1334,10890,,,2022-03-21 00:38:07.190202,Books,10890,7540,2022,3,21,2022-03-21,202203
2587885,207149,view,1072,15550,,,2022-03-26 10:06:05.190202,Health & Beauty,15550,7819,2022,3,26,2022-03-26,202203
336,207149,view,1063,14370,,,2022-03-28 18:52:28.511967,Clothing,14370,2386,2022,3,28,2022-03-28,202203


In [35]:
# Action types present among the ad_meta rows. ActionType is categorical, so
# categories that do not occur are still listed with a count of 0.
meta['ActionType'].value_counts()

ActionType
ad_click    57324
basket          0
purchase        0
view            0
Name: count, dtype: int64

In [36]:
# Purchase events only, followed by their breakdown per product category.
purchased = df.loc[df['ActionType'].eq('purchase')]
purchased['Category'].value_counts()

Category
Books              102507
Electronics         99617
Clothing            87169
Health & Beauty     85816
Home & Garden       83663
Name: count, dtype: int64

In [37]:
# Latest ad_click timestamp in the raw log.
loggg = log.loc[log['ActionType'].eq('ad_click')]
loggg['Timestamp'].max()

Timestamp('2023-05-20 01:40:03.127184')

# 전체 funnel

In [38]:
# Number of events per action type across the whole merged log.
df['ActionType'].value_counts()

ActionType
view        2076942
purchase     458772
basket       389189
ad_click     172028
Name: count, dtype: int64

In [39]:
# Cumulative funnel counts over the whole log.
#
# A basket event is also counted as a view, and a purchase event is also
# counted as a basket and a view; ad_click rows are ignored.
# The per-type totals come from a single value_counts() call instead of a
# Python-level iterrows() loop over every row, which yields the same numbers
# without iterating millions of rows in the interpreter.
action_counts = df['ActionType'].value_counts()
n_view = int(action_counts.get('view', 0))
n_basket = int(action_counts.get('basket', 0))
n_purchase = int(action_counts.get('purchase', 0))

funnel_counts = {
    'view': n_view + n_basket + n_purchase,
    'basket': n_basket + n_purchase,
    'purchase': n_purchase,
}

print("퍼널 카운트:", funnel_counts)


퍼널 카운트: {'view': 2924903, 'basket': 847961, 'purchase': 458772}


In [40]:
# Stage sizes are read from funnel_counts rather than typed in by hand, so
# they cannot drift from the computed funnel when the data changes.
view_count = funnel_counts['view']
basket_count = funnel_counts['basket']
purchase_count = funnel_counts['purchase']

# Conversion rate from each stage to the next (and view -> purchase overall).
basket_to_purchase_rate = purchase_count / basket_count
view_to_basket_rate = basket_count / view_count
view_to_purchase_rate = purchase_count / view_count

# Report the results.
print("View Count:", view_count)
print("Basket Count:", basket_count)
print("Purchase Count:", purchase_count)
print("View to Basket Conversion Rate:", view_to_basket_rate)
print("Basket to Purchase Conversion Rate:", basket_to_purchase_rate)
print("View to Purchase Conversion Rate:",view_to_purchase_rate )

View Count: 2924903
Basket Count: 847961
Purchase Count: 458772
View to Basket Conversion Rate: 0.28991081071748365
Basket to Purchase Conversion Rate: 0.5410295992386442
View to Purchase Conversion Rate: 0.1568503297374306


In [41]:
# Funnel chart of the overall view -> basket -> purchase conversion.
# The values reference the variables computed in the conversion-rate cell
# instead of repeating their printed values as literals.
data = dict(
    number=[view_count, basket_count, purchase_count],
    stage=['view', 'basket', 'purchase'],
    # Each rate is relative to the view stage, so the first bar is 100%.
    rate=[1, view_to_basket_rate, view_to_purchase_rate],
)
fig = px.funnel(data, x='rate', y='stage', title='전체 Funnel')
fig.update_traces(texttemplate="%{value:,.2%}")
fig.show()

## adclick 추가한 전체 funnel :x

In [42]:
# Cumulative funnel counts including ad clicks.
#
# Same accumulation rule as the plain funnel, plus: every ad_click row is
# counted as 'ad_purchase' and also as a basket and a view.
# Totals come from value_counts() instead of an iterrows() loop; the numbers
# are identical.
action_counts_ad = df['ActionType'].value_counts()
n_view = int(action_counts_ad.get('view', 0))
n_basket = int(action_counts_ad.get('basket', 0))
n_purchase = int(action_counts_ad.get('purchase', 0))
n_ad_click = int(action_counts_ad.get('ad_click', 0))

funnel_counts_ad = {
    'view': n_view + n_basket + n_purchase + n_ad_click,
    'basket': n_basket + n_purchase + n_ad_click,
    'purchase': n_purchase,
    'ad_purchase': n_ad_click,
}

print("퍼널 카운트:", funnel_counts_ad)


퍼널 카운트: {'view': 3096931, 'basket': 1019989, 'purchase': 458772, 'ad_purchase': 172028}


In [43]:
# Stage sizes are read from funnel_counts_ad instead of being typed in by hand.
view_count_ad = funnel_counts_ad['view']
basket_count_ad = funnel_counts_ad['basket']
# The purchase stage of this funnel is purchases plus ad clicks
# (the previously hard-coded 630800 = 458772 + 172028).
purchase_count_ad = funnel_counts_ad['purchase'] + funnel_counts_ad['ad_purchase']
ad_purchase_count = funnel_counts_ad['ad_purchase']

# Conversion rate from each stage to the next.
basket_to_purchase_rate_ad = purchase_count_ad / basket_count_ad
view_to_basket_rate_ad = basket_count_ad / view_count_ad
view_to_purchase_rate_ad = purchase_count_ad / view_count_ad
view_to_ad_purchase_rate_ad = ad_purchase_count / view_count_ad

# Report the results.
print("View Count:", view_count_ad)
print("Basket Count:", basket_count_ad)
print("Purchase Count:", purchase_count_ad)
print("ad_purchase Count:", ad_purchase_count)
print("View to Basket Conversion Rate:", view_to_basket_rate_ad)
print("Basket to Purchase Conversion Rate:", basket_to_purchase_rate_ad)
print("View to Purchase Conversion Rate:",view_to_purchase_rate_ad)
print("View to Ad Purchase Conversion Rate", view_to_ad_purchase_rate_ad)

View Count: 3096931
Basket Count: 1019989
Purchase Count: 630800
ad_purchase Count: 172028
View to Basket Conversion Rate: 0.32935477090061094
Basket to Purchase Conversion Rate: 0.6184380419788841
View to Purchase Conversion Rate: 0.2036855196321778
View to Ad Purchase Conversion Rate 0.05554789564249252


In [44]:
# Funnel chart for the funnel that includes ad clicks.
# Values reference the variables from the preceding rate cell instead of
# repeating their printed values as literals.
data = dict(
    number=[view_count_ad, basket_count_ad, purchase_count_ad, ad_purchase_count],
    stage=['view', 'basket', 'purchase', 'ad_purchase'],
    # Each rate is relative to the view stage, so the first bar is 100%.
    rate=[
        1,
        view_to_basket_rate_ad,
        view_to_purchase_rate_ad,
        view_to_ad_purchase_rate_ad,
    ],
)
fig = px.funnel(data, x='rate', y='stage', title='Ad 포함 전체 Funnel')
fig.update_traces(texttemplate="%{value:,.2%}")
fig.show()

In [45]:
# All ad_click events.
df.loc[df['ActionType'].eq('ad_click')]

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,Category,OriginalPrice,Inventory,year,month,day,date,YearMonth
4,74377,ad_click,1203,85290,ad_google,175.38,2022-04-16 22:19:28.511967,Electronics,85290,9173,2022,4,16,2022-04-16,202204
23,8620,ad_click,1169,16750,ad_meta,118.03,2022-02-08 19:54:39.511967,Books,16750,7640,2022,2,8,2022-02-08,202202
48,110346,ad_click,1266,7800,ad_line,65.35,2022-05-29 17:55:12.511967,Books,7800,2591,2022,5,29,2022-05-29,202205
53,120663,ad_click,1050,11360,ad_meta,45.21,2022-02-25 07:56:58.511967,Books,11360,1645,2022,2,25,2022-02-25,202202
62,120663,ad_click,1011,32940,ad_line,101.81,2022-04-04 06:10:53.511967,Home & Garden,32940,2555,2022,4,4,2022-04-04,202204
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3096826,733390,ad_click,1492,15020,ad_meta,49.34,2022-06-06 19:44:45.393408,Books,15020,6631,2022,6,6,2022-06-06,202206
3096863,716526,ad_click,1472,5630,ad_line,47.27,2022-05-31 06:01:19.395079,Clothing,5630,1661,2022,5,31,2022-05-31,202205
3096865,716526,ad_click,1472,5630,ad_google,45.99,2022-04-22 06:54:55.395079,Clothing,5630,1661,2022,4,22,2022-04-22,202204
3096891,362992,ad_click,1382,51790,ad_line,136.22,2022-04-30 23:01:51.397001,Electronics,51790,1526,2022,4,30,2022-04-30,202204


In [46]:
# Display the working frame with all derived columns.
df

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,Category,OriginalPrice,Inventory,year,month,day,date,YearMonth
0,74377,view,1484,108330,,,2022-02-17 19:07:19.511967,Electronics,108330,9595,2022,2,17,2022-02-17,202202
1,74377,purchase,1484,108330,,,2022-03-20 04:26:25.511967,Electronics,108330,9595,2022,3,20,2022-03-20,202203
2,74377,view,1063,14370,,,2022-03-02 04:43:47.511967,Clothing,14370,2386,2022,3,2,2022-03-02,202203
3,74377,purchase,1063,14370,,,2022-03-22 00:42:08.511967,Clothing,14370,2386,2022,3,22,2022-03-22,202203
4,74377,ad_click,1203,85290,ad_google,175.38,2022-04-16 22:19:28.511967,Electronics,85290,9173,2022,4,16,2022-04-16,202204
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3096926,454879,view,1250,100420,,,2022-05-27 08:18:40.554370,Electronics,100420,8291,2022,5,27,2022-05-27,202205
3096927,454879,view,1250,100420,,,2022-04-25 00:41:09.554370,Electronics,100420,8291,2022,4,25,2022-04-25,202204
3096928,454879,view,1250,100420,,,2022-06-14 03:44:21.554370,Electronics,100420,8291,2022,6,14,2022-06-14,202206
3096929,454879,basket,1250,100420,,,2022-05-13 11:09:41.554370,Electronics,100420,8291,2022,5,13,2022-05-13,202205


In [47]:
# Chronological event history of customer 74377.
df.loc[df['CustomerID'].eq(74377)].sort_values(by='Timestamp')

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,Category,OriginalPrice,Inventory,year,month,day,date,YearMonth
1860787,74377,view,1216,17090,,,2022-02-16 18:33:07.868132,Books,17090,8827,2022,2,16,2022-02-16,202202
0,74377,view,1484,108330,,,2022-02-17 19:07:19.511967,Electronics,108330,9595,2022,2,17,2022-02-17,202202
1860788,74377,view,1250,100420,,,2022-02-24 18:03:05.868132,Electronics,100420,8291,2022,2,24,2022-02-24,202202
2,74377,view,1063,14370,,,2022-03-02 04:43:47.511967,Clothing,14370,2386,2022,3,2,2022-03-02,202203
1860785,74377,view,1352,46710,,,2022-03-05 09:46:17.868132,Electronics,46710,8424,2022,3,5,2022-03-05,202203
1860789,74377,view,1250,100420,,,2022-03-18 16:23:26.868132,Electronics,100420,8291,2022,3,18,2022-03-18,202203
1,74377,purchase,1484,108330,,,2022-03-20 04:26:25.511967,Electronics,108330,9595,2022,3,20,2022-03-20,202203
1860784,74377,view,1352,46710,,,2022-03-20 23:55:26.868132,Electronics,46710,8424,2022,3,20,2022-03-20,202203
5,74377,view,1203,85290,,,2022-03-21 23:23:24.511967,Electronics,85290,9173,2022,3,21,2022-03-21,202203
3,74377,purchase,1063,14370,,,2022-03-22 00:42:08.511967,Clothing,14370,2386,2022,3,22,2022-03-22,202203


- ad_click: 광고를 보고 접속한 view로 간주
- ad_click 이후 7일 이내에, 광고로 본 GoodsCode와 purchase의 GoodsCode가 일치하는 경우만 "광고를 통해 구매했다"고 정의

In [48]:
# NOTE: this entire cell is disabled (commented out) and does not run.
# NOTE(review): presumably superseded by the precomputed result_df.parquet that
# is loaded further down — confirm, then delete this cell.
# from datetime import timedelta
# from tqdm.auto import tqdm


# def filter_data_from_ad_click(df, ad_click_times):
#     filtered_indices = []  # indices of the rows that pass the filter
    
#     # show progress with tqdm
#     for ad_click_time in tqdm(ad_click_times, desc="Filtering data"):
#         # keep rows at or after the ad click time
#         temp_indices = df[df['Timestamp'] >= ad_click_time].index
        
#         # keep only rows within 7 days after the ad click
#         end_time = ad_click_time + timedelta(days=7)
#         temp_indices = temp_indices[df.loc[temp_indices, 'Timestamp'] <= end_time]
        
#         # collect the indices of the filtered rows
#         filtered_indices.extend(temp_indices)
    
#     # build a DataFrame from the collected indices
#     filtered_data = df.loc[filtered_indices]
    
#     return filtered_data

# # collect the ad click times (as datetime objects)
# ad_click_times = df[df['ActionType'] == 'ad_click']['Timestamp'].tolist()

# # filter the data to rows within 7 days of an ad click
# filtered_data = filter_data_from_ad_click(df, ad_click_times)

# filtered_data



In [49]:
# filtered_data[filtered_data['CustomerID']==74377]

# 광고본후(+7days) 데이터셋

In [50]:
# Load the precomputed log in which rows carry GoodsCode_ad, the goods code of
# an associated ad click (empty where there is none).
# NOTE(review): absolute, machine-specific path; the code that produced this
# file is not part of the notebook.
new_file = pd.read_parquet(
    "/Users/hj/Documents/Four-A/2nd dataset/khj/data/result_df.parquet"
)
new_file

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,GoodsCode_ad
0,4,view,1492,15020,,,2022-07-04 15:11:42.511967,
1,4,view,1492,15020,,,2022-07-24 14:44:40.511967,
0,6,view,1415,12980,,,2022-04-22 15:49:44.166991,
1,6,purchase,1437,35290,,,2022-04-26 21:46:00.166991,
2,6,view,1217,16220,,,2022-05-03 14:26:12.811783,
...,...,...,...,...,...,...,...,...
28,899999,view,1059,21230,,,2022-05-22 12:51:15.722389,
29,899999,basket,1059,21230,,,2022-05-23 01:28:11.722389,
30,899999,view,1059,21230,,,2022-05-24 04:40:26.722389,
31,899999,purchase,1034,12910,,,2022-06-20 03:40:43.722389,


In [51]:
# Spot-check one customer to see how GoodsCode_ad is filled around an ad click.
new_file.loc[new_file['CustomerID'].eq(482304)]

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,GoodsCode_ad
0,482304,view,1182,106010,,,2022-03-22 23:50:37.397671,
1,482304,view,1191,11910,,,2022-03-27 11:07:47.397671,
2,482304,basket,1182,106010,,,2022-03-28 23:11:53.397671,
3,482304,view,1182,106010,,,2022-03-29 18:29:31.397671,
4,482304,view,1191,11910,,,2022-04-07 16:29:15.397671,
5,482304,view,1182,106010,,,2022-04-08 10:23:39.397671,
6,482304,view,1191,11910,,,2022-04-09 02:06:59.397671,
7,482304,ad_click,1191,11910,ad_meta,51.46,2022-04-11 10:17:03.397671,1191.0
8,482304,view,1191,11910,,,2022-04-17 11:37:49.397671,1191.0
9,482304,view,1182,106010,,,2022-04-21 23:45:22.397671,


In [52]:
# Keep only the rows that are tied to an ad click, i.e. have a GoodsCode_ad value.
ad_funnel = new_file.dropna(subset=['GoodsCode_ad'])
ad_funnel

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,GoodsCode_ad
5,6,ad_click,1437,35290,ad_google,182.31,2022-05-07 21:00:59.166991,1437
6,6,view,1437,35290,,,2022-05-12 10:25:41.166991,1437
2,13,ad_click,1090,21100,ad_google,158.68,2022-03-11 18:14:48.855443,1090
3,13,view,1203,85290,,,2022-03-17 14:31:19.855443,1090
4,14,ad_click,1000,12580,ad_google,104.04,2022-03-07 10:33:24.548570,1000
...,...,...,...,...,...,...,...,...
16,899993,view,1045,120410,,,2022-05-11 03:04:39.372529,1019
18,899999,ad_click,1059,21230,ad_meta,71.35,2022-04-06 21:09:47.722389,1059
19,899999,view,1271,92940,,,2022-04-10 18:07:56.722389,1059
25,899999,ad_click,1059,21230,ad_google,94.14,2022-05-06 00:08:17.722389,1059


In [53]:
# Event counts per action type within the ad-attributed subset.
ad_funnel['ActionType'].value_counts()

ActionType
ad_click    172028
view        124368
basket       32755
purchase     28505
Name: count, dtype: int64

In [54]:
# Items that were clicked through an ad: row count per advertised goods code.
ad_funnel['GoodsCode_ad'].value_counts()

GoodsCode_ad
1459    867
1078    857
1009    842
1418    824
1170    819
       ... 
1228    604
1156    600
1060    589
1062    582
1317    570
Name: count, Length: 500, dtype: Int64

In [55]:
# Row count per goods code of the event itself within the ad-attributed subset.
ad_funnel['GoodsCode'].value_counts()

GoodsCode
1248    813
1418    804
1090    797
1015    796
1114    792
       ... 
1022    628
1062    628
1290    617
1060    616
1366    613
Name: count, Length: 500, dtype: int64

In [56]:
# Purchase rows whose purchased GoodsCode equals the advertised GoodsCode_ad.
matched_purchase = ad_funnel.loc[
    ad_funnel['ActionType'].eq('purchase')
    & ad_funnel['GoodsCode'].eq(ad_funnel['GoodsCode_ad'])
]

# Number of matching rows.
matched_purchase_count = matched_purchase.shape[0]

print("일치하는 purchase 행 수:", matched_purchase_count)


일치하는 purchase 행 수: 7759


## 전체 ad funnel_1  (광고로 인해 다른상품 매출 기여도 포함)
- 광고로 본 Goodscode 와 구매한 Goodscode 가 달라도 purchase 로 인정한 경우

In [58]:
# Cumulative funnel counts for the ad-attributed subset, regardless of whether
# the goods acted on match the advertised goods.
#
# view and ad_click rows count as a view; basket rows count as basket + view;
# purchase rows count as purchase + basket + view.
# Totals come from value_counts() instead of an iterrows() loop; the numbers
# are identical.
ad_action_counts = ad_funnel['ActionType'].value_counts()
n_view = int(ad_action_counts.get('view', 0))
n_ad_click = int(ad_action_counts.get('ad_click', 0))
n_basket = int(ad_action_counts.get('basket', 0))
n_purchase = int(ad_action_counts.get('purchase', 0))

total_ad_funnel_counts = {
    'view': n_view + n_ad_click + n_basket + n_purchase,
    'basket': n_basket + n_purchase,
    'purchase': n_purchase,
}

print("퍼널 카운트:", total_ad_funnel_counts)


퍼널 카운트: {'view': 357656, 'basket': 61260, 'purchase': 28505}


In [61]:
# Stage sizes are read from total_ad_funnel_counts instead of being typed in by hand.
# NOTE: these names intentionally overwrite the ones set for the earlier
# "funnel including ad clicks" section.
view_count_ad = total_ad_funnel_counts['view']
basket_count_ad = total_ad_funnel_counts['basket']
purchase_count_ad = total_ad_funnel_counts['purchase']


# Conversion rate from each stage to the next.
basket_to_purchase_rate_ad = purchase_count_ad / basket_count_ad
view_to_basket_rate_ad = basket_count_ad / view_count_ad
view_to_purchase_rate_ad = purchase_count_ad / view_count_ad
# NOTE(review): ad_purchase_count is not defined in this cell; it is whatever
# the earlier ad-funnel rate cell left behind, and the result is never printed.
# Looks like a copy-paste leftover — confirm and remove.
view_to_ad_purchase_rate_ad = ad_purchase_count / view_count_ad

# Report the results.
print("View Count:", view_count_ad)
print("Basket Count:", basket_count_ad)
print("Purchase Count:", purchase_count_ad)
print("View to Basket Conversion Rate:", view_to_basket_rate_ad)
print("Basket to Purchase Conversion Rate:", basket_to_purchase_rate_ad)
print("View to Purchase Conversion Rate:",view_to_purchase_rate_ad)


View Count: 357656
Basket Count: 61260
Purchase Count: 28505
View to Basket Conversion Rate: 0.17128190216297223
Basket to Purchase Conversion Rate: 0.4653117858308847
View to Purchase Conversion Rate: 0.0796994877759635


In [64]:
# Funnel chart for the whole ad funnel (purchases of any goods code count).
# Counts and rates come from the variables computed in the rate cell instead of
# hand-copied literals, so the chart always matches the printed figures.
data = dict(
    number=[view_count_ad, basket_count_ad, purchase_count_ad],
    stage=['view','basket','purchase'],
    # Each stage expressed as a share of the top ("view") stage.
    rate=[1, view_to_basket_rate_ad, view_to_purchase_rate_ad]
)
fig = px.funnel(data, x='rate', y='stage', title='전체 AD Funnel (광고로 본 goodscode != 구매한 goodscode)')
# Show the bar labels as percentages with two decimals.
fig.update_traces(texttemplate="%{value:,.2%}")
fig.show()

## 전체 ad funnel_2 (광고했던 상품만 target 한 funnel)

- 광고로 본 GoodsCode와 실제로 구매한 GoodsCode가 일치하는 것만 purchase로 인정하는 경우

In [65]:
# Cumulative funnel restricted to the advertised item: view / basket / purchase
# rows count only when GoodsCode equals GoodsCode_ad, while every ad_click row
# counts as a view unconditionally. A deeper stage also counts toward the
# shallower ones (purchase -> basket -> view).
# Counted with vectorized masks instead of a row-by-row iterrows() loop.
funnel_actions = ad_funnel['ActionType']
# A missing GoodsCode_ad is treated as "not the advertised item".
same_goods = (ad_funnel['GoodsCode'] == ad_funnel['GoodsCode_ad']).fillna(False)

total_ad_funnel_adpurchase_counts = {
    'view': int(
        (funnel_actions == 'ad_click').sum()
        + (funnel_actions.isin(['view', 'basket', 'purchase']) & same_goods).sum()
    ),
    'basket': int((funnel_actions.isin(['basket', 'purchase']) & same_goods).sum()),
    'purchase': int(((funnel_actions == 'purchase') & same_goods).sum()),
}

print("퍼널 카운트:", total_ad_funnel_adpurchase_counts)


퍼널 카운트: {'view': 226666, 'basket': 22968, 'purchase': 7759}


In [66]:
# Stage totals are read from the total_ad_funnel_adpurchase_counts dict rather
# than being re-typed from printed output, so they stay in sync with the data.
view_count_ad_first = total_ad_funnel_adpurchase_counts['view']
basket_count_ad_first = total_ad_funnel_adpurchase_counts['basket']
purchase_count_ad_first = total_ad_funnel_adpurchase_counts['purchase']


# Conversion rate from each stage to the next, plus end-to-end view -> purchase.
basket_to_purchase_rate_ad2 = purchase_count_ad_first / basket_count_ad_first
view_to_basket_rate_ad2 = basket_count_ad_first / view_count_ad_first
view_to_purchase_rate_ad2 = purchase_count_ad_first / view_count_ad_first


# Print the results.
print("View Count:", view_count_ad_first)
print("Basket Count:", basket_count_ad_first)
print("Purchase Count:", purchase_count_ad_first)
print("View to Basket Conversion Rate:", view_to_basket_rate_ad2)
print("Basket to Purchase Conversion Rate:", basket_to_purchase_rate_ad2)
print("View to Purchase Conversion Rate:",view_to_purchase_rate_ad2)


View Count: 226666
Basket Count: 22968
Purchase Count: 7759
View to Basket Conversion Rate: 0.1013297097932641
Basket to Purchase Conversion Rate: 0.3378178335074887
View to Purchase Conversion Rate: 0.034230983032303035


In [67]:
# Funnel chart for the whole ad funnel, advertised item only.
# Counts and rates come from the variables computed in the rate cell instead of
# hand-copied literals, so the chart always matches the printed figures.
data = dict(
    number=[view_count_ad_first, basket_count_ad_first, purchase_count_ad_first],
    stage=['view','basket','ad_purchase'],
    # Each stage expressed as a share of the top ("view") stage.
    rate=[1, view_to_basket_rate_ad2, view_to_purchase_rate_ad2]
)
fig = px.funnel(data, x='rate', y='stage', title='전체 AD Funnel (광고로 본 goodscode == 구매한 goodscode)')
# Show the bar labels as percentages with two decimals.
fig.update_traces(texttemplate="%{value:,.2%}")
fig.show()

In [69]:
# Peek at the first 20 rows before the AdID column is filled in.
ad_funnel.iloc[:20]

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,GoodsCode_ad
5,6,ad_click,1437,35290,ad_google,182.31,2022-05-07 21:00:59.166991,1437
6,6,view,1437,35290,,,2022-05-12 10:25:41.166991,1437
2,13,ad_click,1090,21100,ad_google,158.68,2022-03-11 18:14:48.855443,1090
3,13,view,1203,85290,,,2022-03-17 14:31:19.855443,1090
4,14,ad_click,1000,12580,ad_google,104.04,2022-03-07 10:33:24.548570,1000
5,14,view,1287,8740,,,2022-03-10 22:52:38.548570,1000
6,14,view,1280,9270,,,2022-03-11 20:45:29.548570,1000
7,14,purchase,1000,12580,,,2022-03-12 03:17:48.548570,1000
16,30,ad_click,1451,18470,ad_google,183.98,2022-04-20 23:35:21.707955,1451
17,30,view,1026,24040,,,2022-04-25 10:36:27.707955,1451


In [72]:
# Replace a missing AdID with the value from the closest preceding row, so the
# view / basket / purchase rows inherit the channel of the ad_click above them.
# NOTE(review): a plain forward fill assumes every run of follow-up rows sits
# directly below its own ad_click row — confirm the upstream ordering.
# `.ffill()` replaces the deprecated `fillna(method='ffill')`, and `.assign`
# builds a new frame instead of writing into a slice of another DataFrame,
# which is what triggered the SettingWithCopyWarning.
ad_funnel = ad_funnel.assign(AdID=ad_funnel['AdID'].ffill())
ad_funnel.head(20)


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,GoodsCode_ad
5,6,ad_click,1437,35290,ad_google,182.31,2022-05-07 21:00:59.166991,1437
6,6,view,1437,35290,ad_google,,2022-05-12 10:25:41.166991,1437
2,13,ad_click,1090,21100,ad_google,158.68,2022-03-11 18:14:48.855443,1090
3,13,view,1203,85290,ad_google,,2022-03-17 14:31:19.855443,1090
4,14,ad_click,1000,12580,ad_google,104.04,2022-03-07 10:33:24.548570,1000
5,14,view,1287,8740,ad_google,,2022-03-10 22:52:38.548570,1000
6,14,view,1280,9270,ad_google,,2022-03-11 20:45:29.548570,1000
7,14,purchase,1000,12580,ad_google,,2022-03-12 03:17:48.548570,1000
16,30,ad_click,1451,18470,ad_google,183.98,2022-04-20 23:35:21.707955,1451
17,30,view,1026,24040,ad_google,,2022-04-25 10:36:27.707955,1451


## ad_google 1 (광고로 인한 다른 상품 매출 기여도 포함)

In [75]:
# Rows attributed to the Google ad channel (AdID was forward-filled beforehand).
google_rows = ad_funnel['AdID'] == 'ad_google'
ad_funnel_google = ad_funnel.loc[google_rows]
ad_funnel_google.head()

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,GoodsCode_ad
5,6,ad_click,1437,35290,ad_google,182.31,2022-05-07 21:00:59.166991,1437
6,6,view,1437,35290,ad_google,,2022-05-12 10:25:41.166991,1437
2,13,ad_click,1090,21100,ad_google,158.68,2022-03-11 18:14:48.855443,1090
3,13,view,1203,85290,ad_google,,2022-03-17 14:31:19.855443,1090
4,14,ad_click,1000,12580,ad_google,104.04,2022-03-07 10:33:24.548570,1000


In [78]:
# Cumulative Google funnel over every row, regardless of which goods code the
# event refers to. A deeper stage also counts toward the shallower ones:
#   view     <- view, ad_click, basket, purchase
#   basket   <- basket, purchase
#   purchase <- purchase
# Counted with vectorized membership tests instead of a row-by-row iterrows() loop.
google_actions = ad_funnel_google['ActionType']

ad_google_funnel_total_purchase_counts = {
    'view': int(google_actions.isin(['view', 'ad_click', 'basket', 'purchase']).sum()),
    'basket': int(google_actions.isin(['basket', 'purchase']).sum()),
    'purchase': int((google_actions == 'purchase').sum()),
}


print("퍼널 카운트:", ad_google_funnel_total_purchase_counts)


퍼널 카운트: {'view': 120840, 'basket': 20792, 'purchase': 9639}


In [79]:
# Stage totals are read from the ad_google_funnel_total_purchase_counts dict
# rather than being re-typed from printed output.
view_count_ad_google1 = ad_google_funnel_total_purchase_counts['view']
basket_count_ad_google1 = ad_google_funnel_total_purchase_counts['basket']
purchase_count_ad_google1 = ad_google_funnel_total_purchase_counts['purchase']


# Conversion rate from each stage to the next, plus end-to-end view -> purchase.
basket_to_purchase_rate_ad_google1 = purchase_count_ad_google1 / basket_count_ad_google1
view_to_basket_rate_ad_google1 = basket_count_ad_google1 / view_count_ad_google1
view_to_purchase_rate_ad_google1 = purchase_count_ad_google1 / view_count_ad_google1

# Print the results.
print("View Count:", view_count_ad_google1)
print("Basket Count:", basket_count_ad_google1)
print("Purchase Count:", purchase_count_ad_google1)
print("View to Basket Conversion Rate:", view_to_basket_rate_ad_google1)
print("Basket to Purchase Conversion Rate:", basket_to_purchase_rate_ad_google1)
print("View to Purchase Conversion Rate:",view_to_purchase_rate_ad_google1)


View Count: 120840
Basket Count: 20792
Purchase Count: 9639
View to Basket Conversion Rate: 0.17206223104932142
Basket to Purchase Conversion Rate: 0.4635917660638707
View to Purchase Conversion Rate: 0.07976663356504468


In [80]:
# Funnel chart for the Google channel (purchases of any goods code count).
# Counts and rates come from the variables computed in the rate cell instead of
# hand-copied literals.
data = dict(
    number=[view_count_ad_google1, basket_count_ad_google1, purchase_count_ad_google1],
    stage=['view','basket','purchase'],
    # Each stage expressed as a share of the top ("view") stage.
    rate=[1, view_to_basket_rate_ad_google1, view_to_purchase_rate_ad_google1]
)
fig = px.funnel(data, x='rate', y='stage', title='Google AD Funnel (광고로 본 goodscode != 구매한 goodscode)')
# Show the bar labels as percentages with two decimals.
fig.update_traces(texttemplate="%{value:,.2%}")
fig.show()

## ad_google 2 (광고했던 상품만 target 한 funnel)

In [82]:
# Cumulative Google funnel restricted to the advertised item: view / basket /
# purchase rows count only when GoodsCode equals GoodsCode_ad, while every
# ad_click row counts as a view unconditionally. A deeper stage also counts
# toward the shallower ones (purchase -> basket -> view).
# Counted with vectorized masks instead of a row-by-row iterrows() loop.
google_actions = ad_funnel_google['ActionType']
# A missing GoodsCode_ad is treated as "not the advertised item".
google_same_goods = (ad_funnel_google['GoodsCode'] == ad_funnel_google['GoodsCode_ad']).fillna(False)

ad_google_funnel_adpurchase_counts = {
    'view': int(
        (google_actions == 'ad_click').sum()
        + (google_actions.isin(['view', 'basket', 'purchase']) & google_same_goods).sum()
    ),
    'basket': int((google_actions.isin(['basket', 'purchase']) & google_same_goods).sum()),
    'purchase': int(((google_actions == 'purchase') & google_same_goods).sum()),
}

print("퍼널 카운트:", ad_google_funnel_adpurchase_counts)

퍼널 카운트: {'view': 76559, 'basket': 7885, 'purchase': 2682}


In [83]:
# Stage totals are read from the ad_google_funnel_adpurchase_counts dict rather
# than being re-typed from printed output.
view_count_ad_google2 = ad_google_funnel_adpurchase_counts['view']
basket_count_ad_google2 = ad_google_funnel_adpurchase_counts['basket']
purchase_count_ad_google2 = ad_google_funnel_adpurchase_counts['purchase']


# Conversion rate from each stage to the next, plus end-to-end view -> purchase.
basket_to_purchase_rate_ad_google2 = purchase_count_ad_google2 / basket_count_ad_google2
view_to_basket_rate_ad_google2 = basket_count_ad_google2 / view_count_ad_google2
view_to_purchase_rate_ad_google2 = purchase_count_ad_google2 / view_count_ad_google2

# Print the results.
print("View Count:", view_count_ad_google2)
print("Basket Count:", basket_count_ad_google2)
print("Purchase Count:", purchase_count_ad_google2)
print("View to Basket Conversion Rate:", view_to_basket_rate_ad_google2)
print("Basket to Purchase Conversion Rate:", basket_to_purchase_rate_ad_google2)
print("View to Purchase Conversion Rate:",view_to_purchase_rate_ad_google2)


View Count: 76559
Basket Count: 7885
Purchase Count: 2682
View to Basket Conversion Rate: 0.10299246332893586
Basket to Purchase Conversion Rate: 0.340139505389981
View to Purchase Conversion Rate: 0.035031805535599994


In [84]:
# Funnel chart for the Google channel, advertised item only.
# Counts and rates come from the variables computed in the rate cell instead of
# hand-copied literals.
data = dict(
    number=[view_count_ad_google2, basket_count_ad_google2, purchase_count_ad_google2],
    stage=['view','basket','purchase'],
    # Each stage expressed as a share of the top ("view") stage.
    rate=[1, view_to_basket_rate_ad_google2, view_to_purchase_rate_ad_google2]
)
fig = px.funnel(data, x='rate', y='stage', title='Google AD Funnel (광고로 본 goodscode == 구매한 goodscode)')
# Show the bar labels as percentages with two decimals.
fig.update_traces(texttemplate="%{value:,.2%}")
fig.show()

## ad_meta 1 (광고로 인한 다른 상품 매출 기여도 포함)

In [89]:
# Rows attributed to the Meta ad channel (AdID was forward-filled beforehand).
meta_rows = ad_funnel['AdID'] == 'ad_meta'
ad_funnel_meta = ad_funnel.loc[meta_rows]
ad_funnel_meta.head(7)

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,GoodsCode_ad
2,60,ad_click,1124,79780,ad_meta,580.33,2022-04-18 05:44:55.486139,1124
3,60,basket,1124,79780,ad_meta,,2022-04-20 05:16:44.486139,1124
4,60,view,1124,79780,ad_meta,,2022-04-21 21:39:54.486139,1124
11,84,ad_click,1462,13020,ad_meta,107.84,2022-05-23 14:02:41.760000,1462
12,84,view,1462,13020,ad_meta,,2022-05-27 20:48:24.760000,1462
13,84,basket,1392,5360,ad_meta,,2022-05-28 19:03:58.961569,1462
0,102,ad_click,1355,31340,ad_meta,252.84,2022-02-08 17:09:21.530419,1355


In [90]:
# Cumulative Meta funnel over every row, regardless of which goods code the
# event refers to. A deeper stage also counts toward the shallower ones:
#   view     <- view, ad_click, basket, purchase
#   basket   <- basket, purchase
#   purchase <- purchase
# Counted with vectorized membership tests instead of a row-by-row iterrows() loop.
meta_actions = ad_funnel_meta['ActionType']

ad_meta_funnel_total_purchase_counts = {
    'view': int(meta_actions.isin(['view', 'ad_click', 'basket', 'purchase']).sum()),
    'basket': int(meta_actions.isin(['basket', 'purchase']).sum()),
    'purchase': int((meta_actions == 'purchase').sum()),
}


print("퍼널 카운트:", ad_meta_funnel_total_purchase_counts)


퍼널 카운트: {'view': 119021, 'basket': 20322, 'purchase': 9425}


In [91]:
# Stage totals are read from the ad_meta_funnel_total_purchase_counts dict
# rather than being re-typed from printed output.
view_count_ad_meta1 = ad_meta_funnel_total_purchase_counts['view']
basket_count_ad_meta1 = ad_meta_funnel_total_purchase_counts['basket']
purchase_count_ad_meta1 = ad_meta_funnel_total_purchase_counts['purchase']


# Conversion rate from each stage to the next, plus end-to-end view -> purchase.
basket_to_purchase_rate_ad_meta1 = purchase_count_ad_meta1 / basket_count_ad_meta1
view_to_basket_rate_ad_meta1 = basket_count_ad_meta1 / view_count_ad_meta1
view_to_purchase_rate_ad_meta1 = purchase_count_ad_meta1 / view_count_ad_meta1

# Print the results.
print("View Count:", view_count_ad_meta1)
print("Basket Count:", basket_count_ad_meta1)
print("Purchase Count:", purchase_count_ad_meta1)
print("View to Basket Conversion Rate:", view_to_basket_rate_ad_meta1)
print("Basket to Purchase Conversion Rate:", basket_to_purchase_rate_ad_meta1)
print("View to Purchase Conversion Rate:",view_to_purchase_rate_ad_meta1)


View Count: 119021
Basket Count: 20322
Purchase Count: 9425
View to Basket Conversion Rate: 0.17074297812990985
Basket to Purchase Conversion Rate: 0.46378309221533315
View to Purchase Conversion Rate: 0.07918770637114458


In [92]:
# Funnel chart for the Meta channel (purchases of any goods code count).
# Counts and rates come from the variables computed in the rate cell instead of
# hand-copied literals.
data = dict(
    number=[view_count_ad_meta1, basket_count_ad_meta1, purchase_count_ad_meta1],
    stage=['view','basket','purchase'],
    # Each stage expressed as a share of the top ("view") stage.
    rate=[1, view_to_basket_rate_ad_meta1, view_to_purchase_rate_ad_meta1]
)
fig = px.funnel(data, x='rate', y='stage', title='Meta AD Funnel (광고로 본 goodscode != 구매한 goodscode)')
# Show the bar labels as percentages with two decimals.
fig.update_traces(texttemplate="%{value:,.2%}")
fig.show()

## ad_meta 2 (광고했던 상품만 target 한 funnel)

In [93]:
# Cumulative Meta funnel restricted to the advertised item: view / basket /
# purchase rows count only when GoodsCode equals GoodsCode_ad, while every
# ad_click row counts as a view unconditionally. A deeper stage also counts
# toward the shallower ones (purchase -> basket -> view).
# Counted with vectorized masks instead of a row-by-row iterrows() loop.
meta_actions = ad_funnel_meta['ActionType']
# A missing GoodsCode_ad is treated as "not the advertised item".
meta_same_goods = (ad_funnel_meta['GoodsCode'] == ad_funnel_meta['GoodsCode_ad']).fillna(False)

ad_meta_funnel_adpurchase_counts = {
    'view': int(
        (meta_actions == 'ad_click').sum()
        + (meta_actions.isin(['view', 'basket', 'purchase']) & meta_same_goods).sum()
    ),
    'basket': int((meta_actions.isin(['basket', 'purchase']) & meta_same_goods).sum()),
    'purchase': int(((meta_actions == 'purchase') & meta_same_goods).sum()),
}

print("퍼널 카운트:", ad_meta_funnel_adpurchase_counts)

퍼널 카운트: {'view': 75485, 'basket': 7584, 'purchase': 2543}


In [95]:
# Stage totals are read from the ad_meta_funnel_adpurchase_counts dict rather
# than being re-typed from printed output.
view_count_ad_meta2 = ad_meta_funnel_adpurchase_counts['view']
basket_count_ad_meta2 = ad_meta_funnel_adpurchase_counts['basket']
purchase_count_ad_meta2 = ad_meta_funnel_adpurchase_counts['purchase']


# Conversion rate from each stage to the next, plus end-to-end view -> purchase.
basket_to_purchase_rate_ad_meta2 = purchase_count_ad_meta2 / basket_count_ad_meta2
view_to_basket_rate_ad_meta2 = basket_count_ad_meta2 / view_count_ad_meta2
view_to_purchase_rate_ad_meta2 = purchase_count_ad_meta2 / view_count_ad_meta2

# Print the results.
print("View Count:", view_count_ad_meta2)
print("Basket Count:", basket_count_ad_meta2)
print("Purchase Count:", purchase_count_ad_meta2)
print("View to Basket Conversion Rate:", view_to_basket_rate_ad_meta2)
print("Basket to Purchase Conversion Rate:", basket_to_purchase_rate_ad_meta2)
print("View to Purchase Conversion Rate:",view_to_purchase_rate_ad_meta2)


View Count: 75485
Basket Count: 7584
Purchase Count: 2543
View to Basket Conversion Rate: 0.10047029211101544
Basket to Purchase Conversion Rate: 0.33531118143459915
View to Purchase Conversion Rate: 0.033688812346823874


In [96]:
# Funnel chart for the Meta channel, advertised item only.
# Counts and rates come from the variables computed in the rate cell instead of
# hand-copied literals.
data = dict(
    number=[view_count_ad_meta2, basket_count_ad_meta2, purchase_count_ad_meta2],
    stage=['view','basket','purchase'],
    # Each stage expressed as a share of the top ("view") stage.
    rate=[1, view_to_basket_rate_ad_meta2, view_to_purchase_rate_ad_meta2]
)
fig = px.funnel(data, x='rate', y='stage', title='Meta AD Funnel (광고로 본 goodscode == 구매한 goodscode)')
# Show the bar labels as percentages with two decimals.
fig.update_traces(texttemplate="%{value:,.2%}")
fig.show()

## ad_line 1 (광고로 인한 다른 상품 매출 기여도 포함)

In [97]:
# Rows attributed to the Line ad channel (AdID was forward-filled beforehand).
line_rows = ad_funnel['AdID'] == 'ad_line'
ad_funnel_line = ad_funnel.loc[line_rows]
ad_funnel_line.head(7)

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,GoodsCode_ad
6,32,ad_click,1036,38250,ad_line,195.52,2022-03-06 19:05:33.376860,1036
7,32,view,1434,18720,ad_line,,2022-03-10 09:08:40.376860,1036
8,32,view,1336,82700,ad_line,,2022-03-10 20:36:37.376860,1036
9,32,view,1336,82700,ad_line,,2022-03-11 08:25:32.376860,1036
6,33,ad_click,1149,17780,ad_line,131.84,2022-04-10 18:14:48.486139,1149
0,52,ad_click,1481,6560,ad_line,54.88,2022-04-05 15:05:48.694075,1481
1,63,ad_click,1341,12540,ad_line,93.45,2022-04-05 02:39:00.132629,1341


In [98]:
# Cumulative Line funnel over every row, regardless of which goods code the
# event refers to. A deeper stage also counts toward the shallower ones:
#   view     <- view, ad_click, basket, purchase
#   basket   <- basket, purchase
#   purchase <- purchase
# Counted with vectorized membership tests instead of a row-by-row iterrows() loop.
line_actions = ad_funnel_line['ActionType']

ad_line_funnel_total_purchase_counts = {
    'view': int(line_actions.isin(['view', 'ad_click', 'basket', 'purchase']).sum()),
    'basket': int(line_actions.isin(['basket', 'purchase']).sum()),
    'purchase': int((line_actions == 'purchase').sum()),
}


print("퍼널 카운트:", ad_line_funnel_total_purchase_counts)


퍼널 카운트: {'view': 117795, 'basket': 20146, 'purchase': 9441}


In [99]:
# Stage totals are read from the ad_line_funnel_total_purchase_counts dict
# rather than being re-typed from printed output.
view_count_ad_line1 = ad_line_funnel_total_purchase_counts['view']
basket_count_ad_line1 = ad_line_funnel_total_purchase_counts['basket']
purchase_count_ad_line1 = ad_line_funnel_total_purchase_counts['purchase']


# Conversion rate from each stage to the next, plus end-to-end view -> purchase.
basket_to_purchase_rate_ad_line1 = purchase_count_ad_line1 / basket_count_ad_line1
view_to_basket_rate_ad_line1 = basket_count_ad_line1 / view_count_ad_line1
view_to_purchase_rate_ad_line1 = purchase_count_ad_line1 / view_count_ad_line1

# Print the results.
print("View Count:", view_count_ad_line1)
print("Basket Count:", basket_count_ad_line1)
print("Purchase Count:", purchase_count_ad_line1)
print("View to Basket Conversion Rate:", view_to_basket_rate_ad_line1)
print("Basket to Purchase Conversion Rate:", basket_to_purchase_rate_ad_line1)
print("View to Purchase Conversion Rate:",view_to_purchase_rate_ad_line1)


View Count: 117795
Basket Count: 20146
Purchase Count: 9441
View to Basket Conversion Rate: 0.17102593488687975
Basket to Purchase Conversion Rate: 0.4686290082398491
View to Purchase Conversion Rate: 0.08014771424933147


In [100]:
# Funnel chart for the Line channel (purchases of any goods code count).
# Counts and rates come from the variables computed in the rate cell instead of
# hand-copied literals.
data = dict(
    number=[view_count_ad_line1, basket_count_ad_line1, purchase_count_ad_line1],
    stage=['view','basket','purchase'],
    # Each stage expressed as a share of the top ("view") stage.
    rate=[1, view_to_basket_rate_ad_line1, view_to_purchase_rate_ad_line1]
)
fig = px.funnel(data, x='rate', y='stage', title='Line AD Funnel (광고로 본 goodscode != 구매한 goodscode)')
# Show the bar labels as percentages with two decimals.
fig.update_traces(texttemplate="%{value:,.2%}")
fig.show()

## ad_line 2 (광고했던 상품만 target 한 funnel)

In [101]:
# Cumulative Line funnel restricted to the advertised item: view / basket /
# purchase rows count only when GoodsCode equals GoodsCode_ad, while every
# ad_click row counts as a view unconditionally. A deeper stage also counts
# toward the shallower ones (purchase -> basket -> view).
# Counted with vectorized masks instead of a row-by-row iterrows() loop.
line_actions = ad_funnel_line['ActionType']
# A missing GoodsCode_ad is treated as "not the advertised item".
line_same_goods = (ad_funnel_line['GoodsCode'] == ad_funnel_line['GoodsCode_ad']).fillna(False)

ad_line_funnel_adpurchase_counts = {
    'view': int(
        (line_actions == 'ad_click').sum()
        + (line_actions.isin(['view', 'basket', 'purchase']) & line_same_goods).sum()
    ),
    'basket': int((line_actions.isin(['basket', 'purchase']) & line_same_goods).sum()),
    'purchase': int(((line_actions == 'purchase') & line_same_goods).sum()),
}

print("퍼널 카운트:", ad_line_funnel_adpurchase_counts)

퍼널 카운트: {'view': 74622, 'basket': 7499, 'purchase': 2534}


In [103]:
# Stage totals are read from the ad_line_funnel_adpurchase_counts dict rather
# than being re-typed from printed output.
view_count_ad_line2 = ad_line_funnel_adpurchase_counts['view']
basket_count_ad_line2 = ad_line_funnel_adpurchase_counts['basket']
purchase_count_ad_line2 = ad_line_funnel_adpurchase_counts['purchase']


# Conversion rate from each stage to the next, plus end-to-end view -> purchase.
basket_to_purchase_rate_ad_line2 = purchase_count_ad_line2 / basket_count_ad_line2
view_to_basket_rate_ad_line2 = basket_count_ad_line2 / view_count_ad_line2
view_to_purchase_rate_ad_line2 = purchase_count_ad_line2 / view_count_ad_line2

# Print the results.
print("View Count:", view_count_ad_line2)
print("Basket Count:", basket_count_ad_line2)
print("Purchase Count:", purchase_count_ad_line2)
print("View to Basket Conversion Rate:", view_to_basket_rate_ad_line2)
print("Basket to Purchase Conversion Rate:", basket_to_purchase_rate_ad_line2)
print("View to Purchase Conversion Rate:",view_to_purchase_rate_ad_line2)


View Count: 74622
Basket Count: 7499
Purchase Count: 2534
View to Basket Conversion Rate: 0.10049315215352041
Basket to Purchase Conversion Rate: 0.337911721562875
View to Purchase Conversion Rate: 0.03395781404947602


In [104]:
# Funnel chart for the Line channel, advertised item only.
# Counts and rates come from the variables computed in the rate cell instead of
# hand-copied literals.
data = dict(
    number=[view_count_ad_line2, basket_count_ad_line2, purchase_count_ad_line2],
    stage=['view','basket','purchase'],
    # Each stage expressed as a share of the top ("view") stage.
    rate=[1, view_to_basket_rate_ad_line2, view_to_purchase_rate_ad_line2]
)
fig = px.funnel(data, x='rate', y='stage', title='Line AD Funnel (광고로 본 goodscode == 구매한 goodscode)')
# Show the bar labels as percentages with two decimals.
fig.update_traces(texttemplate="%{value:,.2%}")
fig.show()

# 카테고리 퍼널

In [107]:
# Load the customer -> cluster label table (columns: CustomerID, cluster_plus).
customer_cluster_path = '/Users/hj/Documents/Four-A/2nd dataset/khj/data/customer_cluster_info (1).parquet'
customer_cluster = pd.read_parquet(customer_cluster_path)
customer_cluster

Unnamed: 0,CustomerID,cluster_plus
0,6,4
3,8,2
6,13,4
9,14,4
14,17,1
...,...,...
458758,899973,0
458759,899987,0
458761,899992,4
458764,899993,2


In [108]:
# Number of customers assigned to each cluster label.
customer_cluster.loc[:, 'cluster_plus'].value_counts()

cluster_plus
0      86124
4      42885
2      30916
1      14553
vip     1317
3        653
Name: count, dtype: int64

In [109]:
# Attach each customer's cluster label to the event rows. The left join keeps
# events of customers that have no entry in customer_cluster (their
# cluster_plus is left missing).
new_file_2 = pd.merge(new_file, customer_cluster, how='left', on='CustomerID')
new_file_2

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp,GoodsCode_ad,cluster_plus
0,4,view,1492,15020,,,2022-07-04 15:11:42.511967,,
1,4,view,1492,15020,,,2022-07-24 14:44:40.511967,,
2,6,view,1415,12980,,,2022-04-22 15:49:44.166991,,4
3,6,purchase,1437,35290,,,2022-04-26 21:46:00.166991,,4
4,6,view,1217,16220,,,2022-05-03 14:26:12.811783,,4
...,...,...,...,...,...,...,...,...,...
3096926,899999,view,1059,21230,,,2022-05-22 12:51:15.722389,,4
3096927,899999,basket,1059,21230,,,2022-05-23 01:28:11.722389,,4
3096928,899999,view,1059,21230,,,2022-05-24 04:40:26.722389,,4
3096929,899999,purchase,1034,12910,,,2022-06-20 03:40:43.722389,,4


In [110]:
# Check whether customer 4 has a cluster assignment at all.
customer_cluster.loc[customer_cluster['CustomerID'] == 4]

Unnamed: 0,CustomerID,cluster_plus


In [111]:
# Raw log rows for customer 4, to see what activity that customer has.
log.loc[log['CustomerID'] == 4]

Unnamed: 0,CustomerID,ActionType,GoodsCode,Price,AdID,CPC,Timestamp
38023,4,view,1492,15020,,,2022-07-24 14:44:40.511967
38024,4,share,1492,15020,,,2022-07-04 15:11:42.511967
