In [1]:
import pandas as pd
import json
from scipy import stats
import numpy as np

# 이벤트 기간에 참여한 유저

### 가입 전체 유저

In [2]:
# Read the all-user CSV and store uid as str, the dtype used for the
# uid-based merges against the other tables later in the notebook.
read_path = '../csv/reward_pay_1.0_all_user.csv'
all_user = pd.read_csv(read_path).assign(
    uid=lambda frame: frame['uid'].astype(str)
)

all_user

Unnamed: 0,join_date,uid
0,2019-12-17,10456692
1,2019-12-17,10456693
2,2019-12-17,10456694
3,2019-12-17,10456695
4,2019-12-17,10456696
...,...,...
16375,2019-12-19,10473808
16376,2019-12-19,10473809
16377,2019-12-19,10473810
16378,2019-12-19,10473811


In [4]:
# Number of non-null uids per join_date.
(
    all_user
    .groupby('join_date', as_index=False)
    .agg({'uid': 'count'})
)

Unnamed: 0,join_date,uid
0,2019-12-17,5785
1,2019-12-18,5483
2,2019-12-19,5112


### 이벤트 대상 유저

In [5]:
read_path = '../csv/reward_pay_1.0_target_user.csv'

# Build the event-target table in one chain so every step returns a fresh
# frame. The previous version assigned a column on a frame obtained by
# boolean filtering and column subsetting, which can raise pandas'
# SettingWithCopyWarning.
target_user = (
    pd.read_csv(read_path)
    # Keep only rows whose memo is one of the four test groups
    # (missing memos never match, same as the original == comparisons).
    .loc[lambda frame: frame['memo'].isin(['group_1', 'group_2', 'group_3', 'group_4'])]
    .rename(columns={'target_uid': 'uid', 'memo': 'test_group'})
    .loc[:, ['event_date', 'uid', 'test_group']]
    # uid is stored as str so it can be merged with the other str-typed uid columns.
    .assign(uid=lambda frame: frame['uid'].astype(str))
)

target_user

Unnamed: 0,event_date,uid,test_group
0,2019-12-18,10456697,group_2
1,2019-12-18,10456699,group_4
2,2019-12-18,10456707,group_4
3,2019-12-18,10456712,group_1
4,2019-12-18,10456713,group_2
...,...,...,...
3943,2019-12-20,10473793,group_2
3944,2019-12-20,10473798,group_3
3945,2019-12-20,10473802,group_3
3946,2019-12-20,10473803,group_4


### 전체 가입 유저와 이벤트 대상 유저 merge

In [12]:
# Left join: every row of all_user is kept; users without a target_user match
# get NaN in the columns coming from target_user.
user = all_user.merge(target_user, how='left', on='uid')

def group_reward(row):
    """Return the reward amount, as a string, for a row's test group.

    Args:
        row: Any mapping-like object supporting ``row['test_group']``
            (for example a DataFrame row passed by ``apply(axis=1)``).

    Returns:
        '1000', '3000', '5000' or '7000' for group_1 .. group_4 respectively,
        and '0' for any other value (including a missing test group).
    """
    reward_by_group = (
        ('group_1', '1000'),
        ('group_2', '3000'),
        ('group_3', '5000'),
        ('group_4', '7000'),
    )
    for group_name, amount in reward_by_group:
        if row['test_group'] == group_name:
            return amount
    # No known group matched, so there is no reward.
    return '0'

# Add the reward column by evaluating group_reward once per row.
user['reward'] = user.apply(group_reward, axis='columns')

user

Unnamed: 0,join_date,uid,event_date,test_group,reward
0,2019-12-17,10456692,,,0
1,2019-12-17,10456693,,,0
2,2019-12-17,10456694,,,0
3,2019-12-17,10456695,,,0
4,2019-12-17,10456696,,,0
...,...,...,...,...,...
16375,2019-12-19,10473808,,,0
16376,2019-12-19,10473809,,,0
16377,2019-12-19,10473810,2019-12-20,group_3,5000
16378,2019-12-19,10473811,,,0


# 이벤트 기간에 결제된 내역

### 이벤트 결제 내역

In [22]:
# Read the payment CSV; uid is stored as str to match the uid dtype of `user`
# for the merge that follows.
read_path = '../csv/reward_pay_1.0_pay.csv'
pay = pd.read_csv(read_path).assign(
    uid=lambda frame: frame['uid'].astype(str)
)

pay

Unnamed: 0,uid,paid_at,pay_type,status
0,4752408,2019-12-18 12:33:00,transfer,transfer_completed
1,6865289,2019-12-18 12:49:00,transfer,transfer_completed
2,10459160,2019-12-18 11:45:00,transfer,transfer_completed
3,5694740,2019-12-18 15:46:00,transfer,transfer_completed
4,10092496,2019-12-18 13:01:00,transfer,transfer_completed
...,...,...,...,...
26552,9658273,2020-01-03 23:09:50,pay,ship_ready
26553,10569707,2020-01-03 23:18:33,pay,ship_ready
26554,5623370,2020-01-03 23:41:29,pay,ship_ready
26555,3221775,2020-01-03 23:43:11,pay,ship_ready


### 결제 내역과 이벤트 전체 유저 merge

In [26]:
# Inner join (the merge default): only payments whose uid appears in `user` are kept.
pay_user = pay.merge(user, on='uid')

pay_user

Unnamed: 0,uid,paid_at,pay_type,status,join_date,event_date,test_group,reward
0,10459160,2019-12-18 11:45:00,transfer,transfer_completed,2019-12-17,,,0
1,10459160,2019-12-24 12:54:00,transfer,transfer_completed,2019-12-17,,,0
2,10459160,2019-12-26 17:30:01,transfer,transfer_completed,2019-12-17,,,0
3,10459160,2019-12-30 09:31:00,transfer,transfer_completed,2019-12-17,,,0
4,10459160,2019-12-30 09:31:00,transfer,transfer_completed,2019-12-17,,,0
...,...,...,...,...,...,...,...,...
428,10471204,2019-12-30 23:48:20,pay,purchase_confirm,2019-12-19,2019-12-20,group_1,1000
429,10470940,2019-12-31 11:21:01,pay,purchase_confirm,2019-12-19,,,0
430,10464174,2019-12-18 10:48:31,pay,ship_ready,2019-12-18,,,0
431,10459096,2019-12-19 14:53:02,pay,ship_ready,2019-12-17,,,0


In [31]:
# Per (reward, status): paid_at holds the number of payment rows,
# uid holds the number of distinct users.
pay_user_reward = (
    pay_user
    .groupby(['reward', 'status'], as_index=False)
    .agg({'paid_at': 'count', 'uid': 'nunique'})
)

pay_user_reward

Unnamed: 0,reward,status,paid_at,uid
0,0,delivery_completed,6,5
1,0,in_transit,3,3
2,0,payment_received,3,3
3,0,purchase_confirm,138,101
4,0,ship_ready,2,2
5,0,transfer_completed,115,94
6,1000,in_transit,2,1
7,1000,payment_received,1,1
8,1000,purchase_confirm,10,8
9,1000,transfer_completed,30,25


### 이벤트 결제 실패 내역

In [25]:
# Read the failed-payment CSV; uid is stored as str to match the uid dtype
# of `user` for the merge that follows.
read_path = '../csv/reward_pay_1.0_pay_fail.csv'
pay_fail = pd.read_csv(read_path).assign(
    uid=lambda frame: frame['uid'].astype(str)
)

pay_fail

Unnamed: 0,uid,paid_at,pay_type,status
0,3457670,2019-12-18 01:33:00,transfer,refunded
1,8688764,2019-12-18 00:40:00,transfer,waiting_bank_account_for_refund
2,102911,2019-12-18 00:14:00,transfer,refunded
3,10455756,2019-12-18 00:19:00,transfer,refunded
4,10150770,2019-12-18 00:41:00,transfer,refunded
...,...,...,...,...
9550,6877063,2020-01-03 23:13:54,pay,refunded
9551,6395957,2020-01-03 23:24:02,pay,refunded
9552,8172576,2020-01-03 23:34:59,pay,refunded
9553,6270760,2020-01-03 23:36:45,pay,refunded


In [28]:
# Inner join (the merge default): only failed payments whose uid appears in `user` are kept.
pay_fail_user = pay_fail.merge(user, on='uid')

pay_fail_user

Unnamed: 0,uid,paid_at,pay_type,status,join_date,event_date,test_group,reward
0,10463121,2019-12-18 02:02:00,transfer,refunded,2019-12-18,,,0
1,10463545,2019-12-18 09:52:00,transfer,refunded,2019-12-18,2019-12-19,group_2,3000
2,10457964,2019-12-18 09:57:00,transfer,refunded,2019-12-17,,,0
3,10463007,2019-12-18 12:00:00,transfer,refunded,2019-12-18,2019-12-19,group_4,7000
4,10463916,2019-12-18 12:39:01,transfer,refunded,2019-12-18,2019-12-19,group_1,1000
...,...,...,...,...,...,...,...,...
161,10466858,2019-12-30 12:43:30,pay,refunded,2019-12-18,,,0
162,10462661,2020-01-02 14:21:00,pay,refunded,2019-12-17,,,0
163,10470679,2020-01-02 16:48:10,pay,refunded,2019-12-19,,,0
164,10458610,2020-01-03 14:30:48,pay,refunded,2019-12-17,,,0


In [30]:
# Per (reward, status): paid_at holds the number of failed-payment rows,
# uid holds the number of distinct users.
pay_fail_user_reward = (
    pay_fail_user
    .groupby(['reward', 'status'], as_index=False)
    .agg({'paid_at': 'count', 'uid': 'nunique'})
)

pay_fail_user_reward

Unnamed: 0,reward,status,paid_at,uid
0,0,refunded,93,77
1,0,waiting_bank_account_for_refund,1,1
2,0,waiting_extra_info,1,1
3,1000,refunded,17,14
4,3000,refunded,22,15
5,3000,waiting_bank_account_for_refund,1,1
6,5000,refunded,13,12
7,7000,refunded,17,11
8,7000,return_before_shipping,1,1


# 이벤트 기간에 생성된 번프

In [9]:
# Read the bunp CSV; buyer_uid is stored as str because it is the join key
# against the str-typed `uid` column of `user`.
read_path = '../csv/reward_pay_1.0_bunp.csv'
bunp = pd.read_csv(read_path).assign(
    buyer_uid=lambda frame: frame['buyer_uid'].astype(str)
)

bunp

Unnamed: 0,channel_id,seller_uid,buyer_uid,seller_pid,created_at
0,174060916,10400414,9789553,113779233,2019-12-18 00:00:10
1,174049406,4019513,1967118,113720593,2019-12-18 00:01:38
2,174112346,7458727,1639610,105822185,2019-12-18 00:05:07
3,174111900,5523330,9786968,113802853,2019-12-18 00:05:16
4,169990639,665491,5565322,111984155,2019-12-18 00:05:28
...,...,...,...,...,...
262662,175828965,1215342,9954312,112420578,2019-12-31 11:46:00
262663,113029043,4191970,3157189,113966342,2020-01-01 07:06:11
262664,175774342,9245445,3020603,114384145,2019-12-30 09:46:49
262665,176082846,6944936,10116007,111536521,2020-01-01 09:24:56


In [34]:
# Inner join (the merge default) matching each row's buyer_uid to a uid in `user`;
# rows whose buyer is not in `user` are dropped.
bunp_user = bunp.merge(user, left_on='buyer_uid', right_on='uid')

bunp_user

Unnamed: 0,channel_id,seller_uid,buyer_uid,seller_pid,created_at,join_date,uid,event_date,test_group,reward
0,174115446,10461193,10456710,113788472,2019-12-18 00:17:17,2019-12-17,10456710,,,0
1,174109595,5253974,10462604,113799351,2019-12-18 00:23:05,2019-12-17,10462604,,,0
2,174128332,1617761,10463193,113769136,2019-12-18 02:36:22,2019-12-18,10463193,2019-12-19,group_2,3000
3,174128332,1617761,10463193,113770439,2019-12-18 02:38:34,2019-12-18,10463193,2019-12-19,group_2,3000
4,174128332,1617761,10463193,112244668,2019-12-18 02:40:46,2019-12-18,10463193,2019-12-19,group_2,3000
...,...,...,...,...,...,...,...,...,...,...
1597,175942910,3736164,10460180,104508124,2019-12-31 02:12:38,2019-12-17,10460180,2019-12-18,group_1,1000
1598,175840498,3150865,10457581,81372301,2019-12-31 13:52:38,2019-12-17,10457581,,,0
1599,175944496,549877,10473019,114388772,2019-12-31 18:07:24,2019-12-19,10473019,2019-12-20,group_4,7000
1600,176035290,5621881,10469663,98875726,2019-12-31 21:18:53,2019-12-19,10469663,2019-12-20,group_4,7000


In [37]:
# Per reward: created_at holds the number of bunp rows,
# uid holds the number of distinct buyers.
bunp_user_reward = (
    bunp_user
    .groupby('reward', as_index=False)
    .agg({'created_at': 'count', 'uid': 'nunique'})
)

bunp_user_reward

Unnamed: 0,reward,created_at,uid
0,0,883,552
1,1000,173,113
2,3000,204,106
3,5000,190,113
4,7000,152,91
