In [1]:
import pandas as pd
import json
from scipy import stats
import numpy as np

# 이벤트 기간에 가입한 유저

### 가입 전체 유저

In [2]:
# Load every user who signed up during the event window. uid is stored as a
# string because it is used as the join key in later merges.
read_path = '../csv/reward_register_2.0_all_user.csv'
all_user = pd.read_csv(read_path).assign(
    uid=lambda frame: frame['uid'].astype(str)
)

all_user.tail()

Unnamed: 0,join_date,uid
16678,2019-12-08,10410217
16679,2019-12-08,10410218
16680,2019-12-08,10410219
16681,2019-12-08,10410220
16682,2019-12-08,10410221


### 이벤트 대상 유저

In [6]:
# Load the event-target users and reduce them to (event_date, uid, test_group).
read_path = '../csv/reward_register_2.0_target_user.csv'

target_user = (
    pd.read_csv(read_path)
    # Keep only rows whose memo names one of the four test groups.
    .loc[lambda frame: frame['memo'].isin(['group_1', 'group_2', 'group_3', 'group_4'])]
    .rename(columns={'target_uid': 'uid', 'memo': 'test_group'})
    .loc[:, ['event_date', 'uid', 'test_group']]
    # uid as a string, matching the other frames it is merged with.
    .assign(uid=lambda frame: frame['uid'].astype(str))
)

target_user

Unnamed: 0,event_date,uid,test_group
0,2019-12-05,10382239,group_4
1,2019-12-05,10382245,group_2
2,2019-12-05,10382247,group_4
3,2019-12-05,10382251,group_4
4,2019-12-05,10382257,group_2
...,...,...,...
4471,2019-12-09,10410202,group_3
4472,2019-12-09,10410212,group_1
4473,2019-12-09,10410213,group_2
4474,2019-12-09,10410216,group_1


### 전체 가입 유저와 이벤트 대상 유저 merge

In [7]:
# Left join: every signed-up user is kept; users that are not event targets
# get missing event_date / test_group values.
user = all_user.merge(target_user, how='left', on='uid')

def group_reward(row):
    """Return the reward amount, as a string, for a row's test group.

    Args:
        row: Any object indexable by 'test_group' (for example one row of a
            DataFrame passed in by ``DataFrame.apply(..., axis=1)``).

    Returns:
        '1000', '2000', '3000' or '4000' for 'group_1' .. 'group_4'
        respectively, and '0' for any other value (such as a missing group).
    """
    reward_by_group = {
        'group_1': '1000',
        'group_2': '2000',
        'group_3': '3000',
        'group_4': '4000',
    }
    return reward_by_group.get(row['test_group'], '0')

# Attach the reward label for each row's test group (row-wise apply).
user = user.assign(reward=user.apply(group_reward, axis=1))

user

Unnamed: 0,join_date,uid,event_date,test_group,reward
0,2019-12-04,10382234,,,0
1,2019-12-04,10382235,,,0
2,2019-12-04,10382236,,,0
3,2019-12-04,10382237,,,0
4,2019-12-04,10382238,,,0
...,...,...,...,...,...
16678,2019-12-08,10410217,,,0
16679,2019-12-08,10410218,,,0
16680,2019-12-08,10410219,,,0
16681,2019-12-08,10410220,2019-12-09,group_1,1000


# 이벤트 기간에 등록된 상품

In [8]:
# Load the products registered during the event window; both identifiers are
# cast to strings.
read_path = '../csv/reward_register_2.0_product.csv'
product_raw = pd.read_csv(read_path).astype({'uid': str, 'pid': str})

product_raw

Unnamed: 0,uid,create_date,pid
0,10382262,2019-12-14 13:53:17.251000,113585607
1,10382262,2019-12-14 14:04:49.558000,113586197
2,10382262,2019-12-13 00:00:00.000000,113518443
3,10382262,2019-12-14 14:01:10.047000,113586030
4,10382285,2019-12-09 22:41:52.585921,113311269
...,...,...,...
6241,10410220,2019-12-10 00:09:51.156461,113317193
6242,10410220,2019-12-10 19:34:13.737182,113361807
6243,10410220,2019-12-16 06:42:27.371000,113685425
6244,10410220,2019-12-10 21:12:04.522563,113368507


In [9]:
# Registered product pids: one row per distinct pid. The per-pid count is only
# a by-product of the grouping, so it is dropped again.
pid = (
    product_raw
    .groupby(['pid'], as_index=False)
    .agg({'uid': 'count'})
    .drop(columns=['uid'])
)

pid

Unnamed: 0,pid
0,113023069
1,113023120
2,113023134
3,113023192
4,113023217
...,...
6241,113737938
6242,113737955
6243,113739024
6244,113739069


### 유저별 등록한 상품 수

In [10]:
# Number of registered products per user, stored in a 'products' column.
product_uid = (
    product_raw
    .groupby(['uid'], as_index=False)
    .agg({'pid': 'count'})
    .rename(columns={'pid': 'products'})
)

product_uid

Unnamed: 0,uid,products
0,10382262,4
1,10382285,1
2,10382331,16
3,10382347,2
4,10382363,1
...,...,...
1487,10410140,7
1488,10410153,1
1489,10410165,4
1490,10410182,1


### merge user with product

In [11]:
# Left join keeps every user; users with no registered product get a missing
# 'products' value rather than 0.
user_product = user.merge(product_uid, how='left', on='uid')

user_product

Unnamed: 0,join_date,uid,event_date,test_group,reward,products
0,2019-12-04,10382234,,,0,
1,2019-12-04,10382235,,,0,
2,2019-12-04,10382236,,,0,
3,2019-12-04,10382237,,,0,
4,2019-12-04,10382238,,,0,
...,...,...,...,...,...,...
16678,2019-12-08,10410217,,,0,
16679,2019-12-08,10410218,,,0,
16680,2019-12-08,10410219,,,0,
16681,2019-12-08,10410220,2019-12-09,group_1,1000,6.0


# 유저 그룹별 상품 등록 수

In [13]:
# Per (join_date, reward) group:
#   uid count      -> number of users in the group
#   products count -> number of users with a non-missing 'products' value
#   products sum   -> total number of products registered by the group
# NOTE(review): the saved output of this cell lists reward levels 5000 and
# 7000, which group_reward never returns -- the output looks stale; re-run the
# notebook from a fresh kernel to confirm.
product_group = (
    user_product
    .groupby(['join_date', 'reward'], as_index=False)
    .agg({'uid': 'count', 'products': ['count', 'sum']})
)

product_group

Unnamed: 0_level_0,join_date,reward,uid,products,products
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,count,sum
0,2019-12-04,0,4117,197,970.0
1,2019-12-04,1000,352,39,141.0
2,2019-12-04,3000,378,30,145.0
3,2019-12-04,5000,351,35,197.0
4,2019-12-04,7000,326,25,99.0
5,2019-12-05,0,4026,366,1828.0
6,2019-12-05,1000,364,68,192.0
7,2019-12-05,3000,338,55,225.0
8,2019-12-05,5000,373,65,219.0
9,2019-12-05,7000,328,56,217.0
