In [164]:
import pandas as pd
import json
from scipy import stats
import numpy as np

# 이벤트 기간에 가입한 유저

### 전체 가입 유저

In [57]:
read_path = '../csv/reward_test_all_user.csv'
all_user = pd.read_csv(read_path)

all_user['uid'] = all_user['uid'].astype(str)

all_user.tail()

Unnamed: 0,join_date,uid
19949,2019-10-23,10117330
19950,2019-10-23,10117331
19951,2019-10-23,10117332
19952,2019-10-23,10117333
19953,2019-10-23,10117334


### 이벤트 대상 유저

In [5]:
read_path = '../csv/reward_test_target_user.csv'
target_user = pd.read_csv(read_path)

target_user = target_user[target_user['전송 실패'] != '전송 실패'] # 번개톡 전송 실패 제외

target_user = target_user.rename(columns={'date': 'event_date'})

target_user = target_user[['event_date', 'uid', 'test_group']]

target_user['uid'] = target_user['uid'].astype(str)

target_user.tail()

Unnamed: 0,event_date,uid,test_group
5337,2019-10-24,10117321,group_1
5338,2019-10-24,10117322,group_2
5339,2019-10-24,10117329,group_1
5340,2019-10-24,10117330,group_2
5341,2019-10-24,10117332,group_0


### 전체 가입 유저와 이벤트 대상 유저 merge

In [6]:
user = pd.merge(all_user, target_user, on='uid', how='left')

def group_reward(row):
    if row['test_group'] == 'group_0':
        return '2000'
    elif row['test_group'] == 'group_1':
        return '4000'
    elif row['test_group'] == 'group_2':
        return '6000'
    elif row['test_group'] == 'group_3':
        return '8000'
    else:
        return '0'

user['reward'] = user.apply(group_reward, axis=1)
    
user

Unnamed: 0,join_date,uid,event_date,test_group,reward
0,2019-10-21,10096134,,,0
1,2019-10-21,10096135,2019-10-22,group_3,8000
2,2019-10-21,10096136,2019-10-22,group_0,2000
3,2019-10-21,10096137,,,0
4,2019-10-21,10096138,2019-10-22,group_2,6000
...,...,...,...,...,...
19949,2019-10-23,10117330,2019-10-24,group_2,6000
19950,2019-10-23,10117331,,,0
19951,2019-10-23,10117332,2019-10-24,group_0,2000
19952,2019-10-23,10117333,,,0


# 이벤트 기간에 등록된 상품

In [10]:
read_path = '../csv/reward_test_product.csv'
product_raw = pd.read_csv(read_path)

product_raw['uid'] = product_raw['uid'].astype(str)
product_raw['pid'] = product_raw['pid'].astype(str)

product_raw

Unnamed: 0,uid,create_date,pid
0,10096140,2019-10-26 19:24:00.854525,110631153
1,10096192,2019-10-22 01:10:52.643629,110365794
2,10096192,2019-10-22 11:20:37.584432,110376493
3,10096218,2019-10-25 18:56:43.725892,110577984
4,10096227,2019-10-23 18:10:04.888749,110460098
...,...,...,...
5314,10117269,2019-10-26 09:41:38.462283,110604546
5315,10117269,2019-10-24 00:05:36.885096,110482721
5316,10117269,2019-10-24 00:04:55.972227,110482683
5317,10117282,2019-10-23 23:58:46.609911,110482395


In [58]:
pid = product_raw.groupby(['pid'], as_index=False).agg({'uid': 'count'})

pid.drop(['uid'], axis=1, inplace=True)

pid

Unnamed: 0,pid
0,110362538
1,110362607
2,110362626
3,110362635
4,110362713
...,...
5314,110945257
5315,110945351
5316,110945447
5317,110945461


In [14]:
#유저별 등록한 상품 수
product_uid = product_raw.groupby(['uid'], as_index=False).agg({'pid': 'count'})

product_uid = product_uid.rename(columns={'pid': 'products'})

product_uid

Unnamed: 0,uid,products
0,10096140,1
1,10096192,2
2,10096218,1
3,10096227,2
4,10096228,2
...,...,...
1496,10117243,1
1497,10117260,1
1498,10117269,3
1499,10117282,1


### merge user with product

In [15]:
user_product = pd.merge(user, product_uid, on='uid', how='left')

user_product

Unnamed: 0,join_date,uid,event_date,test_group,reward,products
0,2019-10-21,10096134,,,0,
1,2019-10-21,10096135,2019-10-22,group_3,8000,
2,2019-10-21,10096136,2019-10-22,group_0,2000,
3,2019-10-21,10096137,,,0,
4,2019-10-21,10096138,2019-10-22,group_2,6000,
...,...,...,...,...,...,...
19949,2019-10-23,10117330,2019-10-24,group_2,6000,
19950,2019-10-23,10117331,,,0,
19951,2019-10-23,10117332,2019-10-24,group_0,2000,
19952,2019-10-23,10117333,,,0,


### 유저 그룹별 상품 등록 수

In [20]:
product_group = user_product.groupby(['join_date', 
                                      'reward'], as_index=False).agg({'uid':'count', 
                                                                      'products':['count', 'sum']})

product_group

Unnamed: 0_level_0,join_date,reward,uid,products,products
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,count,sum
0,2019-10-21,0,5059,195,767.0
1,2019-10-21,2000,453,38,107.0
2,2019-10-21,4000,476,36,250.0
3,2019-10-21,6000,444,32,124.0
4,2019-10-21,8000,514,34,172.0
5,2019-10-22,0,4739,315,1057.0
6,2019-10-22,2000,413,48,235.0
7,2019-10-22,4000,413,72,339.0
8,2019-10-22,6000,437,58,250.0
9,2019-10-22,8000,454,71,171.0


# 이벤트 기간의 채팅

In [None]:
# reward_test_chat.csv 산출 쿼리
select distinct extras, channel_id
from tb_message
where typecode= 12 and
    update_time >= '2019-10-22 00:00:00.0' AND update_time < '2019-11-01 00:00:00.0'

In [21]:
read_path = '../csv/reward_test_chat.csv'
chat_raw = pd.read_csv(read_path)

chat_raw.tail()

Unnamed: 0,extras,channel_id
1179219,"{""pid"":105058524,""thumbnailUrl"":""https:\/\/seo...",154892771
1179220,"{\n ""uid"" : ""3196841"",\n ""price"" : ""250,000 ...",167407487
1179221,"{""pid"":97692564,""thumbnailUrl"":""https:\/\/seou...",167410570
1179222,"{\n ""ref"" : ""검색결과"",\n ""uid"" : ""149715"",\n ""...",167410572
1179223,"{\n ""ref"" : ""검색결과"",\n ""uid"" : ""10168863"",\n ...",167410581


In [62]:
def extras_to_pid(row):
    return json.loads(row['extras']).get('pid')
        
chat_raw['pid'] = chat_raw.apply(extras_to_pid, axis=1)

chat['pid'] = chat['pid'].astype(str)

# pid별 채팅 수 산출
chat = chat_raw.groupby(['pid'], as_index=False).agg({'channel_id': pd.Series.nunique})

chat = chat.rename(columns={'channel_id': 'chats'})

chat.tail()

Unnamed: 0,pid,chats
694525,99998799,1
694526,99999032,1
694527,99999109,1
694528,99999539,2
694529,99999845,1


### merge pid with chat

In [63]:
pid_chat = pd.merge(pid, chat, on='pid', how='left')

pid_chat

Unnamed: 0,pid,chats
0,110362538,3.0
1,110362607,
2,110362626,
3,110362635,
4,110362713,
...,...,...
5314,110945257,
5315,110945351,
5316,110945447,
5317,110945461,


# 이벤트 기간의 번프

In [36]:
read_path = '../csv/reward_test_bunp.csv'
bunp_raw = pd.read_csv(read_path)

bunp_raw['seller_pid'] = bunp_raw['seller_pid'].fillna(0)

bunp_raw['seller_pid'] = bunp_raw['seller_pid'].astype(int).astype(str)

bunp_raw.tail()

Unnamed: 0,channel_id,seller_uid,buyer_uid,seller_pid,created_at
171631,167396305,4331758,7761492,105399623,2019-10-31 23:59:29
171632,167346340,8183196,10160952,109173387,2019-10-31 23:59:37
171633,165219214,1358510,7242952,80910784,2019-10-31 23:59:39
171634,167410296,3566971,3773382,105578884,2019-10-31 23:59:45
171635,167264000,4874182,4831491,110632324,2019-10-31 23:59:49


In [39]:
bunp = bunp_raw.groupby(['seller_pid'], as_index=False).agg({'channel_id': pd.Series.nunique})

bunp = bunp.rename(columns={'channel_id': 'bunps', 'seller_pid': 'pid'})

bunp

Unnamed: 0,pid,bunps
0,0,30
1,100000180,1
2,100001266,1
3,100001311,1
4,100002703,1
...,...,...
145211,99997293,1
145212,99997333,1
145213,99998647,1
145214,99998714,1


### merge pid with bunp

In [65]:
pid_chat_bunp = pd.merge(pid_chat, bunp, on='pid', how='left')

pid_chat_bunp

Unnamed: 0,pid,chats,bunps
0,110362538,3.0,
1,110362607,,
2,110362626,,
3,110362635,,
4,110362713,,
...,...,...,...
5314,110945257,,
5315,110945351,,
5316,110945447,,
5317,110945461,,


In [51]:
product_chat_bunp['bunps'].max()

11.0

# merge user, product, chat, bunp

In [66]:
product_chat_bunp = pd.merge(product_raw, pid_chat_bunp, on='pid', how='left')

product_chat_bunp

Unnamed: 0,uid,create_date,pid,chats,bunps
0,10096140,2019-10-26 19:24:00.854525,110631153,,
1,10096192,2019-10-22 01:10:52.643629,110365794,2.0,1.0
2,10096192,2019-10-22 11:20:37.584432,110376493,,
3,10096218,2019-10-25 18:56:43.725892,110577984,,
4,10096227,2019-10-23 18:10:04.888749,110460098,3.0,1.0
...,...,...,...,...,...
5314,10117269,2019-10-26 09:41:38.462283,110604546,,
5315,10117269,2019-10-24 00:05:36.885096,110482721,1.0,
5316,10117269,2019-10-24 00:04:55.972227,110482683,,
5317,10117282,2019-10-23 23:58:46.609911,110482395,,


In [67]:
#유저별 등록한 상품 수, 채팅, 번프
product_chat_bunp_uid = product_chat_bunp.groupby(['uid'], as_index=False).agg({'pid': 'count', 'chats': 'sum', 'bunps': 'sum'})

product_chat_bunp_uid = product_chat_bunp_uid.rename(columns={'pid': 'products'})

product_chat_bunp_uid

Unnamed: 0,uid,products,chats,bunps
0,10096140,1,0.0,0.0
1,10096192,2,2.0,1.0
2,10096218,1,0.0,0.0
3,10096227,2,3.0,1.0
4,10096228,2,0.0,0.0
...,...,...,...,...
1496,10117243,1,0.0,0.0
1497,10117260,1,1.0,0.0
1498,10117269,3,1.0,0.0
1499,10117282,1,0.0,0.0


In [168]:
product_chat_bunp_uid['chats'] = product_chat_bunp_uid['chats'].replace({0: np.nan})

product_chat_bunp_uid['bunps'] = product_chat_bunp_uid['bunps'].replace({0: np.nan})

In [167]:
product_chat_bunp_uid['chats'].count()

577

In [169]:
user_product = pd.merge(user, product_chat_bunp_uid, on='uid', how='left')

user_product

Unnamed: 0,join_date,uid,event_date,test_group,reward,products,chats,bunps
0,2019-10-21,10096134,,,0,,,
1,2019-10-21,10096135,2019-10-22,group_3,8000,,,
2,2019-10-21,10096136,2019-10-22,group_0,2000,,,
3,2019-10-21,10096137,,,0,,,
4,2019-10-21,10096138,2019-10-22,group_2,6000,,,
...,...,...,...,...,...,...,...,...
19949,2019-10-23,10117330,2019-10-24,group_2,6000,,,
19950,2019-10-23,10117331,,,0,,,
19951,2019-10-23,10117332,2019-10-24,group_0,2000,,,
19952,2019-10-23,10117333,,,0,,,


In [170]:
product_group = user_product.groupby(['join_date', 
                                      'reward'], as_index=False).agg({'uid':'count', 
                                                                      'products':['count', 'sum'],
                                                                     'chats': ['count', 'sum'],
                                                                     'bunps': ['count', 'sum']})

product_group

Unnamed: 0_level_0,join_date,reward,uid,products,products,chats,chats,bunps,bunps
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,count,sum,count,sum,count,sum
0,2019-10-21,0,5059,195,767.0,97,381.0,30,54.0
1,2019-10-21,2000,453,38,107.0,11,25.0,2,2.0
2,2019-10-21,4000,476,36,250.0,15,51.0,7,10.0
3,2019-10-21,6000,444,32,124.0,10,62.0,3,3.0
4,2019-10-21,8000,514,34,172.0,14,53.0,7,18.0
5,2019-10-22,0,4739,315,1057.0,113,442.0,61,134.0
6,2019-10-22,2000,413,48,235.0,14,60.0,8,24.0
7,2019-10-22,4000,413,72,339.0,35,227.0,19,43.0
8,2019-10-22,6000,437,58,250.0,26,99.0,15,28.0
9,2019-10-22,8000,454,71,171.0,28,87.0,6,6.0


In [172]:
def event_or_not(row):
    if pd.isnull(row['test_group']):
        return  'no_test'
    else:
        return 'test_user'

user_product['test_user'] = user_product.apply(event_or_not, axis=1)

user_product

Unnamed: 0,join_date,uid,event_date,test_group,reward,products,chats,bunps,test_user
0,2019-10-21,10096134,,,0,,,,no_test
1,2019-10-21,10096135,2019-10-22,group_3,8000,,,,test_user
2,2019-10-21,10096136,2019-10-22,group_0,2000,,,,test_user
3,2019-10-21,10096137,,,0,,,,no_test
4,2019-10-21,10096138,2019-10-22,group_2,6000,,,,test_user
...,...,...,...,...,...,...,...,...,...
19949,2019-10-23,10117330,2019-10-24,group_2,6000,,,,test_user
19950,2019-10-23,10117331,,,0,,,,no_test
19951,2019-10-23,10117332,2019-10-24,group_0,2000,,,,test_user
19952,2019-10-23,10117333,,,0,,,,no_test


### 이벤트 대상과 비대상의 상품 등록 비교

In [173]:
event_user = user_product[user_product['test_user'] == 'test_user']

event_user = event_user.fillna(0)

In [174]:
event_no_user = user_product[user_product['test_user'] == 'no_test']

event_no_user = event_no_user.fillna(0)

In [175]:
stats.ttest_ind(event_user['products'], event_no_user['products'], equal_var=False)

Ttest_indResult(statistic=6.003156403804484, pvalue=2.0451447862655015e-09)

In [110]:
event_user['products'].mean()

0.46876758581879574

In [111]:
event_no_user['products'].mean()

0.1928468850441086

### 이벤트 그룹의 상품 등록 비교

In [176]:
group_0 = user_product[user_product['test_group'] == 'group_0']

group_0 = group_0.fillna(0)

In [177]:
group_1 = user_product[user_product['test_group'] == 'group_1']

group_1 = group_1.fillna(0)

In [178]:
group_2 = user_product[user_product['test_group'] == 'group_2']

group_2 = group_2.fillna(0)

In [179]:
group_3 = user_product[user_product['test_group'] == 'group_3']

group_3 = group_3.fillna(0)

In [180]:
stats.f_oneway(group_0['products'], group_1['products'], group_2['products'], group_3['products'])

F_onewayResult(statistic=2.273534390106155, pvalue=0.07796921089615652)

### 이벤트 대상과 비대상 상품의 채팅, 번프 비교

In [192]:
event_user_product = event_user[event_user['products'] > 0]

event_no_user_product = event_no_user[event_no_user['products'] > 0]

In [197]:
def product_to_chat(row):
    return row['chats'] / row['products']

event_user_product['chat_ratio'] = event_user_product.apply(product_to_chat, axis=1)

event_no_user_product['chat_ratio'] = event_no_user_product.apply(product_to_chat, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [198]:
stats.ttest_ind(event_user_product['chat_ratio'], event_no_user_product['chat_ratio'], equal_var=False)

Ttest_indResult(statistic=0.460726039481917, pvalue=0.645072887711438)

In [187]:
stats.ttest_ind(event_user_product['bunps'], event_no_user_product['bunps'], equal_var=False)

Ttest_indResult(statistic=-0.6422235069860976, pvalue=0.5208266281022746)

In [199]:
event_user_product['chat_ratio'].mean()

0.5166724619545866

In [189]:
event_no_user_product['chats'].mean()

1.490011750881316

In [193]:
event_no_user_product

Unnamed: 0,join_date,uid,event_date,test_group,reward,products,chats,bunps,test_user
82,2019-10-21,10096218,0,0,0,1.0,0.0,0.0,no_test
90,2019-10-21,10096228,0,0,0,2.0,0.0,0.0,no_test
102,2019-10-21,10096240,0,0,0,1.0,0.0,0.0,no_test
121,2019-10-21,10096260,0,0,0,2.0,3.0,0.0,no_test
186,2019-10-21,10096325,0,0,0,5.0,1.0,0.0,no_test
...,...,...,...,...,...,...,...,...,...
19824,2019-10-23,10117199,0,0,0,1.0,0.0,0.0,no_test
19863,2019-10-23,10117238,0,0,0,4.0,3.0,2.0,no_test
19867,2019-10-23,10117243,0,0,0,1.0,0.0,0.0,no_test
19883,2019-10-23,10117260,0,0,0,1.0,1.0,0.0,no_test
