# Analysing Results

## Tasks

### Task 1

Load and explore ab-test data.

In [1]:
import pandas as pd
import numpy as np

In [2]:
df_likes = pd.read_csv('likes.csv')

In [3]:
df_likes

Unnamed: 0,user_id,post_id,timestamp
0,128381,4704,1654030804
1,146885,1399,1654030816
2,50948,2315,1654030828
3,14661,673,1654030831
4,37703,1588,1654030833
...,...,...,...
230171,31851,5964,1655243535
230172,51512,1498,1655243537
230173,34017,5009,1655243573
230174,13267,1787,1655243692


In [4]:
df_likes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 230176 entries, 0 to 230175
Data columns (total 3 columns):
 #   Column     Non-Null Count   Dtype
---  ------     --------------   -----
 0   user_id    230176 non-null  int64
 1   post_id    230176 non-null  int64
 2   timestamp  230176 non-null  int64
dtypes: int64(3)
memory usage: 5.3 MB


In [5]:
df_views = pd.read_csv('views.csv')

In [6]:
df_views

Unnamed: 0,user_id,exp_group,recommendations,timestamp
0,128381,control,[3644 4529 4704 5294 4808],1654030803
1,146885,test,[1399 1076 797 7015 5942],1654030811
2,50948,test,[2315 3037 1861 6567 4093],1654030825
3,37703,test,[2842 1949 162 1588 6794],1654030826
4,14661,test,[2395 5881 5648 3417 673],1654030829
...,...,...,...,...
193290,158267,test,[1733 6834 4380 1915 1627],1655240340
193291,63527,control,[2454 191 3873 6404 1588],1655240347
193292,52169,test,[1368 1709 1616 798 5305],1655240354
193293,142402,test,[5895 6984 1978 6548 6106],1655240373


In [7]:
df_views.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193295 entries, 0 to 193294
Data columns (total 4 columns):
 #   Column           Non-Null Count   Dtype 
---  ------           --------------   ----- 
 0   user_id          193295 non-null  int64 
 1   exp_group        193295 non-null  object
 2   recommendations  193295 non-null  object
 3   timestamp        193295 non-null  int64 
dtypes: int64(2), object(2)
memory usage: 5.9+ MB


### Task 2

Check split quality. 

In [8]:
df_views.exp_group.value_counts()

test       96917
control    96378
Name: exp_group, dtype: int64

In [9]:
df_views.exp_group.value_counts(normalize=True)

test       0.501394
control    0.498606
Name: exp_group, dtype: float64

The group proportion is more or less equal. 

In [10]:
df_views.groupby('user_id', as_index=False) \
    .agg({'exp_group': 'nunique'}) \
    .sort_values('exp_group', ascending=False).head(10)

Unnamed: 0,user_id,exp_group
54475,142283,2
57065,148670,2
20633,55788,2
10071,25623,2
43341,114278,1
43345,114289,1
43344,114288,1
43343,114284,1
43342,114279,1
43339,114273,1


As we can see there are a few users that are both in control and test group. 

### Task 3

Calculate an overall conversion from view to post like (do not take into account split on control and test experiment groups).  

In [11]:
df_likes.head(1)

Unnamed: 0,user_id,post_id,timestamp
0,128381,4704,1654030804


In [12]:
df_views.head(1)

Unnamed: 0,user_id,exp_group,recommendations,timestamp
0,128381,control,[3644 4529 4704 5294 4808],1654030803


In [13]:
df = df_views.merge(df_likes, how='left', on='user_id')

In [14]:
df

Unnamed: 0,user_id,exp_group,recommendations,timestamp_x,post_id,timestamp_y
0,128381,control,[3644 4529 4704 5294 4808],1654030803,4704.0,1.654031e+09
1,128381,control,[3644 4529 4704 5294 4808],1654030803,5294.0,1.654031e+09
2,128381,control,[3644 4529 4704 5294 4808],1654030803,3608.0,1.655049e+09
3,128381,control,[3644 4529 4704 5294 4808],1654030803,2542.0,1.655049e+09
4,128381,control,[3644 4529 4704 5294 4808],1654030803,4165.0,1.655053e+09
...,...,...,...,...,...,...
1017168,52169,test,[1368 1709 1616 798 5305],1655240354,1709.0,1.655240e+09
1017169,142402,test,[5895 6984 1978 6548 6106],1655240373,6548.0,1.655240e+09
1017170,72259,control,[6117 1255 6567 3587 3811],1655240388,1712.0,1.654305e+09
1017171,72259,control,[6117 1255 6567 3587 3811],1655240388,5070.0,1.654309e+09


In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1017173 entries, 0 to 1017172
Data columns (total 6 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   user_id          1017173 non-null  int64  
 1   exp_group        1017173 non-null  object 
 2   recommendations  1017173 non-null  object 
 3   timestamp_x      1017173 non-null  int64  
 4   post_id          1006642 non-null  float64
 5   timestamp_y      1006642 non-null  float64
dtypes: float64(2), int64(2), object(2)
memory usage: 54.3+ MB


In [16]:
df[df.isna().any(axis=1)]

Unnamed: 0,user_id,exp_group,recommendations,timestamp_x,post_id,timestamp_y
70,106403,control,[7168 2828 1646 3604 5940],1654030891,,
87,69336,control,[5297 2492 1882 1458 1388],1654030893,,
402,18188,test,[1323 4521 2362 1834 1340],1654031245,,
465,131306,test,[7101 4454 1725 1896 2285],1654031303,,
520,39001,control,[6043 5850 7266 5525 1154],1654031396,,
...,...,...,...,...,...,...
1016552,100072,control,[4686 3529 3000 7124 922],1655239619,,
1016562,12873,control,[6139 1436 6659 3883 1135],1655239634,,
1016744,90174,control,[5505 1540 900 1378 6565],1655239851,,
1016837,22198,test,[1283 3952 3278 2512 5750],1655239958,,


In [17]:
df.fillna(value=0, inplace=True)

In [18]:
df

Unnamed: 0,user_id,exp_group,recommendations,timestamp_x,post_id,timestamp_y
0,128381,control,[3644 4529 4704 5294 4808],1654030803,4704.0,1.654031e+09
1,128381,control,[3644 4529 4704 5294 4808],1654030803,5294.0,1.654031e+09
2,128381,control,[3644 4529 4704 5294 4808],1654030803,3608.0,1.655049e+09
3,128381,control,[3644 4529 4704 5294 4808],1654030803,2542.0,1.655049e+09
4,128381,control,[3644 4529 4704 5294 4808],1654030803,4165.0,1.655053e+09
...,...,...,...,...,...,...
1017168,52169,test,[1368 1709 1616 798 5305],1655240354,1709.0,1.655240e+09
1017169,142402,test,[5895 6984 1978 6548 6106],1655240373,6548.0,1.655240e+09
1017170,72259,control,[6117 1255 6567 3587 3811],1655240388,1712.0,1.654305e+09
1017171,72259,control,[6117 1255 6567 3587 3811],1655240388,5070.0,1.654309e+09


In [19]:
df[df.isna().any(axis=1)]

Unnamed: 0,user_id,exp_group,recommendations,timestamp_x,post_id,timestamp_y


In [20]:
df['liked'] = np.where(df['post_id'] > 0, 1, 0)

In [21]:
df

Unnamed: 0,user_id,exp_group,recommendations,timestamp_x,post_id,timestamp_y,liked
0,128381,control,[3644 4529 4704 5294 4808],1654030803,4704.0,1.654031e+09,1
1,128381,control,[3644 4529 4704 5294 4808],1654030803,5294.0,1.654031e+09,1
2,128381,control,[3644 4529 4704 5294 4808],1654030803,3608.0,1.655049e+09,1
3,128381,control,[3644 4529 4704 5294 4808],1654030803,2542.0,1.655049e+09,1
4,128381,control,[3644 4529 4704 5294 4808],1654030803,4165.0,1.655053e+09,1
...,...,...,...,...,...,...,...
1017168,52169,test,[1368 1709 1616 798 5305],1655240354,1709.0,1.655240e+09,1
1017169,142402,test,[5895 6984 1978 6548 6106],1655240373,6548.0,1.655240e+09,1
1017170,72259,control,[6117 1255 6567 3587 3811],1655240388,1712.0,1.654305e+09,1
1017171,72259,control,[6117 1255 6567 3587 3811],1655240388,5070.0,1.654309e+09,1


In [22]:
df_liked = df.groupby(['user_id', 'exp_group'], as_index=False).agg({'liked': 'max'})

In [23]:
df_liked

Unnamed: 0,user_id,exp_group,liked
0,200,test,1
1,201,test,1
2,202,test,1
3,212,test,1
4,213,test,1
...,...,...,...
65012,168538,test,1
65013,168541,control,1
65014,168544,test,0
65015,168545,test,1


In [24]:
df_liked.liked.sum() / df_liked.shape[0]

0.8947659842810342

### Task 4

Is there a statistically significant difference for a metric `number of likes per user`?

In [25]:
df_likes_per_user = df.groupby(['user_id', 'exp_group'], as_index=False).agg({'liked': 'sum'})

In [26]:
df_likes_per_user

Unnamed: 0,user_id,exp_group,liked
0,200,test,1
1,201,test,18
2,202,test,2
3,212,test,12
4,213,test,35
...,...,...,...
65012,168538,test,15
65013,168541,control,4
65014,168544,test,0
65015,168545,test,20


In [27]:
df_likes_per_user_test = df_likes_per_user[df_likes_per_user['exp_group'] == 'test']

In [28]:
df_likes_per_user_control = df_likes_per_user[df_likes_per_user['exp_group'] == 'control']

In [29]:
from scipy.stats import mannwhitneyu

In [30]:
_, pval = mannwhitneyu(df_likes_per_user_control.liked, df_likes_per_user_test.liked)

In [31]:
alpha = 0.05

if pval < alpha:
    print(f'p-value = {pval:.4f} --> there is a statistically significant difference.')
else:
    print(f'p-value = {pval:.4f} --> there is NO statistically significant difference.')

p-value = 0.0183 --> there is a statistically significant difference.


### Task 5

Is there a statistically significant difference for a metric `convertion to likes`?

In [32]:
df_liked_test = df_liked[df_liked['exp_group'] == 'test']

In [33]:
df_liked_control = df_liked[df_liked['exp_group'] == 'control']

In [34]:
from scipy.stats import ttest_ind

In [35]:
_, pval = ttest_ind(df_liked_control.liked, df_liked_test.liked)

In [36]:
alpha = 0.05

if pval < alpha:
    print(f'p-value = {pval:.4f} --> there is a statistically significant difference.')
else:
    print(f'p-value = {pval:.4f} --> there is NO statistically significant difference.')

p-value = 0.0045 --> there is a statistically significant difference.


### Task 6

Calculate hitrate metric for our recommendations. 

In [37]:
df.head()

Unnamed: 0,user_id,exp_group,recommendations,timestamp_x,post_id,timestamp_y,liked
0,128381,control,[3644 4529 4704 5294 4808],1654030803,4704.0,1654031000.0,1
1,128381,control,[3644 4529 4704 5294 4808],1654030803,5294.0,1654031000.0,1
2,128381,control,[3644 4529 4704 5294 4808],1654030803,3608.0,1655049000.0,1
3,128381,control,[3644 4529 4704 5294 4808],1654030803,2542.0,1655049000.0,1
4,128381,control,[3644 4529 4704 5294 4808],1654030803,4165.0,1655053000.0,1


In [38]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1017173 entries, 0 to 1017172
Data columns (total 7 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   user_id          1017173 non-null  int64  
 1   exp_group        1017173 non-null  object 
 2   recommendations  1017173 non-null  object 
 3   timestamp_x      1017173 non-null  int64  
 4   post_id          1017173 non-null  float64
 5   timestamp_y      1017173 non-null  float64
 6   liked            1017173 non-null  int64  
dtypes: float64(2), int64(3), object(2)
memory usage: 62.1+ MB


In [39]:
hitrate_lst = []
for index, row in df.iterrows():
    recs = row['recommendations']
    recs_lst = [int(x) for x in recs.replace('[', '').replace(']', '').split()]
    if row['post_id'] in recs_lst:
        hitrate_lst.append(1)
    else:
        hitrate_lst.append(0)

In [40]:
df['hitrate'] = hitrate_lst

In [41]:
df.head()

Unnamed: 0,user_id,exp_group,recommendations,timestamp_x,post_id,timestamp_y,liked,hitrate
0,128381,control,[3644 4529 4704 5294 4808],1654030803,4704.0,1654031000.0,1,1
1,128381,control,[3644 4529 4704 5294 4808],1654030803,5294.0,1654031000.0,1,1
2,128381,control,[3644 4529 4704 5294 4808],1654030803,3608.0,1655049000.0,1,0
3,128381,control,[3644 4529 4704 5294 4808],1654030803,2542.0,1655049000.0,1,0
4,128381,control,[3644 4529 4704 5294 4808],1654030803,4165.0,1655053000.0,1,0


In [42]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1017173 entries, 0 to 1017172
Data columns (total 8 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   user_id          1017173 non-null  int64  
 1   exp_group        1017173 non-null  object 
 2   recommendations  1017173 non-null  object 
 3   timestamp_x      1017173 non-null  int64  
 4   post_id          1017173 non-null  float64
 5   timestamp_y      1017173 non-null  float64
 6   liked            1017173 non-null  int64  
 7   hitrate          1017173 non-null  int64  
dtypes: float64(2), int64(4), object(2)
memory usage: 69.8+ MB


In [43]:
df.hitrate.sum() / df.shape[0]

0.22713442059512

Let's filter cases when like was before recommendation. 

In [44]:
filtered_df = df[pd.to_datetime(df['timestamp_x'], unit='s') < pd.to_datetime(df['timestamp_y'], unit='s')]

In [45]:
filtered_df.head()

Unnamed: 0,user_id,exp_group,recommendations,timestamp_x,post_id,timestamp_y,liked,hitrate
0,128381,control,[3644 4529 4704 5294 4808],1654030803,4704.0,1654031000.0,1,1
1,128381,control,[3644 4529 4704 5294 4808],1654030803,5294.0,1654031000.0,1,1
2,128381,control,[3644 4529 4704 5294 4808],1654030803,3608.0,1655049000.0,1,0
3,128381,control,[3644 4529 4704 5294 4808],1654030803,2542.0,1655049000.0,1,0
4,128381,control,[3644 4529 4704 5294 4808],1654030803,4165.0,1655053000.0,1,0


In [46]:
filtered_df.hitrate.sum() / filtered_df.shape[0]

0.3733096102693453

Let's filter cases when like happend one hour later than the recommendation was given.

In [47]:
hour_filtered_df = filtered_df[(filtered_df['timestamp_y'] - filtered_df['timestamp_x']) <= 3600]

In [48]:
hour_filtered_df.head()

Unnamed: 0,user_id,exp_group,recommendations,timestamp_x,post_id,timestamp_y,liked,hitrate
0,128381,control,[3644 4529 4704 5294 4808],1654030803,4704.0,1654031000.0,1,1
1,128381,control,[3644 4529 4704 5294 4808],1654030803,5294.0,1654031000.0,1,1
7,146885,test,[1399 1076 797 7015 5942],1654030811,1399.0,1654031000.0,1,1
11,50948,test,[2315 3037 1861 6567 4093],1654030825,2315.0,1654031000.0,1,1
16,37703,test,[2842 1949 162 1588 6794],1654030826,1588.0,1654031000.0,1,1


In [49]:
hour_filtered_df.hitrate.sum() / hour_filtered_df.shape[0]

0.9899362629989936

### Task 7

Check the difference in hitrate before and after filtering. 

No filtering. 

In [50]:
hitrate_test = df[df['exp_group'] == 'test'].hitrate

In [51]:
hitrate_control = df[df['exp_group'] == 'control'].hitrate

In [52]:
hitrate_test.mean()

0.22673426063325078

In [53]:
hitrate_control.mean()

0.22755225080385852

In [54]:
_, pval = ttest_ind(hitrate_control, hitrate_test)

In [55]:
alpha = 0.05

if pval < alpha:
    print(f'p-value = {pval:.4f} --> there is a statistically significant difference.')
else:
    print(f'p-value = {pval:.4f} --> there is NO statistically significant difference.')

p-value = 0.3250 --> there is NO statistically significant difference.


Filtering cases when like was before recommendation. 

In [56]:
hitrate_test_filtered = filtered_df[filtered_df['exp_group'] == 'test'].hitrate

In [57]:
hitrate_control_filtered = filtered_df[filtered_df['exp_group'] == 'control'].hitrate

In [58]:
hitrate_test_filtered.mean()

0.37234200884756785

In [59]:
hitrate_control_filtered.mean()

0.3743217728311637

In [60]:
_, pval = ttest_ind(hitrate_control_filtered, hitrate_test_filtered)

In [61]:
alpha = 0.05

if pval < alpha:
    print(f'p-value = {pval:.4f} --> there is a statistically significant difference.')
else:
    print(f'p-value = {pval:.4f} --> there is NO statistically significant difference.')

p-value = 0.1078 --> there is NO statistically significant difference.


Filtering cases when like happend one hour later than the recommendation was given.

In [62]:
hitrate_test_hour = hour_filtered_df[hour_filtered_df['exp_group'] == 'test'].hitrate

In [63]:
hitrate_control_hour = hour_filtered_df[hour_filtered_df['exp_group'] == 'control'].hitrate

In [64]:
hitrate_test_hour.mean()

0.9899703073860378

In [65]:
hitrate_control_hour.mean()

0.989900851101167

In [66]:
_, pval = ttest_ind(hitrate_control_hour, hitrate_test_hour)

In [67]:
alpha = 0.05

if pval < alpha:
    print(f'p-value = {pval:.4f} --> there is a statistically significant difference.')
else:
    print(f'p-value = {pval:.4f} --> there is NO statistically significant difference.')

p-value = 0.8668 --> there is NO statistically significant difference.
