In [48]:
import numpy as np
import pandas as pd
import scipy.stats as ss

In [4]:
# Per-user attributes (signup date), keyed by user_id.
users = pd.read_csv('user_table.csv', index_col='user_id')
# Experiment log, keyed by user_id, enriched with the user attributes via an
# index-on-index join.
tests = pd.read_csv('test_table.csv', index_col='user_id').join(users)
tests.head(5)

Unnamed: 0_level_0,date,browser,test,pages_visited,signup_date
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
600597,2015-08-13,IE,0,2,2015-01-19
4410028,2015-08-26,Chrome,1,5,2015-05-11
6004777,2015-08-17,Chrome,0,8,2015-06-26
5990330,2015-08-27,Safari,0,8,2015-06-25
3622310,2015-08-07,Firefox,0,1,2015-04-17


In [5]:
# Convert both date columns from strings to datetime64 so they can be
# subtracted from each other later on.
tests = tests.assign(
    date=pd.to_datetime(tests['date']),
    signup_date=pd.to_datetime(tests['signup_date']),
)
tests.head(10)

Unnamed: 0_level_0,date,browser,test,pages_visited,signup_date
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
600597,2015-08-13,IE,0,2,2015-01-19
4410028,2015-08-26,Chrome,1,5,2015-05-11
6004777,2015-08-17,Chrome,0,8,2015-06-26
5990330,2015-08-27,Safari,0,8,2015-06-25
3622310,2015-08-07,Firefox,0,1,2015-04-17
1806423,2015-08-28,IE,0,5,2015-02-22
5177398,2015-08-14,Chrome,1,5,2015-06-02
2494813,2015-08-19,Chrome,0,10,2015-03-15
6377383,2015-08-20,Chrome,1,1,2015-07-07
1462493,2015-08-10,Firefox,1,7,2015-02-12


In [18]:
def run_ttest(df, alpha=0.05):
    """Compare ``pages_visited`` between the test and control rows of ``df``.

    Runs a two-sample t-test with ``equal_var=False`` (Welch's t-test) on
    ``pages_visited`` for rows where ``test == 1`` versus rows where
    ``test == 0``.

    The row masks are built from ``df`` itself, so the function works on any
    frame -- including the sub-frames passed in by ``groupby(...).apply`` --
    without depending on a notebook-level ``tests`` variable.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``test`` column (1 = test group, 0 = control group)
        and a ``pages_visited`` column.
    alpha : float, default 0.05
        Threshold the p-value is compared against to label the result.

    Returns
    -------
    pandas.Series
        ``n_test``, ``n_ctrl``, ``mean_test``, ``mean_ctrl``, ``test-ctrl``
        (difference of means), ``p_value`` and ``conclusion``
        (``'Significant'`` or ``'Not Significant'``).
    """
    vp_in_test = df.loc[df['test'] == 1, 'pages_visited']
    test_mean = vp_in_test.mean()

    vp_in_ctrl = df.loc[df['test'] == 0, 'pages_visited']
    ctrl_mean = vp_in_ctrl.mean()

    # equal_var=False: do not assume both groups share the same variance.
    result = ss.ttest_ind(vp_in_ctrl, vp_in_test, equal_var=False)
    conclusion = 'Significant' if result.pvalue < alpha else 'Not Significant'

    return pd.Series({'n_test': vp_in_test.shape[0],
                      'n_ctrl': vp_in_ctrl.shape[0],
                      'mean_test': test_mean,
                      'mean_ctrl': ctrl_mean,
                      'test-ctrl': test_mean - ctrl_mean,
                      'p_value': result.pvalue,
                      'conclusion': conclusion})

In [19]:
run_ttest(tests)

n_test                  50154
n_ctrl                  49846
mean_test             4.59969
mean_ctrl             4.60839
test-ctrl         -0.00870091
p_value              0.577452
conclusion    Not Significant
dtype: object

According to the hypothesis test above, there is no significant difference in pages visited between the test and control groups overall (p ≈ 0.58).

Answer questions 2 and 3

Is the test performing similarly for all user segments or are there differences among different segments?
If you identified segments that responded differently to the test, can you guess the reason? Would this change your point 1 conclusions?

In [35]:
# Whole days between the sign-up date and the date of the test record.
tests['n_day_after_signin'] = (tests['date'] - tests['signup_date']).dt.days
# 1 when the record falls on the sign-up day itself, 0 otherwise.
tests['first_time'] = tests['n_day_after_signin'].eq(0).astype(int)


In [40]:
# Summary of the 0/1 first_time flag; its mean is the share of first-time rows.
# (A preceding bare `tests.head(10)` was dropped: only the last expression of a
# cell is rendered, so its result was silently discarded.)
tests['first_time'].describe()

count    100000.000000
mean          0.200010
std           0.400009
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           1.000000
Name: first_time, dtype: float64

In [41]:
# Run the test-vs-control comparison separately within each browser.
tests.groupby('browser').apply(run_ttest)

Unnamed: 0_level_0,n_test,n_ctrl,mean_test,mean_ctrl,test-ctrl,p_value,conclusion
browser,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Chrome,21974,21453,4.69068,4.613341,0.077339,0.0009434084,Significant
Firefox,10786,10972,4.714259,4.600164,0.114095,0.0005817199,Significant
IE,10974,10906,4.685985,4.598478,0.087507,0.007829509,Significant
Opera,1018,1109,0.0,4.546438,-4.546438,2.253e-321,Significant
Safari,5402,5406,4.692336,4.63818,0.054156,0.2411738,Not Significant



From the result above, we can see that after applying the "Recommend Friend" feature:

#page_visited on Chrome, Firefox, and IE increased significantly.
#page_visited on Opera dropped to zero; there may be a bug in the Opera implementation that stops users from visiting further pages.
#page_visited on Safari shows no significant improvement, maybe because the recommended friends aren't shown in a noticeable position.

In [43]:
# Run the test-vs-control comparison separately for first_time == 0 and
# first_time == 1 rows.
tests.groupby('first_time').apply(run_ttest)

Unnamed: 0_level_0,n_test,n_ctrl,mean_test,mean_ctrl,test-ctrl,p_value,conclusion
first_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,40109,39890,4.622379,4.603284,0.019095,0.261837,Not Significant
1,10045,9956,4.509109,4.628867,-0.119758,0.001742,Significant



The result above shows:

For old users, the new feature improves #page_visited, but the change isn't significant.
For first-time (new) users, the new feature significantly decreases #page_visited.
This is a strange observation. Since I already suspect there is a bug in the Opera implementation (which reduces the number of pages visited to 0 when the new feature is on), I need to split the dataset further by browser.

In [44]:
# Break the comparison down by browser and first_time together; the result is
# kept in `ttest_result` so the two first_time slices can be inspected below.
ttest_result = tests.groupby(['browser','first_time']).apply(run_ttest)
ttest_result

Unnamed: 0_level_0,Unnamed: 1_level_0,n_test,n_ctrl,mean_test,mean_ctrl,test-ctrl,p_value,conclusion
browser,first_time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Chrome,0,17525,17092,4.701512,4.607945,0.093567,0.0002290889,Significant
Chrome,1,4449,4361,4.648011,4.634488,0.013523,0.8149175,Not Significant
Firefox,0,8657,8842,4.757306,4.59059,0.166716,3.692901e-06,Significant
Firefox,1,2129,2130,4.53922,4.639906,-0.100686,0.2210706,Not Significant
IE,0,8779,8744,4.721494,4.590576,0.130918,0.0002669847,Significant
IE,1,2195,2162,4.543964,4.630435,-0.086471,0.2808421,Not Significant
Opera,0,833,883,0.0,4.594564,-4.594564,7.204927000000001e-255,Significant
Opera,1,185,226,0.0,4.358407,-4.358407,1.222949e-68,Significant
Safari,0,4315,4329,4.720973,4.638254,0.08272,0.1000829,Not Significant
Safari,1,1087,1077,4.578657,4.637883,-0.059226,0.6015241,Not Significant


In [46]:
# Old users (first_time == 0): one row per browser.
ttest_result.xs(0, level='first_time')

Unnamed: 0_level_0,n_test,n_ctrl,mean_test,mean_ctrl,test-ctrl,p_value,conclusion
browser,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Chrome,17525,17092,4.701512,4.607945,0.093567,0.0002290889,Significant
Firefox,8657,8842,4.757306,4.59059,0.166716,3.692901e-06,Significant
IE,8779,8744,4.721494,4.590576,0.130918,0.0002669847,Significant
Opera,833,883,0.0,4.594564,-4.594564,7.204927000000001e-255,Significant
Safari,4315,4329,4.720973,4.638254,0.08272,0.1000829,Not Significant



For old users, the conclusion is the same as the overall per-browser result, which is:

#page_visited on Chrome, Firefox, and IE increased significantly.
#page_visited on Opera dropped to zero; there may be a bug in the Opera implementation that stops users from visiting further pages.
#page_visited on Safari shows no significant improvement, maybe because the recommended friends aren't shown in a noticeable position.

In [47]:
# New (first-time) users (first_time == 1): one row per browser.
ttest_result.xs(1, level='first_time')

Unnamed: 0_level_0,n_test,n_ctrl,mean_test,mean_ctrl,test-ctrl,p_value,conclusion
browser,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Chrome,4449,4361,4.648011,4.634488,0.013523,0.8149175,Not Significant
Firefox,2129,2130,4.53922,4.639906,-0.100686,0.2210706,Not Significant
IE,2195,2162,4.543964,4.630435,-0.086471,0.2808421,Not Significant
Opera,185,226,0.0,4.358407,-4.358407,1.222949e-68,Significant
Safari,1087,1077,4.578657,4.637883,-0.059226,0.6015241,Not Significant


Apart from Opera, which may have a bug, none of the changes for new users is significant, and on Firefox, IE, and Safari there is even a small (non-significant) drop after applying the new feature.

This may be because the friend recommendation engine is based on a user's previous social activity on the site. New users don't have any history for the recommendation engine to use, so the recommendations are close to a random guess and fail to draw the new user's interest. At the same time, the recommended friends occupy some space on the page, so the feature may even decrease #page_visited for new users a little.

From this observation, I suspect the recommendation engine suffers from the "cold start" problem.