# A/B Testing

Business Requirement:                                                                                        
To check whether the new page increases the conversion rate by 3 percentage points  
Assumption: old page conversion rate = 12%, new page conversion rate = 15%

Hypothesis:                                                                                                     
Null Hypothesis :  conversion rate for both groups is same                                                        
Alternate Hypothesis :  conversion rates are different

In [1]:
# Importing libraries
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.stats.api as sms
import matplotlib.pyplot as plt
import seaborn as sns
from math import ceil
import datetime
from scipy.stats import chi2_contingency, beta
from statsmodels.stats.proportion import proportions_ztest, proportion_confint

In [2]:
# Choosing the sample size
# Inputs to the power analysis:
#   alpha (significance level): 0.05, i.e. a 95% confidence level
#   effect size: standardized effect for a conversion rate of 12% vs 15%
#                (the 3 percentage-point hike we want to detect)
#   power (1 - beta): 0.9, i.e. if the pages truly differ by that much we have
#                a 90% chance of detecting it with this sample size
baseline_rate = 0.12
target_rate = 0.15
effect_size = sms.proportion_effectsize(baseline_rate, target_rate)

power_analysis = sms.NormalIndPower()
required_n = power_analysis.solve_power(effect_size, power=0.9, alpha=0.05, ratio=1)

print('Sample size required for each group:',ceil(required_n))

Sample size required for each group: 2719


In [3]:
# Load the raw A/B test log from CSV and preview its first five rows
data = pd.read_csv('ab_data.csv')
data.head(5)

Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,2017-01-21 22:11:48.556739,control,old_page,0
1,804228,2017-01-12 08:01:45.159739,control,old_page,0
2,661590,2017-01-11 16:55:06.154213,treatment,new_page,0
3,853541,2017-01-08 18:28:03.143765,treatment,new_page,0
4,864975,2017-01-21 01:52:26.210827,control,old_page,1


In [4]:
# Non-null row counts of every column, per experiment group
rows_per_group = data.groupby('group').count()
rows_per_group.reset_index()

Unnamed: 0,group,user_id,timestamp,landing_page,converted
0,control,147202,147202,147202,147202
1,treatment,147276,147276,147276,147276


In [5]:
# Non-null row counts per (group, landing_page) pair; exposes rows where the
# group label and the page actually served disagree
rows_per_group_page = data.groupby(['group','landing_page']).count()
rows_per_group_page.reset_index()

Unnamed: 0,group,landing_page,user_id,timestamp,converted
0,control,new_page,1928,1928,1928
1,control,old_page,145274,145274,145274
2,treatment,new_page,145311,145311,145311
3,treatment,old_page,1965,1965,1965


In [6]:
# Repeated users: flag every user_id that occurs more than once,
# then count how many ids are flagged (True) vs. not (False)
occurrences_per_user = data['user_id'].value_counts()
user_count = occurrences_per_user > 1
user_count.value_counts()

False    286690
True       3894
Name: user_id, dtype: int64

In [7]:
# Percentage of rows that are repeat appearances of an already-seen user_id
total_rows = data['user_id'].count()
distinct_users = data['user_id'].nunique()
(1 - (distinct_users / total_rows)) * 100

1.3223398691922683

In [8]:
# Removing repeated users: keep only the first row seen for each user_id.
# NOTE(review): this de-duplicates on user_id only; rows whose group and
# landing_page disagree are not filtered out here — confirm that is intended.
data.drop_duplicates(subset=['user_id'], keep='first', inplace=True)

In [9]:
# Split the "YYYY-MM-DD HH:MM:SS.ffffff" timestamp string on the space into
# separate date and time string columns
timestamp_parts = data['timestamp'].str.split(' ', expand=True)
data[['date','time']] = timestamp_parts
data

Unnamed: 0,user_id,timestamp,group,landing_page,converted,date,time
0,851104,2017-01-21 22:11:48.556739,control,old_page,0,2017-01-21,22:11:48.556739
1,804228,2017-01-12 08:01:45.159739,control,old_page,0,2017-01-12,08:01:45.159739
2,661590,2017-01-11 16:55:06.154213,treatment,new_page,0,2017-01-11,16:55:06.154213
3,853541,2017-01-08 18:28:03.143765,treatment,new_page,0,2017-01-08,18:28:03.143765
4,864975,2017-01-21 01:52:26.210827,control,old_page,1,2017-01-21,01:52:26.210827
...,...,...,...,...,...,...,...
294473,751197,2017-01-03 22:28:38.630509,control,old_page,0,2017-01-03,22:28:38.630509
294474,945152,2017-01-12 00:51:57.078372,control,old_page,0,2017-01-12,00:51:57.078372
294475,734608,2017-01-22 11:45:03.439544,control,old_page,0,2017-01-22,11:45:03.439544
294476,697314,2017-01-15 01:20:28.957438,control,old_page,0,2017-01-15,01:20:28.957438


In [74]:
# Inspect the 'date' column produced by the timestamp split.
# NOTE(review): the saved output for this cell shows a type name rather than the
# column values, so it was presumably last executed as type(data['date']) —
# confirm and re-run so code and output agree.
data['date']

pandas.core.series.Series

In [10]:
# ISO week number of each visit, derived from the 'date' string column.
# Parsed in one vectorized pass instead of calling strptime once per row;
# the cast keeps the column a plain int64 like the original per-row result.
visit_dates = pd.to_datetime(data['date'], format='%Y-%m-%d')
data['week'] = visit_dates.dt.isocalendar().week.astype('int64')
data

Unnamed: 0,user_id,timestamp,group,landing_page,converted,date,time,week
0,851104,2017-01-21 22:11:48.556739,control,old_page,0,2017-01-21,22:11:48.556739,3
1,804228,2017-01-12 08:01:45.159739,control,old_page,0,2017-01-12,08:01:45.159739,2
2,661590,2017-01-11 16:55:06.154213,treatment,new_page,0,2017-01-11,16:55:06.154213,2
3,853541,2017-01-08 18:28:03.143765,treatment,new_page,0,2017-01-08,18:28:03.143765,1
4,864975,2017-01-21 01:52:26.210827,control,old_page,1,2017-01-21,01:52:26.210827,3
...,...,...,...,...,...,...,...,...
294473,751197,2017-01-03 22:28:38.630509,control,old_page,0,2017-01-03,22:28:38.630509,1
294474,945152,2017-01-12 00:51:57.078372,control,old_page,0,2017-01-12,00:51:57.078372,2
294475,734608,2017-01-22 11:45:03.439544,control,old_page,0,2017-01-22,11:45:03.439544,3
294476,697314,2017-01-15 01:20:28.957438,control,old_page,0,2017-01-15,01:20:28.957438,2


In [11]:
# Number of rows falling in each week of the experiment
rows_per_week = data['week'].value_counts()
rows_per_week

2    92620
3    92300
1    84882
4    20782
Name: week, dtype: int64

## Frequentist approach

In [12]:
# Getting the conversion rate of Control and Test group
weeks = 4
control_group = data.loc[(data['group']=='control') & (data['week']<=weeks)]
test_group = data.loc[(data['group']=='treatment') & (data['week']<=weeks)]

control_conversion_rate = round(control_group['converted'].sum()*100/control_group['converted'].count(),3)
test_conversion_rate = round(test_group['converted'].sum()*100/test_group['converted'].count(),3)

print('Control group conversion rate:',control_conversion_rate)
print('Test group conversion rate:', test_conversion_rate)
print('Lift:',round(test_conversion_rate-control_conversion_rate,3))

Control group conversion rate: 12.03
Test group conversion rate: 11.884
Lift: -0.146


Next, we check whether the difference between the conversion rates observed in the sample is statistically significant, i.e. whether it can be generalised to the whole population.

In [13]:
# Display the control-group rows selected for the frequentist analysis
control_group

Unnamed: 0,user_id,timestamp,group,landing_page,converted,date,time,week
0,851104,2017-01-21 22:11:48.556739,control,old_page,0,2017-01-21,22:11:48.556739,3
1,804228,2017-01-12 08:01:45.159739,control,old_page,0,2017-01-12,08:01:45.159739,2
4,864975,2017-01-21 01:52:26.210827,control,old_page,1,2017-01-21,01:52:26.210827,3
5,936923,2017-01-10 15:20:49.083499,control,old_page,0,2017-01-10,15:20:49.083499,2
7,719014,2017-01-17 01:48:29.539573,control,old_page,0,2017-01-17,01:48:29.539573,3
...,...,...,...,...,...,...,...,...
294471,718310,2017-01-21 22:44:20.378320,control,old_page,0,2017-01-21,22:44:20.378320,3
294473,751197,2017-01-03 22:28:38.630509,control,old_page,0,2017-01-03,22:28:38.630509,1
294474,945152,2017-01-12 00:51:57.078372,control,old_page,0,2017-01-12,00:51:57.078372,2
294475,734608,2017-01-22 11:45:03.439544,control,old_page,0,2017-01-22,11:45:03.439544,3


In [19]:
# Two-proportion z-test on the converted counts, plus a 95% confidence
# interval for each group's conversion rate.
control_count = control_group['converted'].count()
test_count = test_group['converted'].count()
nobs = [control_count, test_count]
successes = [grp['converted'].sum() for grp in (control_group, test_group)]

z_stat, pval = proportions_ztest(successes, nobs=nobs)

# proportion_confint returns (lower bounds, upper bounds), one entry per group
ci_lower, ci_upper = proportion_confint(successes, nobs=nobs, alpha=0.05)
lower_con, lower_treat = ci_lower
upper_con, upper_treat = ci_upper

print(f'z statistic: {z_stat:.2f}')
print(f'p-value: {pval:.3f}')
print(f'ci 95% for control group: [{lower_con:.3f}, {upper_con:.3f}]')
print(f'ci 95% for treatment group: [{lower_treat:.3f}, {upper_treat:.3f}]')

z statistic: 1.21
p-value: 0.227
ci 95% for control group: [0.119, 0.122]
ci 95% for treatment group: [0.117, 0.121]


Since our p-value (0.227) is greater than the significance level alpha (0.05), we cannot reject the null hypothesis; hence we cannot conclude that the conversion rates of the two web pages are different.  
Also, the 95% confidence intervals for the control and treatment groups are [0.119, 0.122] and [0.117, 0.121] respectively; both include the baseline conversion rate of 12%, and neither includes the target rate of 15%.

### Chi Squared

Ho(Null Hypothesis) = Conversion rate is same                                                                     
H1(Alternate Hypothesis) = Conversion rate is not the same

In [96]:
# Creating contingency table for Chi-square test
control_converted = control_group['converted'].sum()
treatment_converted = test_group['converted'].sum()
control_non_converted = control_group['converted'].count() - control_converted
treatment_non_converted = test_group['converted'].count() - treatment_converted
contingency_table = np.array([[control_converted, control_non_converted], 
                              [treatment_converted, treatment_non_converted]])

In [97]:
# Display the 2x2 table: rows = control / treatment, columns = converted / not converted
contingency_table

array([[ 17471, 127761],
       [ 17274, 128078]])

In [99]:
# Chi-square test of independence on the 2x2 table, without Yates' continuity
# correction (correction=False).
# The unused results go to named throwaways rather than `_`, because assigning
# to `_` overrides IPython's "last output" variable for the rest of the session.
chi, p_value, _dof, _expected = chi2_contingency(contingency_table, correction=False)
chi, p_value

(1.4601935202954148, 0.22689933216132585)

Since the p-value = 0.227 > 0.05, we cannot reject the null hypothesis. Therefore, there is no statistically significant difference between the conversion rates of the control and test groups.

If the null hypothesis is true, there is a 22.69% probability of observing a chi-square statistic at least as extreme as 1.46 purely by chance.

Disadvantage:
Difficult to interpret, since it gives the chi square value and to get the correct estimate in terms of conversion rate we need to map chi square to conversion rate

## Bayesian approach

Using the first week's control-group data to create the prior distribution

In [101]:
# Rows used to build the prior: control-group users seen in week 1
is_first_week = data['week'] == 1
is_control = data['group'] == 'control'
prior = data[is_first_week & is_control]

In [104]:
# Draw 1000 random subsamples of 1000 prior rows each and record the mean
# conversion of every subsample.
# A seeded RandomState is shared across the draws so that the subsamples differ
# from each other but the whole sequence is reproducible on re-run.
prior_rng = np.random.RandomState(42)
prior_mean = [
    prior.sample(1000, random_state=prior_rng)['converted'].mean()
    for _draw in range(1000)
]

In [106]:
# Fit a Beta distribution to the subsample means to obtain the prior's shape
# parameters (alpha, beta). floc=0 and fscale=1 hold the location and scale
# fixed so that only the two shape parameters are estimated.
# The fixed loc/scale are unpacked into named throwaways rather than `_`,
# because assigning to `_` overrides IPython's "last output" variable.
prior_alpha, prior_beta, _loc, _scale = beta.fit(prior_mean, floc=0, fscale=1)

In [109]:
# Observed data for the Bayesian update: weeks 2..weeks only, because week 1
# was used to build the prior.
weeks = 4
data1 = data.loc[(data['week']>1) & (data['week']<=weeks)]
# data1 is already restricted to the week window, so only the group filter is needed
control_group = data1.loc[data1['group']=='control']
test_group = data1.loc[data1['group']=='treatment']

# Unrounded rates (in %). The lift is computed from these so that it is not
# distorted by rounding each rate first.
control_rate_pct = control_group['converted'].sum()*100/control_group['converted'].count()
test_rate_pct = test_group['converted'].sum()*100/test_group['converted'].count()

# Rounded values, for display only
control_conversion_rate = round(control_rate_pct,3)
test_conversion_rate = round(test_rate_pct,3)

print('Control group conversion rate:',control_conversion_rate)
print('Test group conversion rate:', test_conversion_rate)
print('Lift:',round(test_rate_pct-control_rate_pct,3))

Control group conversion rate: 12.064
Test group conversion rate: 11.927
Lift: -0.137


In [120]:
# Conversions / non-conversions observed in each group
control_converted = control_group['converted'].sum()
test_converted = test_group['converted'].sum()
control_non_converted = control_group['converted'].count() - control_converted
test_non_converted = test_group['converted'].count() - test_converted

# Updating posterior: Beta(prior_alpha + conversions, prior_beta + non-conversions)
posterior_control = beta(prior_alpha + control_converted,prior_beta + control_non_converted)
posterior_test = beta(prior_alpha + test_converted,prior_beta + test_non_converted)

# One seeded generator is shared by both draws: the result is reproducible on
# re-run, and the two sample sets come from different parts of the stream
# (seeding each call with the same integer would make them move together).
posterior_rng = np.random.RandomState(42)
control_samples = posterior_control.rvs(1000, random_state=posterior_rng)
test_samples = posterior_test.rvs(1000, random_state=posterior_rng)

# Share of paired draws in which the treatment rate exceeds the control rate
probability = np.mean(test_samples>control_samples)
print(f'Probability that treatment > control: {probability * 100}%')

Probability that treatment > control: 17.0%


In [125]:
# Mean and variance of each group's posterior distribution
control_mean, control_var = posterior_control.mean(), posterior_control.var()
test_mean, test_var = posterior_test.mean(), posterior_test.var()

print(f"Control Posterior: Mean: {control_mean}, Variance: {control_var}")
print(f"Treatment Posterior: Mean: {test_mean}, Variance: {test_var}")

Control Posterior: Mean: 0.12062724606912235, Variance: 1.020216176034806e-06
Treatment Posterior: Mean: 0.11927237388871918, Variance: 1.0117905720870365e-06


In [130]:
# Relative lift of treatment over control for every pair of posterior draws
rate_difference = test_samples - control_samples
lift_percentage = rate_difference / control_samples

# Share of draws whose relative lift exceeds 2%.
# NOTE(review): this threshold is a 2% *relative* lift, whereas the stated goal
# at the top of the notebook is 12% -> 15% — confirm which one is intended.
exceeds_two_percent = (100 * lift_percentage) > 2
print(f"Probability that we are seeing a 2% lift: {np.mean(exceeds_two_percent) * 100}%")

Probability that we are seeing a 2% lift: 0.8%
