In [1]:
# import libraries
import pandas as pd
import numpy as np

In [2]:
# create two random datasets of 100 data points for sampling with conversion rates ('cr') per day for segments 'control' and 'var'
# Conversion rates are created normally distributed random sample with a mean of 0.023 (control) and 0.026 (variation/var). 
# Standard deviation in control set is 0.002 and for variation set is 0.003

data_control = {'date': pd.date_range(start = '2019-02-01', periods=100).tolist(), 
 'segment': 'control', 
 'cr': np.random.normal(loc=0.023, scale=0.002, size=100)}

data_var = {'date': pd.date_range(start = '2019-02-01', periods=100).tolist(), 
 'segment': 'var', 
 'cr': np.random.normal(loc=0.026, scale=0.003, size=100)}

In [3]:
# put data in dataframes
df_control = pd.DataFrame(data_control, columns = ['date', 'segment', 'cr'])
df_var = pd.DataFrame(data_var, columns = ['date', 'segment', 'cr'])

In [4]:
df_control.head()

Unnamed: 0,date,segment,cr
0,2019-02-01,control,0.020113
1,2019-02-02,control,0.019996
2,2019-02-03,control,0.02388
3,2019-02-04,control,0.023351
4,2019-02-05,control,0.024112


In [5]:
# remove negative numbers by raising them to the power of two and then taking their square root
df_control['cr'] = np.sqrt(df_control['cr'] ** 2)
df_var['cr'] = np.sqrt(df_var['cr'] ** 2)

In [6]:
# create for loop for sampling both datasets and creating randomly sampled data sets. Add the difference between the means of the
# sampled datasets in a list and take the quantiles from that for a confidence interval

boot_mean_diff = []

for a in range(10000):
    control_sample = df_control['cr'].sample(frac=1, replace=True)
    var_sample = df_var['cr'].sample(frac=1, replace=True)
    boot_mean = (np.mean(var_sample) - np.mean(control_sample))
    boot_mean_diff.append(boot_mean)
    
quantiles = pd.Series(boot_mean_diff).quantile([0.025, 0.975])
print(quantiles)

0.025    0.002766
0.975    0.004160
dtype: float64


In [7]:
print(quantiles*100)

0.025    0.276551
0.975    0.416015
dtype: float64


In [8]:
# my results:
# 0.025    0.276551
# 0.975    0.416015

In [9]:
# With 95% confidence we can say the variant test realizes an uplift of between 0.28% and 0.42% in conversion rate. The test 
# variant will be implemented!