In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import scipy.stats as st

In [10]:
# simulate data:
sample_size = 100
lift = .2
A_rate = .5
B_rate = (1 + lift) * A_rate

clicks_A = np.random.choice(['yes', 'no'],  
                            size=int(sample_size), 
                            p=[A_rate, 1-A_rate])
clicks_B = np.random.choice(['yes', 'no'],  
                            size=int(sample_size), 
                            p=[B_rate, 1-B_rate])
outcome = list(clicks_A) + list(clicks_B)
group = ['Button A']*int(sample_size) + ['Button B']*int(sample_size)
sim_data = pd.DataFrame({"Group": group, "Clicked": outcome})
print(pd.crosstab(sim_data.Group, sim_data.Clicked))

Clicked   no  yes
Group            
Button A  53   47
Button B  36   64


In [11]:
# run a hypothesis test
sig_threshold = 0.05

ab_contingency = pd.crosstab(sim_data.Group, sim_data.Clicked)
chi2, pval, dof, expected = st.chi2_contingency(ab_contingency)
result = ('significant' if pval < sig_threshold else 'not significant')
print(result)

significant


In [14]:
samp_size = 1000
A_rate = .5
lift = .2
B_rate = (1 + lift) * A_rate
sig_threshold = 0.05
results = []

for i in range(100):
    clicks_A = np.random.choice(['yes', 'no'],  
                            size=int(sample_size/2), 
                            p=[A_rate, 1-A_rate])
    clicks_B = np.random.choice(['yes', 'no'],  
                            size=int(sample_size/2), 
                            p=[B_rate, 1-B_rate])
    outcome = list(clicks_A) + list(clicks_B)
    group = ['Button A']*int(sample_size/2) + ['Button B']*int(sample_size/2)
    sim_data = {"Group": group, "Clicked": outcome}
    sim_data = pd.DataFrame(sim_data)
    ab_contingency = pd.crosstab(sim_data.Group, sim_data.Clicked)
    chi2, pval, dof, expected = st.chi2_contingency(ab_contingency)
    result = ('significant' if pval < sig_threshold else 'not significant')
    results.append(result)
    
# calculate proportion of results that are "significant":
print("Proportion of results that are significant:")
results =  np.array(results)
print(np.sum(results == 'significant')/100)

# calculate proportion of results that are "not significant":
print("Proportion of results that are NOT significant:")
results =  np.array(results)
print(np.sum(results == 'not significant')/100)

Proportion of results that are significant:
0.14
Proportion of results that are NOT significant:
0.86
