In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the A/B test click data; the relative path is resolved against the
# current working directory.
abdata = pd.read_csv(filepath_or_buffer='clicks.csv')

In [3]:
# Preview the first five rows to check which columns were loaded.
print(abdata.head(n=5))

    user_id group is_purchase
0  8e27bf9a     A          No
1  eb89e6f0     A          No
2  7119106a     A          No
3  e53781ff     A          No
4  02d48cf1     A         Yes


We are interested in whether visitors are more likely to make a purchase if they are in any one group compared to the others. Because we want to know whether there is an association between two categorical variables (`group` and `is_purchase`), we’ll start with a Chi-Square test of independence.

In [4]:
# Contingency table of counts: one row per test group, one column per
# purchase outcome.
Xtab = pd.crosstab(index=abdata["group"], columns=abdata["is_purchase"])
print(Xtab)

is_purchase    No  Yes
group                 
A            1350  316
B            1483  183
C            1583   83


In [5]:
from scipy.stats import chi2_contingency

In [8]:
# Chi-square test of independence on the group x purchase table. Only the
# p-value is used below; it is compared against a 0.05 threshold.
chi2, pval, dof, expected = chi2_contingency(Xtab)
print(pval)
print(
    "There is a significant difference in the purchase rate for groups A, B, and C"
    if pval < 0.05
    else "There is no significant difference in the purchase rate for groups A,B, and C"
)

2.4126213546684264e-35
There is a significant difference in the purchase rate for groups A, B, and C


In [10]:
# Total number of rows in the data set (all groups combined); used below as
# the visitor count when converting required sales into a required rate.
num_visits = abdata.shape[0]
print(num_visits)

4998


In [11]:
# Number of sales at the 0.99 price point needed to reach 1000 in revenue,
# and that number expressed as a fraction of all visits.
num_sales_needed_099 = 1000 / 0.99
p_sales_needed_099 = num_sales_needed_099 / num_visits
print(num_sales_needed_099, p_sales_needed_099, sep="\n")

1010.1010101010102
0.20210104243717691


In [12]:
# Number of sales at the 1.99 price point needed to reach 1000 in revenue,
# and that number expressed as a fraction of all visits.
num_sales_needed_199 = 1000 / 1.99
p_sales_needed_199 = num_sales_needed_199 / num_visits
print(num_sales_needed_199, p_sales_needed_199, sep="\n")

502.51256281407035
0.10054272965467594


In [14]:
# Number of sales at the 4.99 price point needed to reach 1000 in revenue,
# and that number expressed as a fraction of all visits.
num_sales_needed_499 = 1000 / 4.99
p_sales_needed_499 = num_sales_needed_499 / num_visits
print(num_sales_needed_499, p_sales_needed_499, sep="\n")

200.40080160320642
0.040096198800161346


Next, we want to know whether the proportion of Group A (the 0.99 price point) that purchased an upgrade package is significantly greater than `p_sales_needed_099` (the proportion of visitors who need to buy an upgrade package at 0.99 in order to meet our minimum revenue target of 1,000). To answer this question, we focus on just the visitors in Group A and compare the proportion of purchases in that group to `p_sales_needed_099`. Since we have a single sample of categorical data and want to compare it to a hypothetical population value, a binomial test is appropriate. To run a binomial test for Group A, we need two pieces of information: (1) the number of visitors in Group A (the number of visitors who were offered the 0.99 price point); and (2) the number of visitors in Group A who made a purchase. The same procedure is then repeated for Group B (1.99) and Group C (4.99).

In [22]:
# Group A (0.99 price point): number of visitors in the group, and how many
# of those visitors made a purchase.
samp_size_099 = (abdata["group"] == "A").sum()
sales_099 = ((abdata["group"] == "A") & (abdata["is_purchase"] == "Yes")).sum()
print(samp_size_099, sales_099, sep="\n")

1666
316


In [34]:
# Group B (1.99 price point): number of visitors in the group, and how many
# of those visitors made a purchase.
samp_size_199 = (abdata["group"] == 'B').sum()
sales_199 = ((abdata["group"] == 'B') & (abdata["is_purchase"] == 'Yes')).sum()
print(samp_size_199, sales_199, sep="\n")

1666
183


In [35]:
# Group C (4.99 price point): number of visitors in the group, and how many
# of those visitors made a purchase.
samp_size_499 = (abdata["group"] == "C").sum()
sales_499 = ((abdata["group"] == "C") & (abdata["is_purchase"] == "Yes")).sum()
print(samp_size_499, sales_499, sep="\n")

1666
83


In [36]:
# `scipy.stats.binom_test` was deprecated in SciPy 1.10 and removed in SciPy 1.12;
# `scipy.stats.binomtest` (available since SciPy 1.7) is its replacement.
# The `binom_test` name is kept available here so the cells that call it keep
# working on any SciPy version.
try:
    from scipy.stats import binomtest
except ImportError:
    # SciPy older than 1.7 has no `binomtest`; the legacy function still exists there.
    from scipy.stats import binom_test
else:
    def binom_test(x, n=None, p=0.5, alternative='two-sided'):
        """Return the p-value of an exact binomial test.

        Thin compatibility wrapper with the call signature of the legacy
        ``scipy.stats.binom_test``, implemented on top of ``scipy.stats.binomtest``.

        Parameters
        ----------
        x : int or sequence of two ints
            Number of successes, or ``[successes, failures]`` (in which case
            ``n`` is ignored and taken to be their sum).
        n : int, optional
            Number of trials. Required when ``x`` is a single count.
        p : float, optional
            Hypothesized probability of success under the null hypothesis.
        alternative : {'two-sided', 'greater', 'less'}, optional
            Direction of the alternative hypothesis.

        Returns
        -------
        float
            The p-value of the test.

        Raises
        ------
        ValueError
            If ``x`` has more than two elements, or ``n`` is missing when ``x``
            is a single count. ``binomtest`` itself also raises ``ValueError``
            for invalid counts or probabilities.
        """
        # `np` is the numpy module imported at the top of the notebook.
        counts = np.atleast_1d(x)
        if counts.size == 2:
            # Legacy calling form: x = [successes, failures].
            successes = int(counts[0])
            trials = int(counts[0] + counts[1])
        elif counts.size == 1:
            if n is None:
                raise ValueError("n must be >= x")
            successes = int(counts[0])
            trials = int(n)
        else:
            raise ValueError("Incorrect length for x.")
        # `binomtest` returns a result object; the legacy API returned only the p-value.
        return binomtest(successes, n=trials, p=p, alternative=alternative).pvalue

In [42]:
# One-sided binomial test: is Group A's observed purchase rate greater than
# the rate required at the 0.99 price point? Judged at a 0.05 threshold.
pvalueA = binom_test(
    sales_099,
    n=samp_size_099,
    p=p_sales_needed_099,
    alternative='greater',
)
print(pvalueA)
print(
    "The purchase rate for 0.99 (Group A) is significantly higher than the target needed to reach $1000 revenue per week."
    if pvalueA < 0.05
    else "The purchase rate for 0.99 (Group A) is NOT significantly higher than the target needed to reach $1000 revenue per week."
)

0.9028081076188554
The purchase rate for 0.99 (Group A) is NOT significantly higher than the target needed to reach $1000 revenue per week.


In [46]:
# One-sided binomial test: is Group B's observed purchase rate greater than
# the rate required at the 1.99 price point? Judged at a 0.05 threshold.
pvalueB = binom_test(
    sales_199,
    n=samp_size_199,
    p=p_sales_needed_199,
    alternative='greater',
)
print(pvalueB)
print(
    "The purchase rate for 1.99 (Group B) is significantly higher than the target needed to reach $1000 revenue per week."
    if pvalueB < 0.05
    else "The purchase rate for 1.99 (Group B) is NOT significantly higher than the target needed to reach $1000 revenue per week."
)

1.4574710785649957e-33
The purchase rate for 1.99 (Group B) is significantly higher than the target needed to reach $1000 revenue per week.


In [47]:
# One-sided binomial test: is Group C's observed purchase rate greater than
# the rate required at the 4.99 price point? Judged at a 0.05 threshold.
pvalueC = binom_test(
    sales_499,
    n=samp_size_499,
    p=p_sales_needed_499,
    alternative='greater',
)
print(pvalueC)
print(
    "The purchase rate for 4.99 (Group C) is significantly higher than the target needed to reach $1000 revenue per week."
    if pvalueC < 0.05
    else "The purchase rate for 4.99 (Group C) is NOT significantly higher than the target needed to reach $1000 revenue per week."
)

0.02794482665983064
The purchase rate for 4.99 (Group C) is significantly higher than the target needed to reach $1000 revenue per week.
