In [2]:
import re

import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

### State the Null Hypothesis and the Alternative Hypothesis.

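**Null Hypothesis:** The click-through rate is the same for every version of the button; any observed deviation in website performance is just due to chance.<br><br>
**Alternative Hypothesis:** The deviations are not due to chance: at least one version has a different click-through rate, so it is likely that the differences in the buttons impacted user behavior.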

### Set alpha

In [3]:
# Significance level: the null hypothesis is rejected when the p-value of the
# chi-square test is not greater than this threshold.
alpha = 0.1

### Gather the Data

In [4]:
# Read the four CSV exports (one per button version A-D) into data frames.
a, b, c, d = map(
    pd.read_csv,
    ["eniac_a.csv", "eniac_b.csv", "eniac_c.csv", "eniac_d.csv"],
)

In [5]:
# The last column of the second row holds a free-text summary of the experiment,
# which includes the total number of visits for this version.
a.iloc[1, -1]

'created 2021-09-14   •   14 days 0 hours 34 mins   •   25326 visits, 23174 clicks'

In [6]:
# Parse the visit count out of the summary cell instead of hard-coding it, so the
# number always matches the loaded CSV (summary looks like "... 25326 visits, ...").
a_visits = int(re.search(r"(\d+) visits", a.iloc[1, -1]).group(1))

In [7]:
# Parse the visit count out of the summary cell instead of hard-coding it, so the
# number always matches the loaded CSV.
# NOTE(review): assumes the summary text contains "<N> visits" -- confirm for this export.
b_visits = int(re.search(r"(\d+) visits", b.iloc[1, -1]).group(1))

In [8]:
# Parse the visit count out of the summary cell instead of hard-coding it, so the
# number always matches the loaded CSV.
# NOTE(review): assumes the summary text contains "<N> visits" -- confirm for this export.
c_visits = int(re.search(r"(\d+) visits", c.iloc[1, -1]).group(1))

In [9]:
# Parse the visit count out of the summary cell instead of hard-coding it, so the
# number always matches the loaded CSV.
# NOTE(review): assumes the summary text contains "<N> visits" -- confirm for this export.
d_visits = int(re.search(r"(\d+) visits", d.iloc[1, -1]).group(1))

In [10]:
# Visit counts per version, in the order A, B, C, D (same order as `clicked` below)
visits = [a_visits, b_visits, c_visits, d_visits]

In [11]:
# Look up the click count of the button row in each export. Versions A and B use
# the 'SHOP NOW' label, C and D use 'SEE DEALS'. `.item()` requires the filter to
# match exactly one row.
a_clicks, b_clicks, c_clicks, d_clicks = (
    frame.loc[frame['Name'] == label, 'No. clicks'].item()
    for frame, label in [
        (a, 'SHOP NOW'),
        (b, 'SHOP NOW'),
        (c, 'SEE DEALS'),
        (d, 'SEE DEALS'),
    ]
)
clicked = [a_clicks, b_clicks, c_clicks, d_clicks]

In [12]:
# Pair each version's visits with its clicks; the remainder are visits without a click
no_click = [n_visits - n_clicks for n_visits, n_clicks in zip(visits, clicked)]

# Contingency table: one row per outcome, one column per version
observed_results = pd.DataFrame(
    [clicked, no_click],
    index=["Click", "No-click"],
    columns=["Version_A", "Version_B", "Version_C", "Version_D"],
)
observed_results

Unnamed: 0,Version_A,Version_B,Version_C,Version_D
Click,512,281,527,193
No-click,24814,24466,24349,25040


### Calculate the Result

In [13]:
# Chi-square test of independence on the full click / no-click table.
# Unpacks the test statistic, the p-value, the degrees of freedom and the
# table of expected frequencies.
chisq, pvalue, df, expected = stats.chi2_contingency(observed_results)

### Interpret the Result

In [14]:
# Report the decision: the null hypothesis stands only if the p-value exceeds alpha
print(
    "Do not reject the null hypothesis"
    if pvalue > alpha
    else "Reject the null hypothesis"
)

Reject the null hypothesis


### Checking CTRs

In [15]:
# Click-through rate per version: clicks divided by visits, in the order A, B, C, D
ctr_ls = [n_clicks / n_visits for n_clicks, n_visits in zip(clicked, visits)]

ctrs = pd.DataFrame(ctr_ls, index=['A', 'B', 'C', 'D'], columns=["CTR"])
ctrs

Unnamed: 0,CTR
A,0.020216
B,0.011355
C,0.021185
D,0.007649


### Post-Hoc

With four versions there are 6 pairwise combinations to compare:<br>
* Version A - Version B
* Version A - Version C
* Version A - Version D
* Version B - Version C
* Version B - Version D
* Version C - Version D

In [16]:
# Bonferroni-style correction: split alpha across the 6 pairwise comparisons
# so the pairwise tests use a stricter per-test threshold.
post_hoc_alpha = alpha/6

In [17]:
# Redisplay the click / no-click table used as input for the pairwise tests
observed_results

Unnamed: 0,Version_A,Version_B,Version_C,Version_D
Click,512,281,527,193
No-click,24814,24466,24349,25040


In [18]:
# Pairwise post-hoc tests: run a chi-square test on every pair of versions and
# record whether the pair differs significantly at the corrected threshold.
#
# The pairwise p-value is stored under its own name so that `chisq`, `pvalue`,
# `df` and `expected` from the overall test are not overwritten; otherwise
# re-running the interpretation cell afterwards would silently use the p-value
# of the last pair instead of the overall test.
stat_significant_dict = {}

for version in observed_results.columns:
    stat_significant_dict[version] = []
    for other in observed_results.columns:
        if version == other:
            # A version is never significantly different from itself
            stat_significant_dict[version].append(False)
            continue
        # Element 1 of the chi2_contingency result is the p-value
        pair_pvalue = stats.chi2_contingency(observed_results.loc[:, [version, other]])[1]
        # Significant when the p-value is below the corrected alpha
        stat_significant_dict[version].append(bool(pair_pvalue < post_hoc_alpha))

stat_significant_df = pd.DataFrame(stat_significant_dict,
                                   index=observed_results.columns)

stat_significant_df

Unnamed: 0,Version_A,Version_B,Version_C,Version_D
Version_A,False,True,False,True
Version_B,True,False,True,True
Version_C,False,True,False,True
Version_D,True,True,True,False


### Checking minimum detectable effect threshold

In [28]:
# Relative difference between the click-through rates of versions C and A,
# expressed as a percentage of A's rate
a_ctr, c_ctr = (ctrs.loc[version, "CTR"] for version in ("A", "C"))
print(f"There was a {round(100*(c_ctr-a_ctr) / a_ctr, 2)}% difference in click through rates")

There was a 4.79% difference in click through rates
