# Load packages and data

In [1]:
from itertools import combinations

import pandas as pd
from scipy import stats

In [2]:
# Each export sits in a folder named after its snapshot, and the element-list
# CSV inside that folder repeats the same snapshot name.
DATA_DIR = './CrazyEgg_data'
SNAPSHOT_DATE = '5-29-2013'


def element_list_path(version, label):
    '''Return the relative path of the element-list CSV for one homepage version.'''
    snapshot = 'Homepage Version {} - {}, {}'.format(version, label, SNAPSHOT_DATE)
    return '{}/{}/Element list {}.csv'.format(DATA_DIR, snapshot, snapshot)


# define relative paths to data files
interact_path = element_list_path(1, 'Interact')
connect_path = element_list_path(2, 'Connect')
learn_path = element_list_path(3, 'Learn')
help_path = element_list_path(4, 'Help')
services_path = element_list_path(5, 'Services')

# load data to dfs
interact_raw = pd.read_csv(interact_path)
connect_raw = pd.read_csv(connect_path)
learn_raw = pd.read_csv(learn_path)
help_raw = pd.read_csv(help_path)
services_raw = pd.read_csv(services_path)

In [3]:
# Preview the first rows of the raw "Services" export to inspect its columns.
services_raw.head()

Unnamed: 0,Element ID,Tag name,Name,No. clicks,Visible?,Snapshot information
0,69,a,FIND,397,True,Homepage Version 5 - Services • http://www...
1,61,input,s.q,323,True,created 5-29-2013 • 20 days 4 hours 59 min...
2,67,a,lib.montana.edu/find/,106,True,
3,62,button,Search,85,True,
4,98,a,Hours,81,True,


# Combine tables to one

In [4]:
def clean_df(original_df, terms=('INTERACT', 'CONNECT', 'LEARN', 'HELP', 'SERVICES')):
    '''Extract the category rows from a raw export and compute their click-through rate.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Raw export with at least the columns 'Name' and 'No. clicks'. The
        visit count is parsed from the cell in the second row of the last
        column: it is taken to be the fourth-from-last space-separated token
        of that text.
    terms : iterable of str, optional
        Values of 'Name' to keep. Defaults to the five category labels.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left untouched) with the columns
        'name', 'clicks', 'visits' and 'ctr' (= clicks / visits), keeping the
        row index of the matching rows.

    Raises
    ------
    ValueError
        If the parsed token is not an integer.
    '''
    # parse total amount of website visits
    visits = int(original_df.iloc[1, -1].split(' ')[-4])

    # Build the result as one chain so nothing is assigned onto a slice of
    # the input frame (which would trigger SettingWithCopyWarning).
    is_category = original_df['Name'].isin(list(terms))
    return (
        original_df.loc[is_category, ['Name', 'No. clicks']]
        .rename(columns={'Name': 'name', 'No. clicks': 'clicks'})
        .assign(visits=visits, ctr=lambda frame: frame['clicks'] / visits)
    )

In [5]:
# create cleaned dfs
interact = clean_df(interact_raw)
connect = clean_df(connect_raw)
learn = clean_df(learn_raw)
# trailing underscore so Python's built-in help() is not shadowed
help_ = clean_df(help_raw)
services = clean_df(services_raw)

df_list = [interact, connect, learn, help_, services]

In [6]:
# Stack the per-version frames, rank them by CTR (highest first), lower-case
# the labels and derive the number of visits that did not produce a click.
df = (
    pd.concat(df_list)
    .sort_values('ctr', ascending=False)
    .reset_index(drop=True)
    .assign(
        name=lambda frame: frame['name'].str.lower(),
        no_clicks=lambda frame: frame['visits'] - frame['clicks'],
    )
)
df

Unnamed: 0,name,clicks,visits,ctr,no_clicks
0,services,45,2064,0.021802,2019
1,connect,53,2742,0.019329,2689
2,help,38,3180,0.01195,3142
3,learn,21,2747,0.007645,2726
4,interact,42,10283,0.004084,10241


“Interact” and “Learn” are the worst performers, while “Services” and “Connect” perform much better.

# Chi-square test

* Null Hypothesis: The 5 versions of the button are equally likely to receive clicks, and the observed differences are due to chance
* Alternative Hypothesis: The observed differences are not due to chance: at least one version received so many more (or so many fewer) clicks than the others that chance alone can hardly explain it (i.e. its CTR, and therefore its performance, is genuinely better or worse).

In [7]:
# set significance level = 10% (i.e. a 90% confidence level)
alpha = 0.1

# create contingency table: one column per label, rows 'clicks' and 'no_clicks'.
# The label column becomes the index before transposing, so the counts stay
# numeric instead of being mixed with the label strings in an object-dtype frame.
conti = df.set_index('name')[['clicks', 'no_clicks']].transpose()
conti

name,services,connect,help,learn,interact
clicks,45,53,38,21,42
no_clicks,2019,2689,3142,2726,10241


In [8]:
# Chi-square test of independence over the full clicks / no_clicks table.
# chi2_contingency returns the test statistic, the p-value, the degrees of
# freedom and the table of expected frequencies, in that order.
chisq, pvalue, dof, expected = stats.chi2_contingency(conti)
print(chisq, pvalue, dof)
print(expected)

96.7432353798328 4.852334301093838e-20 4
[[   19.5439665     25.96393224    30.11134374    26.01127712
     97.3694804 ]
 [ 2044.4560335   2716.03606776  3149.88865626  2720.98872288
  10185.6305196 ]]


In [9]:
# Reject the null hypothesis when the p-value falls below the significance level.
print('Reject Null.' if pvalue < alpha else 'Do not reject Null.')

Reject Null.


That means: at least one of the website versions performs significantly worse or better than the rest!

# Post-hoc test to identify ideal label

In [10]:
# create all possible paired combinations
combis = list(combinations(conti.columns, 2))

# one two-column contingency table per pair of labels
df_combis = [conti[list(combi)] for combi in combis]

df_combis[0]

name,services,connect
clicks,45,53
no_clicks,2019,2689


Define new significance level based on the Bonferroni Adjustment, using previous alpha and number of paired combinations.

In [11]:
# Bonferroni adjustment: divide the original significance level by the
# number of pairwise comparisons.
n_comparisons = len(df_combis)
alpha2 = alpha / n_comparisons
alpha2

0.01

In [12]:
results = []

# run post-hoc chi-square test for all pairs and store results
# The loop uses its own variable names so that it does not overwrite `df`
# (the combined frame) or `chisq` / `pvalue` / `dof` / `expected` (the
# omnibus test result) from earlier cells; re-running those cells later
# would otherwise operate on the last pair only.
# NOTE: for 2x2 tables scipy applies Yates' continuity correction by default.
for pair_df in df_combis:
    pair_chisq, pair_pvalue, pair_dof, pair_expected = stats.chi2_contingency(pair_df)
    first_label, second_label = pair_df.columns
    results.append([pair_chisq, pair_pvalue, first_label, second_label])

results[0:2]

[[0.24744894078894586, 0.6188771123975272, 'services', 'connect'],
 [7.180281909052921, 0.007370912499282061, 'services', 'help']]

In [13]:
# create new df from post-hoc test results
results_df = pd.DataFrame(results, columns=['chisq', 'pvalue', 'option1', 'option2'])

# a pair counts as distinguishable when its p-value is below the adjusted level
results_df['distinguishable?'] = results_df.pvalue < alpha2

# show only the pairs that are NOT distinguishable (p-value >= adjusted level)
results_df.loc[~results_df['distinguishable?']]

Unnamed: 0,chisq,pvalue,option1,option2,distinguishable?
0,0.247449,0.618877,services,connect,False
4,4.822649,0.028088,connect,help,False
7,2.351936,0.125128,help,learn,False
9,4.995056,0.02542,learn,interact,False


The table above lists the pairs from results_df whose click-through rates are not significantly different at the Bonferroni-adjusted level; every pair not listed is distinguishable. In particular, Services and Connect, the best-performing options based on CTR, are indistinguishable. So no clearly preferable option has been identified so far.

However, when also considering Drop-off and Homepage-return rates, "Services" performs best of all options. In both metrics, "Services" beats all other options.

**The winner is Services!**