In [1]:
import impala.dbapi
import impala.util
from scipy import stats
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import json
from datetime import date
import matplotlib.gridspec as gridspec
today = date.today()
import matplotlib.ticker as ticker
warnings.filterwarnings('ignore')
%matplotlib inline
# Global plotting defaults for every figure drawn in this notebook.
sns.set(font_scale=1.5, palette='bright')
# widen the cell 
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is a deprecated path.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
# Opens a live Impala connection when this cell runs; `impala_run` reuses it.
# NOTE(review): host and port are hard-coded; consider moving them to a config cell.
imp_connection = impala.dbapi.connect(host='impala-lb-main.data.houzz.net', port=21050)
# NOTE(review): the cursor is not used anywhere in the visible part of this notebook.
imp_cursor = imp_connection.cursor()
# Let long cell values render without being truncated (fully-qualified option name).
pd.set_option('display.max_colwidth', 200)
def impala_run(q):
    """Run the SQL text ``q`` on the notebook's Impala connection.

    Returns whatever ``pd.read_sql`` produces for the query, i.e. the result
    set as a pandas DataFrame.
    """
    result_frame = pd.read_sql(sql=q, con=imp_connection)
    return result_frame

import plotly.express as px
import plotly.graph_objects as go

# 1. Sample size calculation

### input: dataframe that has the following types of columns
        1. Unit of analysis, total units participating in the test, e.g. users, visitors, sessions, etc. 
           This is ONE column, and shall be named as "users", or "users_xxx"
        2. Converted units, for proportional metric z-test, e.g. users who spent, visitors who contacted pro, sessions with clicks, etc. 
           This can be multiple columns, and shall be named consistent with 1. unit of analysis, but with underscore followed with a description of the metrics, e.g."users_spending"
        3. Mean metrics, for mean metric t-test, e.g. average GMV.
           This can be multiple columns, and shall be named as "mean_xxx"
        4. Standard deviation of the mean metrics, for mean metric t-test, e.g. std GMV. 
           This needs to have the same number of columns as the mean metric columns, and be named consistently with the mean metrics as "std_xxx"
### function: 
       output_sample_size(df, col_N, small2large_size_ratio=1, delta=0.05, alpha=0.05, power=0.8, two_sided=True)
       df: input dataframe
       col_N: unit of analysis column name (1 above)
       small2large_size_ratio: ratio of smaller to larger group size. E.g. for 50-50 split, this should be 1; for 10-90 split, this should be 1/9. 
       delta: effect size that needs to be detected, default 5%.
       alpha: significance level, default 5%
       power: power of analysis, default 80%
       two_sided: True for two-sided test, False for one-sided test

In [2]:
# Sample size calculator, default delta change is 5%

# input: baseline_dict
# {'proportion': value}
# Or 
# {'mean': value, 'std': value}

# output: sample size for each group

import scipy.stats as st
def sample_size_cal_unequal(baseline_dict, small2large_size_ratio=1, delta=0.05, alpha=0.05, power=0.8, two_sided=True):
    """Compute the per-group sample sizes needed to detect a relative change.

    Parameters
    ----------
    baseline_dict : dict
        Either ``{'proportion': value}`` (proportion metric) or
        ``{'mean': value, 'std': value}`` (mean metric). When both forms are
        present the proportion branch is used.
    small2large_size_ratio : float
        Ratio of the smaller to the larger group size (1 for a 50-50 split,
        1/9 for a 10-90 split).
    delta : float
        Relative effect size to detect (0.05 means a 5% change of the baseline).
    alpha : float
        Significance level; halved internally for a two-sided test.
    power : float
        Desired power of the test.
    two_sided : bool
        True for a two-sided test, False for a one-sided test.

    Returns
    -------
    tuple of int
        ``(n1, n2)`` with ``n2 = n1 * small2large_size_ratio``; both values are
        truncated to integers.

    Raises
    ------
    ValueError
        If ``baseline_dict`` has neither a 'proportion' key nor both 'mean'
        and 'std' keys.
    """
    has_proportion = 'proportion' in baseline_dict
    has_mean_std = 'mean' in baseline_dict and 'std' in baseline_dict
    if not (has_proportion or has_mean_std):
        # Previously this case only printed the message and then failed with an
        # UnboundLocalError on the return statement.
        raise ValueError('Error! Format of baseline_dict should either be {"proportion": value} or {"mean": value, "std": value}')

    # A two-sided test splits alpha evenly between both tails.
    tail_alpha = alpha / 2 if two_sided else alpha
    r = small2large_size_ratio

    if has_proportion:
        # Lower-tail quantile (negative); the sign vanishes once the bracket is squared.
        z_alpha = st.norm.ppf(tail_alpha)
        p1 = baseline_dict['proportion']
        p2 = p1*(1+delta)
        # Group-size-weighted average of the two proportions.
        pb = (p1+r*p2)/(1+r)
        # NOTE(review): the trailing term multiplies by (p2 - p1); the usual
        # continuity correction divides by |p2 - p1|. Kept as-is so results do
        # not change -- confirm the intended formula.
        n1 = (z_alpha*np.sqrt((1+r)*pb*(1-pb)) - st.norm.ppf(power)*np.sqrt(r*p1*(1-p1)+p2*(1-p2)))**2 / (r*(p2-p1)**2) + (r+1)/r*(p2-p1)
    else:
        z_alpha = st.norm.ppf(1-tail_alpha)
        z_beta = st.norm.ppf(power)
        baseline = baseline_dict['mean']
        std = baseline_dict['std']
        # baseline*delta is the absolute difference in means to be detected.
        n1 = (r+1)/r * (z_alpha + z_beta)**2 * (std)**2 / (baseline*delta)**2

    n2 = n1 * r
    return int(n1), int(n2)

def output_sample_size(df, col_N, small2large_size_ratio=1, delta=0.05, alpha=0.05, power=0.8, two_sided=True):
    """Build a table of required sample sizes for every metric found in ``df``.

    Columns are classified by name: columns sharing the unit prefix of
    ``col_N`` (the text before its first underscore) are converted-unit counts
    for proportion metrics; columns starting with 'mean' / 'std' are paired
    (after sorting) as mean metrics. Baselines are read from the first row.

    Parameters
    ----------
    df : pandas.DataFrame
        Baseline aggregates; only the first row is used.
    col_N : str
        Name of the unit-of-analysis column (total units).
    small2large_size_ratio, delta, alpha, power, two_sided
        Forwarded unchanged to ``sample_size_cal_unequal``.

    Returns
    -------
    pandas.DataFrame
        One row per metric with columns 'metric', 'baseline',
        'detectible size', 'sample size needed group 1' and
        'sample size needed group 2'.

    Raises
    ------
    ValueError
        If ``df`` has no rows, or the mean and std columns do not pair up.
    """
    if len(df) == 0:
        raise ValueError("Error! Input dataframe has no rows!")

    units = col_N.split('_')[0]
    cols_X, cols_mean, cols_std = [], [], []
    for column in df.columns:
        if column.startswith(units) and column != col_N:
            cols_X.append(column)
        elif column.startswith('mean'):
            cols_mean.append(column)
        elif column.startswith('std'):
            cols_std.append(column)
    cols_X.sort()
    cols_mean.sort()
    cols_std.sort()

    # Mispaired mean/std columns would silently yield wrong sample sizes, so stop here.
    if len(cols_mean) != len(cols_std):
        raise ValueError("Error! Mean and std columns are not matching!")
    for col_mean, col_std in zip(cols_mean, cols_std):
        if col_mean.replace('mean_', '') != col_std.replace('std_', ''):
            raise ValueError("Error! Mean and std columns are not matching!")

    size_kwargs = dict(small2large_size_ratio=small2large_size_ratio,
                       delta=delta,
                       alpha=alpha,
                       power=power,
                       two_sided=two_sided)
    # Rounded before formatting: int(delta*100) truncated e.g. 0.29 to 28%.
    detectable = '{0:g}%'.format(round(delta*100, 10))

    records = []
    for col_X in cols_X:
        # .iloc[0] is positional, so a non-default index on df is fine.
        proportion = df[col_X].iloc[0]/df[col_N].iloc[0]
        size_1, size_2 = sample_size_cal_unequal({'proportion': proportion}, **size_kwargs)
        records.append({'metric': '% {}'.format(col_X.replace(units+'_', '')),
                        'baseline': '{0:.2f}%'.format(proportion*100),
                        'detectible size': detectable,
                        'sample size needed group 1': size_1,
                        'sample size needed group 2': size_2})
    for col_mean, col_std in zip(cols_mean, cols_std):
        baseline_dict = {'mean': df[col_mean].iloc[0], 'std': df[col_std].iloc[0]}
        size_1, size_2 = sample_size_cal_unequal(baseline_dict, **size_kwargs)
        records.append({'metric': 'AVG {}'.format(col_mean.replace('mean_', '')),
                        'baseline': '{0:.4f}'.format(baseline_dict['mean']),
                        'detectible size': detectable,
                        'sample size needed group 1': size_1,
                        'sample size needed group 2': size_2})

    # Build the frame once from the collected rows (DataFrame.append no longer exists in pandas 2.x).
    table = pd.DataFrame(records, columns=['metric', 'baseline', 'detectible size',
                                           'sample size needed group 1', 'sample size needed group 2'])
    table['sample size needed group 1'] = table['sample size needed group 1'].astype(int)
    table['sample size needed group 2'] = table['sample size needed group 2'].astype(int)
    return table

## Example

In [3]:
# Baseline aggregates for the sample-size example: a single result row over
# l2.session_analytics with dt > '2020-01-29', restricted to sessions whose
# test_set carries a save_confirmation_test assignment. The column aliases
# (sessions_*, mean_*, std_*) follow the naming convention that
# output_sample_size relies on.
# NOTE(review): the two REGEXP_EXTRACT filters use different character classes
# ([a-z]+ vs [a-z0-9]+) -- confirm this is intentional.
baseline = impala_run('''
SELECT 
    COUNT(session_id) AS sessions_all, 
    SUM(bounce_sessions) AS sessions_bounce, 
    COUNT(CASE WHEN add_to_ideabook>0 THEN session_id ELSE NULL END) AS sessions_idbk_save, 
    COUNT(CASE WHEN contact_pro>0 THEN session_id ELSE NULL END) AS sessions_pro_contact, 
    AVG(COALESCE(add_to_ideabook,0)) AS mean_idbk_saves, 
    AVG(COALESCE(contact_pro,0)) AS mean_pro_contacts,
    AVG(COALESCE(view_photo,0)) AS mean_photo_views, 
    STDDEV(COALESCE(add_to_ideabook,0)) AS std_idbk_saves, 
    STDDEV(COALESCE(contact_pro,0)) AS std_pro_contacts,
    STDDEV(COALESCE(view_photo,0)) AS std_photo_views    
FROM l2.session_analytics 
WHERE dt > '2020-01-29'
AND (REGEXP_EXTRACT(test_set, 'save_confirmation_test=([a-z]+)', 1) IS NOT NULL AND REGEXP_EXTRACT(test_set, 'save_confirmation_test=([a-z0-9]+)', 1) != '')
; ''')

In [4]:
baseline # input: single-row frame with the unit count, converted-unit counts, means and stds

Unnamed: 0,sessions_all,sessions_bounce,sessions_idbk_save,sessions_pro_contact,mean_idbk_saves,mean_pro_contacts,mean_photo_views,std_idbk_saves,std_pro_contacts,std_photo_views
0,892819,125905,466532,1851,2.0666,0.004778,3.014226,5.110579,0.141069,9.952891


In [5]:
output_sample_size(baseline, 'sessions_all',  small2large_size_ratio=1, delta=0.05, alpha=0.05, power=0.8, two_sided=True) # Output: required sample size per group for each metric in `baseline`

Unnamed: 0,metric,baseline,detectible size,sample size needed group 1,sample size needed group 2
0,% bounce,14.10%,5%,39041,39041
1,% idbk_save,52.25%,5%,5718,5718
2,% pro_contact,0.21%,5%,3097807,3097807
3,AVG idbk_saves,2.0666,5%,38399,38399
4,AVG photo_views,3.0142,5%,68461,68461
5,AVG pro_contacts,0.0048,5%,5473297,5473297


# 2. Statistical tests for aggregated metrics

### input: dataframe that has the following types of columns
        1. Unit of analysis, total units participating in the test, e.g. users, visitors, sessions, etc. 
           This is ONE column, and shall be named as "users", or "users_xxx"
        2. Converted units, for proportional metric z-test, e.g. users who spent, visitors who contacted pro, sessions with clicks, etc. 
           This can be multiple columns, and shall be named consistent with 1. unit of analysis, but with underscore followed with a description of the metrics, e.g."users_spending"
        3. Mean metrics, for mean metric t-test, e.g. average GMV.
           This can be multiple columns, and shall be named as "mean_xxx"
        4. Standard deviation of the mean metrics, for mean metric t-test, e.g. std GMV. 
           This needs to have the same number of columns as the mean metric columns, and be named consistently with the mean metrics as "std_xxx"
        5. Test group that has the names of each group. 
           This is ONE column
### function: 
       compare_multiple_groups(df, col_N, hue, details='xshort', col_width=110, output='color-coded')
       df: input dataframe
       col_N: unit of analysis column name (1 above)
       hue: test group column name (5 above)
       details: 'xshort' outputs percentage difference along with 'sig/not sig'; 'xlong' outputs percentage difference, confidence interval and p-value
       output: 'raw' outputs a dataframe table with the results; 'color-coded' outputs results table with significant positives in green and significant negatives in red. 

In [6]:
# widen the cell 
# Imported from the public `IPython.display` module; the `IPython.core.display`
# path for `display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))


from statsmodels.stats.proportion import proportions_ztest
from scipy import stats
def z_test_prop_agg(n_ctr, n_exp, x_ctr, x_exp):
    """Two-proportion z-test computed from aggregated counts.

    Parameters: group sizes ``n_ctr`` / ``n_exp`` and converted-unit counts
    ``x_ctr`` / ``x_exp`` for control and experiment.

    Returns an 11-tuple:
    ``(n_ctr, n_exp, p_ctr, p_exp, diff, lower_CI, higher_CI,
    diff_perc, lower_CI_perc, higher_CI_perc, pval)`` where the interval is
    ``diff -/+ 1.96 * pooled standard error``, the ``*_perc`` values are
    expressed as a percentage of the control proportion, and ``pval`` is the
    two-sided normal p-value.
    """
    rate_ctr = x_ctr/n_ctr
    rate_exp = x_exp/n_exp

    def _pct_of_control(value):
        # The tiny epsilon keeps the division defined when the control rate is 0.
        return value/(rate_ctr+1e-12) * 100

    pooled_rate = (x_ctr + x_exp)/(n_ctr + n_exp)
    pooled_se = np.sqrt(pooled_rate * (1-pooled_rate) * (1/n_ctr + 1/n_exp))
    abs_diff = rate_exp - rate_ctr
    z_stat = abs_diff/pooled_se
    two_sided_p = stats.norm.sf(abs(z_stat))*2

    ci_low = abs_diff-1.96*pooled_se
    ci_high = abs_diff+1.96*pooled_se

    return (n_ctr, n_exp, rate_ctr, rate_exp, abs_diff, ci_low, ci_high,
            _pct_of_control(abs_diff), _pct_of_control(ci_low), _pct_of_control(ci_high),
            two_sided_p)
def t_test_mean_agg(n_ctr, n_exp, mean_ctr, mean_exp, std_ctr, std_exp):
    """Pooled-variance two-sample t-test computed from aggregated statistics.

    Parameters: group sizes, means and standard deviations for control
    (``*_ctr``) and experiment (``*_exp``).

    Returns an 11-tuple:
    ``(n_ctr, n_exp, mean_ctr, mean_exp, diff, lower_CI, higher_CI,
    diff_perc, lower_CI_perc, higher_CI_perc, pval)`` where the interval is
    ``diff -/+ 1.96 * pooled standard error``, the ``*_perc`` values are
    expressed as a percentage of the control mean, and ``pval`` is the
    two-sided t-distribution p-value with ``n_ctr + n_exp - 2`` degrees of
    freedom.
    """
    def _pct_of_control(value):
        # The tiny epsilon keeps the division defined when the control mean is 0.
        return value/(mean_ctr+1e-12) * 100

    mean_gap = mean_exp - mean_ctr
    degrees = n_ctr+n_exp-2
    pooled_var = ((n_ctr-1)*std_ctr*std_ctr + (n_exp-1)*std_exp*std_exp)/degrees
    pooled_se = np.sqrt(pooled_var) * np.sqrt(1/n_ctr + 1/n_exp)
    t_stat = mean_gap / pooled_se
    two_sided_p = stats.t.sf(np.abs(t_stat), degrees)*2

    ci_low = mean_gap-1.96*pooled_se
    ci_high = mean_gap+1.96*pooled_se

    return (n_ctr, n_exp, mean_ctr, mean_exp, mean_gap, ci_low, ci_high,
            _pct_of_control(mean_gap), _pct_of_control(ci_low), _pct_of_control(ci_high),
            two_sided_p)
# Row-wise styling helpers: colour the '% diff' cell when the difference is significant
def highlight_xlong(s):
    """Return three CSS strings for one row of a long-format results table.

    ``s['% diff']`` is either 'N/A' or text shaped like
    '<diff>% [<low>%, <high>%], p=<pval>'. The first two entries are always
    black; the third is red for a negative difference with p < 0.05, green
    for a positive one, and black otherwise.
    """
    black = 'color:black'
    diff_text = s['% diff']
    if diff_text == 'N/A':
        return [black, black, black]
    # The p-value is parsed first; the difference is only parsed when p < 0.05.
    if not float(diff_text.split('p=')[1]) < 0.05:
        return [black, black, black]
    change = float(diff_text.split('%')[0])
    if change < 0:
        return [black, black, 'color: red']
    if change > 0:
        return [black, black, 'color: green']
    return [black, black, black]
def highlight_xshort(s):
    """Return three CSS strings for one row of a short-format results table.

    ``s['% diff']`` is either 'N/A' or text shaped like '<diff>% (sig)' /
    '<diff>% (not sig)'. The first two entries are always black; the third is
    red for a significant negative difference, green for a significant
    positive one, and black otherwise.
    """
    black = 'color:black'
    diff_text = s['% diff']
    if diff_text == 'N/A':
        return [black, black, black]
    # Text after the first '%' with the surrounding parentheses removed.
    verdict = diff_text.split('%')[1].strip().strip('(').strip(')')
    if verdict != 'sig':
        return [black, black, black]
    change = float(diff_text.split('%')[0])
    if change < 0:
        return [black, black, 'color: red']
    if change > 0:
        return [black, black, 'color: green']
    return [black, black, black]
def output_format(results, ctr_name, exp_name, metric_type, details):
    """Format one test result tuple into display strings.

    ``results`` is the 11-tuple returned by ``z_test_prop_agg`` /
    ``t_test_mean_agg``. The returned dict maps ``ctr_name`` and ``exp_name``
    to the formatted metric values (4 decimals for 'mean', a percentage with
    2 decimals for 'proportion', the text 'error' for any other
    ``metric_type``). For ``details`` of 'xshort' or 'xlong' it also holds a
    '% diff' entry; any other ``details`` value leaves that key out.
    Percentages beyond +/-10000 are shown as +/-inf.
    """
    (N_ctr, N_exp, metric_ctr, metric_exp, diff, lower_CI, higher_CI,
     diff_perc, lower_CI_perc, higher_CI_perc, pval) = results

    def _cap(pct):
        # Extreme relative changes are displayed as infinite.
        if pct > 10000:
            return np.inf
        if pct < -10000:
            return -np.inf
        return pct

    def _render(value):
        if metric_type == 'mean':
            return '{0:.4f}'.format(value)
        if metric_type == 'proportion':
            return '{0:.2f}%'.format(value*100)
        return 'error'

    shown_diff = _cap(diff_perc)
    shown_low = _cap(lower_CI_perc)
    shown_high = _cap(higher_CI_perc)

    formatted = {}
    formatted[ctr_name] = _render(metric_ctr)
    formatted[exp_name] = _render(metric_exp)
    if details == 'xshort':
        verdict = 'sig' if pval<0.05 else 'not sig'
        formatted['% diff'] = '{0:.1f}% ({1})'.format(shown_diff, verdict)
    elif details == 'xlong':
        formatted['% diff'] = '{0:.1f}% [{1:.1f}%, {2:.1f}%], p={3:.3f}'.format(shown_diff, shown_low, shown_high, pval)
    return formatted
def output_table(df, col_N, cols_X, cols_mean, cols_std, hue, ctr_name='Control', exp_name='variant', details='xshort', col_width=110, output='color-coded'):
    """Compare one experiment group against one control group for every metric.

    Parameters
    ----------
    df : pandas.DataFrame
        Aggregated results; the first row whose ``hue`` value equals
        ``ctr_name`` / ``exp_name`` supplies each group's figures.
    col_N : str
        Unit-of-analysis column (group sizes).
    cols_X : list of str
        Converted-unit count columns, tested with ``z_test_prop_agg``.
    cols_mean, cols_std : list of str
        Mean / standard deviation columns, paired positionally and tested with
        ``t_test_mean_agg``.
    hue : str
        Column holding the group names.
    ctr_name, exp_name
        Group labels to compare (experiment minus control).
    details : {'xshort', 'xlong'}
        Format of the '% diff' column, see ``output_format``.
    col_width : int
        Pixel width of the two value columns in the styled output.
    output : {'raw', 'color-coded'}
        'raw' returns the DataFrame; 'color-coded' returns a Styler with
        significant differences coloured.

    Returns
    -------
    pandas.DataFrame or pandas Styler
        Indexed by metric label, with columns ``ctr_name``, ``exp_name`` and
        '% diff'.

    Raises
    ------
    ValueError
        For an unknown ``details`` / ``output`` value (these used to return
        None silently) or when a group has no row in ``df``.
    """
    if details not in ('xshort', 'xlong'):
        raise ValueError("details must be 'xshort' or 'xlong', got {!r}".format(details))
    if output not in ('raw', 'color-coded'):
        raise ValueError("output must be 'raw' or 'color-coded', got {!r}".format(output))

    units = col_N.split('_')[0]
    # Filter each group once instead of once per metric.
    ctr_rows = df[df[hue] == ctr_name]
    exp_rows = df[df[hue] == exp_name]
    if len(ctr_rows) == 0 or len(exp_rows) == 0:
        raise ValueError("Groups {!r} and {!r} must both be present in column {!r}".format(ctr_name, exp_name, hue))
    n_ctr = ctr_rows[col_N].values[0]
    n_exp = exp_rows[col_N].values[0]

    # Rows are collected in a list and turned into a frame once
    # (DataFrame.append no longer exists in pandas 2.x).
    records = [{'metric': '{} (unit of analysis)'.format(units), ctr_name: n_ctr, exp_name: n_exp, '% diff': 'N/A'}]

    for col_X in cols_X:
        results = z_test_prop_agg(n_ctr, n_exp, ctr_rows[col_X].values[0], exp_rows[col_X].values[0])
        out_dict = output_format(results, ctr_name, exp_name, 'proportion', details)
        out_dict['metric'] = '% {}'.format(col_X.replace(units+'_', ''))
        records.append(out_dict)
    for col_mean, col_std in zip(cols_mean, cols_std):
        results = t_test_mean_agg(n_ctr, n_exp,
                                  ctr_rows[col_mean].values[0], exp_rows[col_mean].values[0],
                                  ctr_rows[col_std].values[0], exp_rows[col_std].values[0])
        out_dict = output_format(results, ctr_name, exp_name, 'mean', details)
        out_dict['metric'] = 'AVG {}'.format(col_mean.replace('mean_', ''))
        records.append(out_dict)

    table = pd.DataFrame(records, columns=['metric', ctr_name, exp_name, '% diff'])
    # Metric labels become the index; the index name is dropped for a cleaner display.
    table = table.set_index('metric').rename_axis(None)

    if output == 'raw':
        return table

    if details == 'xlong':
        highlighter, diff_width = highlight_xlong, '220px'
    else:
        highlighter, diff_width = highlight_xshort, '90px'
    value_width = '{}px'.format(col_width)
    return (table.style.apply(highlighter, axis=1)
            .set_properties(subset=['% diff'], **{'width': diff_width})
            .set_properties(subset=[ctr_name], **{'width': value_width})
            .set_properties(subset=[exp_name], **{'width': value_width}))

def color_red_or_green(cell, details):
    """Return the CSS colour rule for a single table cell.

    Parameters
    ----------
    cell : object
        Cell value. Only strings in the '% diff' format are ever coloured;
        every other value is black.
    details : {'xshort', 'xlong'}
        Which '% diff' format to parse: '<diff>% (sig)' / '<diff>% (not sig)'
        for 'xshort', '<diff>% [<low>%, <high>%], p=<pval>' for 'xlong'.

    Returns
    -------
    str
        'color: red' for a significant negative difference, 'color: green'
        for a significant positive one, 'color: black' otherwise.

    Raises
    ------
    ValueError
        If ``details`` is not 'xshort' or 'xlong' (this used to surface as an
        UnboundLocalError).
    """
    if details not in ('xshort', 'xlong'):
        raise ValueError("details must be 'xshort' or 'xlong', got {!r}".format(details))

    color = 'black'
    if isinstance(cell, str):
        significant = False
        if details == 'xshort' and 'sig' in cell:
            parts = cell.split('%')
            # A string that contains 'sig' but no '%' is not a diff cell; leave it black.
            label = parts[1].strip().strip('(').strip(')') if len(parts) > 1 else ''
            significant = label == 'sig'
        elif details == 'xlong' and 'p=' in cell:
            significant = float(cell.split('p=')[1]) < 0.05
        if significant:
            # The difference is the number in front of the first '%'.
            change = float(cell.split('%')[0])
            if change < 0:
                color = 'red'
            elif change > 0:
                color = 'green'
    return 'color: %s' % color

def compare_multiple_groups(df, col_N, hue, details='xshort', col_width=110, output='color-coded'):
    """Run every pairwise group comparison and merge the results into one table.

    Columns are classified by name: columns sharing the unit prefix of
    ``col_N`` (the text before its first underscore) are converted-unit counts
    for proportion z-tests; columns starting with 'mean' / 'std' are paired
    (after sorting) for mean t-tests. ``col_N`` and ``hue`` themselves are
    never treated as metrics. Groups are sorted and each later group is
    compared against each earlier one via ``output_table``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row of aggregates per test group.
    col_N : str
        Unit-of-analysis column name.
    hue : str
        Column holding the test group names.
    details : {'xshort', 'xlong'}
        Format of the '% diff' columns.
    col_width : int
        Forwarded to ``output_table``.
    output : str
        'raw' returns the DataFrame; any other value returns a Styler with
        significant differences coloured by ``color_red_or_green``.

    Returns
    -------
    pandas.DataFrame or pandas Styler
        One value column per group (sorted) followed by one
        '% diff (<later> - <earlier>)' column per pair (sorted).

    Raises
    ------
    ValueError
        If ``details`` is unknown or the mean and std columns do not pair up.
    """
    if details not in ('xshort', 'xlong'):
        raise ValueError("details must be 'xshort' or 'xlong', got {!r}".format(details))

    units = col_N.split('_')[0]
    cols_X, cols_mean, cols_std = [], [], []
    for column in df.columns:
        if column == col_N or column == hue:
            # The group column must not be mistaken for a metric, even if its
            # name happens to share the unit prefix.
            continue
        if column.startswith(units):
            cols_X.append(column)
        elif column.startswith('mean'):
            cols_mean.append(column)
        elif column.startswith('std'):
            cols_std.append(column)
    cols_X.sort()
    cols_mean.sort()
    cols_std.sort()

    # Mispaired mean/std columns would silently yield wrong test results, so stop here.
    if len(cols_mean) != len(cols_std):
        raise ValueError("Error! Mean and std columns are not matching!")
    for col_mean, col_std in zip(cols_mean, cols_std):
        if col_mean.replace('mean_', '') != col_std.replace('std_', ''):
            raise ValueError("Error! Mean and std columns are not matching!")

    groups = sorted(df[hue].dropna().unique().tolist())

    # Value and diff columns are tracked separately instead of being told apart
    # by a leading '%', so group names of any type (or starting with '%') work.
    value_cols = {}
    diff_cols = {}
    for i, ctr_name in enumerate(groups):
        for exp_name in groups[i+1:]:
            comp = output_table(df, col_N, cols_X, cols_mean, cols_std, hue, ctr_name=ctr_name, exp_name=exp_name,
                                details=details, col_width=col_width, output='raw')
            diff_label = '% diff ({} - {})'.format(exp_name, ctr_name)
            value_cols[ctr_name] = comp[[ctr_name]]
            value_cols[exp_name] = comp[[exp_name]]
            diff_cols[diff_label] = comp[['% diff']].rename(columns={'% diff': diff_label})

    pieces = [value_cols[name] for name in sorted(value_cols)]
    pieces += [diff_cols[label] for label in sorted(diff_cols)]
    # With fewer than two groups there is nothing to compare; keep returning an empty table.
    comps_table = pd.concat(pieces, axis=1) if pieces else pd.DataFrame()

    if output == 'raw':
        return comps_table
    styler = comps_table.style
    # Styler.applymap was renamed to Styler.map in pandas 2.1; use whichever exists.
    cellwise = styler.map if hasattr(styler, 'map') else styler.applymap
    return cellwise(color_red_or_green, details=details)

## Example

In [7]:
# Per-group aggregates for the test-analysis example: one result row per
# save_confirmation_test group extracted from test_set, over
# l2.session_analytics with dt > '2020-01-29'. The column aliases (test_group,
# sessions_*, mean_*, std_*) follow the naming convention that
# compare_multiple_groups relies on.
# NOTE(review): the two REGEXP_EXTRACT filters use different character classes
# ([a-z]+ vs [a-z0-9]+) -- confirm this is intentional.
test_results = impala_run('''
SELECT 
    REGEXP_EXTRACT(test_set, 'save_confirmation_test=([a-z0-9]+)', 1) AS test_group, 
    COUNT(session_id) AS sessions_all, 
    SUM(bounce_sessions) AS sessions_bounce, 
    COUNT(CASE WHEN add_to_ideabook>0 THEN session_id ELSE NULL END) AS sessions_idbk_save, 
    COUNT(CASE WHEN contact_pro>0 THEN session_id ELSE NULL END) AS sessions_pro_contact, 
    AVG(COALESCE(add_to_ideabook,0)) AS mean_idbk_saves, 
    AVG(COALESCE(contact_pro,0)) AS mean_pro_contacts,
    AVG(COALESCE(view_photo,0)) AS mean_photo_views, 
    STDDEV(COALESCE(add_to_ideabook,0)) AS std_idbk_saves, 
    STDDEV(COALESCE(contact_pro,0)) AS std_pro_contacts,
    STDDEV(COALESCE(view_photo,0)) AS std_photo_views    
FROM l2.session_analytics 
WHERE dt > '2020-01-29'
AND (REGEXP_EXTRACT(test_set, 'save_confirmation_test=([a-z]+)', 1) IS NOT NULL AND REGEXP_EXTRACT(test_set, 'save_confirmation_test=([a-z0-9]+)', 1) != '')
GROUP BY 1
ORDER BY 1; ''')

In [8]:
test_results # input: one row of aggregates per test group

Unnamed: 0,test_group,sessions_all,sessions_bounce,sessions_idbk_save,sessions_pro_contact,mean_idbk_saves,mean_pro_contacts,mean_photo_views,std_idbk_saves,std_pro_contacts,std_photo_views
0,off,301499,42411,157412,636,2.061144,0.005002,3.092345,5.27209,0.150868,10.299208
1,v1,297578,41928,155576,602,2.053952,0.004651,2.971255,4.982848,0.138249,9.728611
2,v2,293742,41566,153544,613,2.085013,0.004678,2.977576,5.069766,0.133276,9.814489


In [13]:
compare_multiple_groups(test_results, 'sessions_all', 'test_group', details='xshort') # output: pairwise test analysis, short format (% diff with sig / not sig)

Unnamed: 0,off,v1,v2,% diff (v1 - off),% diff (v2 - off),% diff (v2 - v1)
sessions (unit of analysis),301499,297578,293742,,,
% bounce,14.07%,14.09%,14.15%,0.2% (not sig),0.6% (not sig),0.4% (not sig)
% idbk_save,52.21%,52.28%,52.27%,0.1% (not sig),0.1% (not sig),-0.0% (not sig)
% pro_contact,0.21%,0.20%,0.21%,-4.1% (not sig),-1.1% (not sig),3.2% (not sig)
AVG idbk_saves,2.0611,2.0540,2.0850,-0.3% (not sig),1.2% (not sig),1.5% (sig)
AVG photo_views,3.0923,2.9713,2.9776,-3.9% (sig),-3.7% (sig),0.2% (not sig)
AVG pro_contacts,0.0050,0.0047,0.0047,-7.0% (not sig),-6.5% (not sig),0.6% (not sig)


In [14]:
compare_multiple_groups(test_results, 'sessions_all', 'test_group', details='xlong')  # output: pairwise test analysis, long format (% diff, confidence interval, p-value)

Unnamed: 0,off,v1,v2,% diff (v1 - off),% diff (v2 - off),% diff (v2 - v1)
sessions (unit of analysis),301499,297578,293742,,,
% bounce,14.07%,14.09%,14.15%,"0.2% [-1.1%, 1.4%], p=0.798","0.6% [-0.7%, 1.9%], p=0.353","0.4% [-0.8%, 1.7%], p=0.502"
% idbk_save,52.21%,52.28%,52.27%,"0.1% [-0.3%, 0.6%], p=0.583","0.1% [-0.4%, 0.6%], p=0.632","-0.0% [-0.5%, 0.5%], p=0.945"
% pro_contact,0.21%,0.20%,0.21%,"-4.1% [-15.0%, 6.8%], p=0.461","-1.1% [-12.1%, 10.0%], p=0.849","3.2% [-8.3%, 14.6%], p=0.588"
AVG idbk_saves,2.0611,2.0540,2.0850,"-0.3% [-1.6%, 0.9%], p=0.587","1.2% [-0.1%, 2.4%], p=0.075","1.5% [0.3%, 2.8%], p=0.018"
AVG photo_views,3.0923,2.9713,2.9776,"-3.9% [-5.6%, -2.3%], p=0.000","-3.7% [-5.4%, -2.1%], p=0.000","0.2% [-1.5%, 1.9%], p=0.804"
AVG pro_contacts,0.0050,0.0047,0.0047,"-7.0% [-21.7%, 7.6%], p=0.348","-6.5% [-21.0%, 8.0%], p=0.380","0.6% [-14.3%, 15.5%], p=0.940"
