In [1]:
from jetstream.bigquery_client import BigQueryClient
import numpy as np
from statsmodels.stats.proportion import proportion_confint

In [2]:
# BigQuery dataset name; handed to BigQueryClient as its `dataset` argument.
# NOTE(review): the name suggests simulated A/A test results — confirm.
DATASET = 'dberry_simulated_AA_tests_00'

In [3]:
# Client bound to the experiments project and the DATASET configured earlier.
client = BigQueryClient(
    project='moz-fx-data-experiments',
    dataset=DATASET,
)

In [4]:
# Pull the statistics table for this simulation run
# (presumably returned as a pandas DataFrame — later cells use .loc on it).
sim_1_results_df = client.table_to_dataframe(
    'statistics_more_from_mozilla_96_overall_1'
)

In [5]:
# Keep only rows whose `comparison` field is populated; work on an
# independent copy so new columns can be added without touching the source frame.
comparisons_df = sim_1_results_df.loc[sim_1_results_df['comparison'].notna()].copy()

In [6]:
# Show the comparison rows as this cell's rich output.
comparisons_df

Unnamed: 0,metric,statistic,parameter,branch,comparison,comparison_to_branch,ci_width,point,lower,upper,segment,analysis_basis
1044,uri_count,deciles,0.300000000,treatment-b,difference,control,0.95,0.15436,-6.0,6.0,all,enrollments
1045,uri_count,deciles,0.400000000,treatment-b,difference,control,0.95,-1.34730,-11.0,9.0,all,enrollments
1046,uri_count,deciles,0.500000000,treatment-b,difference,control,0.95,2.28590,-15.0,19.0,all,enrollments
1047,uri_count,deciles,0.600000000,treatment-b,difference,control,0.95,3.37460,-20.0,28.0,all,enrollments
1048,uri_count,deciles,0.700000000,treatment-b,difference,control,0.95,8.29016,-27.0,44.0,all,enrollments
...,...,...,...,...,...,...,...,...,...,...,...,...
1403,ad_clicks,deciles,0.800000000,treatment-b,relative_uplift,control,0.95,0.00000,0.0,0.0,all,enrollments
1404,ad_clicks,deciles,0.900000000,treatment-b,relative_uplift,control,0.95,0.00000,0.0,0.0,all,enrollments
1405,ad_clicks,deciles,0.700000000,treatment-a,relative_uplift,control,0.95,0.00000,0.0,0.0,all,enrollments
1406,ad_clicks,deciles,0.800000000,treatment-a,relative_uplift,control,0.95,0.00000,0.0,0.0,all,enrollments


In [7]:
def sign(a):
    """Return the sign of a scalar as an int: 1, -1 or 0.

    Args:
        a: A real scalar (Python int/float or a numpy scalar).

    Returns:
        1 if ``a > 0``, -1 if ``a < 0``, otherwise 0. A NaN input compares
        False both ways and therefore yields 0.
    """
    # Coerce each comparison to int before subtracting: numpy booleans do not
    # support the `-` operator, so the bare `(a > 0) - (a < 0)` form raises
    # TypeError for numpy scalars.
    return int(a > 0) - int(a < 0)

In [8]:
# Classify every comparison CI by where zero sits relative to its bounds.
# Signs are computed column-wise as -1 / 0 / +1; a NaN bound compares False
# both ways and so gets sign 0.
lower_sign = comparisons_df['lower'].gt(0).astype(int) - comparisons_df['lower'].lt(0).astype(int)
upper_sign = comparisons_df['upper'].gt(0).astype(int) - comparisons_df['upper'].lt(0).astype(int)

# Bounds with different signs: the interval straddles (or touches) zero.
comparisons_df['different_signs'] = lower_sign != upper_sign
# Degenerate interval: BOTH bounds have sign 0. (A bitwise AND of the two signs
# compared to 0 would also flag intervals where only one bound is zero.)
comparisons_df['both_zero'] = (lower_sign == 0) & (upper_sign == 0)
# First matching condition wins, so 'Yes' takes precedence over 'Both-zero'.
comparisons_df['CI_contains_zero'] = np.select(
    [comparisons_df['different_signs'].to_numpy(), comparisons_df['both_zero'].to_numpy()],
    ['Yes', 'Both-zero'],
    default='No',
)

In [9]:
# Drop the rows labelled 'Both-zero'; copy so the result is independent of comparisons_df.
comparisons_df_not_both_zero = comparisons_df.loc[
    comparisons_df['CI_contains_zero'].ne('Both-zero')
].copy()

In [10]:
# Tally the 'Yes' / 'No' labels among the CIs that remain after the 'Both-zero' filter.
cicz_vc = comparisons_df_not_both_zero.CI_contains_zero.value_counts()
display(cicz_vc)
# Look the counts up by label. A label that never occurs counts as 0 rather than
# raising, and no positional `[0]` indexing on a string-indexed Series is needed.
n_ci_contains_zero = int(cicz_vc.get('Yes', 0))
n_ci_not_contains_zero = int(cicz_vc.get('No', 0))
n_ci_total = n_ci_contains_zero + n_ci_not_contains_zero

Yes    180
No       6
Name: CI_contains_zero, dtype: int64

In [11]:
# Fraction of the counted CIs that were labelled 'Yes', reported as a percentage.
prop_ci_contains_zero = n_ci_contains_zero / n_ci_total
'{:0.2f}% of the comparison metric confidence intervals are insignificant'.format(
    100 * prop_ci_contains_zero
)

'96.77% of the comparison metric confidence intervals are insignificant'

In [12]:
# Confidence interval for the observed proportion of 'Yes' CIs.
# No alpha/method is passed, so statsmodels' defaults apply
# (documented as alpha=0.05 with the normal approximation — confirm for the installed version).
sim_ci_success_low, sim_ci_success_high = proportion_confint(
    count=n_ci_contains_zero,
    nobs=n_ci_total,
)
print(
    'Our confidence interval for the proportion of insignificant CIs is ({:0.2f}%, {:0.2f}%)'.format(
        100 * sim_ci_success_low,
        100 * sim_ci_success_high,
    )
)

Our confidence interval for the proportion of insignificant CIs is (94.24%, 99.31%)


In [13]:
# Report whether the null value of 95% lies strictly inside the interval
# computed for the observed proportion of insignificant CIs.
if sim_ci_success_low < 0.95 < sim_ci_success_high:
    print('Observed proportion of insignificant metrics IS NOT significantly different from null hypothesis of 95%')
else:
    print('Observed proportion of insignificant metrics IS significantly different from null hypothesis of 95%')

Observed proportion of insignificant metrics IS NOT significantly different from null hypothesis of 95%
