# CFR MozAnalysis Template
This template is far from exhaustive, but it should save you time and get you to a first look at your experiment's results quickly.

In [0]:
# might need to change this
!pip install mozanalysis=='2020.4.1'

Collecting mozanalysis==2020.4.1
  Downloading https://files.pythonhosted.org/packages/30/d1/5b5a5adce605f559841f00dd5e39a9e0129cadfd58771953e213095034a0/mozanalysis-2020.4.1-py2.py3-none-any.whl
Installing collected packages: mozanalysis
Successfully installed mozanalysis-2020.4.1


In [0]:
# Colab-only step: authenticate the current user for this runtime
# (presumably required by the BigQuery calls further down -- confirm if running outside Colab).
from google.colab import auth

auth.authenticate_user()
print("Authenticated")

Authenticated


In [0]:
import mozanalysis.metrics.desktop as mmd
import mozanalysis.bayesian_stats.binary as mabsbin
import mozanalysis.frequentist_stats.bootstrap as mafsboot
import pandas as pd
from mozanalysis.experiment import Experiment
from mozanalysis.bq import BigQueryContext
from mozanalysis.metrics import DataSource, Metric, agg_any



In [0]:
# Experiment-specific settings: edit these for your own experiment,
# or leave them untouched to see a real-world example.

# CFR campaign identifiers. The inner double quotes are part of the value:
# these strings are pasted into the metric SQL expressions as string literals.
CFR_BUCKET_ID = '"CFR_IMPORT_PASSWORD"'
CFR_MESSAGE_ID = '"CFR_EXP_IMPORT_PASSWORD"'

# Experiment identity and branch labels
EXPERIMENT_SLUG = 'pref-activity-stream-password-import-cfr-release-74-74-bug-1617735'
REFERENCE_BRANCH = 'no-password-cfr'
TREATMENT_BRANCH = 'password-cfr'

# Enrollment period
START_DATE = '2020-03-10'
NUM_DATES_ENROLLMENT = 15

# Analysis window
LAST_DATE_FULL_DATA = '2020-04-08'
ANALYSIS_START_DAYS = 0
ANALYSIS_LENGTH_DAYS = 15
TIME_SERIES_PERIOD = 'weekly'

# BigQuery dataset passed to BigQueryContext
DATASET_ID = 'loines'

In [0]:
# CFR experiments additionally need a `bucket_id` and `message_id` filter to narrow down the CFR campaign of interest,
# so we declare the metrics here rather than codify them in the MA repo.
# Note that each metric is a boolean per client: yes/no, did the client generate the event?
# If you care about how many times each client was exposed, you will need to do something else.

# Bind the data source explicitly so this cell runs on a fresh kernel (the bare name `cfr`
# is not defined by any other cell in this notebook).
# NOTE(review): assumes the pinned mozanalysis release exposes a `cfr` data source in
# mozanalysis.metrics.desktop -- confirm; if it does not, declare one here with DataSource(...).
cfr = mmd.cfr

# True when the client saw the CFR message of interest
cfr_impression = Metric(
    name='cfr_impression',
    data_source=cfr,
    select_expr=agg_any("""event = 'IMPRESSION' 
    AND bucket_id = {}
    AND message_id = {}""".format(CFR_BUCKET_ID, CFR_MESSAGE_ID))
)

# True when the client generated an ENABLE event for the CFR message of interest
cfr_enable = Metric(
    name='cfr_enable',
    data_source=cfr,
    select_expr=agg_any("""event = 'ENABLE'
    AND bucket_id = {}
    AND message_id = {}""".format(CFR_BUCKET_ID, CFR_MESSAGE_ID))
)

# True when the client generated a DISMISS event for the CFR message of interest
cfr_dismiss = Metric(
    name='cfr_dismiss',
    data_source=cfr,
    select_expr=agg_any("""event = 'DISMISS'
    AND bucket_id = {}
    AND message_id = {}""".format(CFR_BUCKET_ID, CFR_MESSAGE_ID))
)


In [0]:
# BigQuery context built from the dataset named in the config cell
bq_context = BigQueryContext(dataset_id=DATASET_ID)

# The experiment under analysis: slug, enrollment start date and number of enrollment dates
exp = Experiment(
    experiment_slug=EXPERIMENT_SLUG,
    start_date=START_DATE,
    num_dates_enrollment=NUM_DATES_ENROLLMENT,
)

# Fetch the engagement metrics plus the three CFR metrics for a single analysis window;
# the window's start offset and length come from the config cell.
res = exp.get_single_window_data(
    bq_context=bq_context,
    metric_list=[
        # general engagement
        mmd.active_hours,
        mmd.uri_count,
        mmd.search_count,
        # CFR campaign events
        cfr_impression,
        cfr_enable,
        cfr_dismiss,
    ],
    last_date_full_data=LAST_DATE_FULL_DATA,
    analysis_start_days=ANALYSIS_START_DAYS,
    analysis_length_days=ANALYSIS_LENGTH_DAYS,
)

Saved into 2020_04_08_pref_activity_stream_password_import_cfr_release_74_74_bug_1617735_40f7c384bdcb


In [0]:
# enrollments: number of distinct client_ids in each branch
res.groupby("branch")["client_id"].nunique()

branch
no-password-cfr    172804
password-cfr       173596
Name: client_id, dtype: int64

In [0]:
# Example: bootstrapped confidence intervals on a basic engagement metric (active_hours),
# compared against the reference branch.
# NOTE(review): threshold_quantile=0.9999 presumably limits the influence of extreme outliers -- confirm in the mozanalysis docs.
mafsboot.compare_branches(
    res,
    'active_hours',
    ref_branch_label=REFERENCE_BRANCH,
    threshold_quantile=0.9999,
)

{'comparative': {'password-cfr': rel_uplift    0.005   -0.044567
                0.025   -0.039199
                0.5     -0.018957
                0.975    0.001756
                0.995    0.008123
                exp     -0.018911
  abs_uplift    0.005   -0.195024
                0.025   -0.170991
                0.5     -0.081822
                0.975    0.007507
                0.995    0.034654
                exp     -0.081856
  max_abs_diff  0.95     0.156505
  prob_win      NaN      0.036200
  dtype: float64}, 'individual': {'no-password-cfr': 0.005    4.231084
  0.025    4.249791
  0.5      4.314346
  0.975    4.382663
  0.995    4.403947
  mean     4.314682
  dtype: float64, 'password-cfr': 0.005    4.159688
  0.025    4.175788
  0.5      4.232729
  0.975    4.290892
  0.995    4.310199
  mean     4.232826
  dtype: float64}}

In [0]:
# what are the conversion rates of the CFR message (enables / impressions)
# (note that my example data doesn't make sense grouped by branch since the control branch wasn't supposed to receive the CFR)
(
    # distinct clients per branch that saw the message
    res.query("cfr_impression == True")
    .groupby('branch')['client_id']
    .nunique()
    .to_frame('clients_with_impressions')
    # left join: keep every branch that had impressions, even if nobody in it enabled
    # (an inner merge would silently drop such a branch from the table)
    .join(
        res.query("cfr_enable == True")
        .groupby('branch')['client_id']
        .nunique()
        .rename('clients_enabled'),
        how='left',
    )
    # a branch with no enabling clients has a count of 0, not a missing value
    .fillna({'clients_enabled': 0})
    .astype({'clients_enabled': 'int64'})
    .assign(enable_rate=lambda counts: counts['clients_enabled'] / counts['clients_with_impressions'])
)

Unnamed: 0_level_0,clients_with_impressions,clients_enabled,enable_rate
branch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no-password-cfr,182,3,0.016484
password-cfr,55614,530,0.00953


In [0]:
# what are the dismiss rates of the CFR message (dismisses / impressions)
(
    # distinct clients per branch that saw the message
    res.query("cfr_impression == True")
    .groupby('branch')['client_id']
    .nunique()
    .to_frame('clients_with_impressions')
    # left join: keep every branch that had impressions, even if nobody in it dismissed
    # (an inner merge would silently drop such a branch from the table)
    .join(
        res.query("cfr_dismiss == True")
        .groupby('branch')['client_id']
        .nunique()
        .rename('clients_dismissed'),
        how='left',
    )
    # a branch with no dismissing clients has a count of 0, not a missing value
    .fillna({'clients_dismissed': 0})
    .astype({'clients_dismissed': 'int64'})
    .assign(dismiss_rate=lambda counts: counts['clients_dismissed'] / counts['clients_with_impressions'])
)

Unnamed: 0_level_0,clients_with_impressions,clients_dismissed,dismiss_rate
branch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no-password-cfr,182,4,0.021978
password-cfr,55614,1139,0.02048
