# Test Pilot Event-Based Installation Counts
The output of this notebook is used to populate the installation counts shown on testpilot.firefox.com. We calculate these counts as follows:
1. Count all the enable and disable events since the switchover to event-based pings
2. Add the enable counts to, and subtract the disable counts from, our baseline numbers (the approximate number of installations we had before moving to event-based pings)
3. Output these numbers to a publicly accessible JSON file (available at https://analysis-output.telemetry.mozilla.org/testpilot/data/installation-counts/latest.json). Note that we write the results to latest.json as well as to &lt;timestamp&gt;.json

Please note these numbers are calculated for the sole purpose of giving an approximation to end-users and should absolutely not be used for decision-making.

In [15]:
# Show the default parallelism reported by `sc`. `sc` is not defined in this
# notebook; presumably it is the SparkContext injected by the kernel -- confirm.
sc.defaultParallelism

256

In [16]:
from moztelemetry import get_pings, get_pings_properties
from collections import defaultdict
import json
import time
import boto3
from boto3.s3.transfer import S3Transfer

# Passed to get_pings as `fraction`; presumably 1 means "process every ping".
# The counts computed below are raw event tallies that are never scaled back up,
# so any sampling here would undercount installations.
FRACTION = 1

In [17]:
# Load the Test Pilot pings and narrow them down to the new-style ones,
# then count how many distinct clients have sent at least one of those.

testpilot_pings = get_pings(sc, app="Firefox", doc_type="testpilot", fraction=FRACTION)
testpilot_fields = [
    "payload/version",
    "clientId",
    "payload/tests",
    "payload/events",
]
testpilot_subset = get_pings_properties(testpilot_pings, testpilot_fields)

# Every ping whose payload version is not 1 is treated as new-style.
new_ping_subset = testpilot_subset.filter(lambda ping: ping['payload/version'] != 1)

# One (clientId, True) pair per distinct client.
active_clients = new_ping_subset.map(lambda ping: (ping["clientId"], True)).distinct()
active_clients.count()

43243

In [18]:
# Baseline installs: approximately how many installations we had before the TxP
# event-based ping changes. Keys use the same identifiers that appear as the
# 'object' of enabled/disabled events, so they line up when the counts are joined.
# Identifiers without an entry here simply have no baseline contribution.
baseline_installations = {
    u'@activity-streams': 22000,
    u'@testpilot-addon': 30000,  # This number is *totally* made up, do not use for anything important
    u'tabcentertest1@mozilla.com': 20000,
    u'universal-search@mozilla.com': 33500,
    u'wayback_machine@mozilla.org': 3400
}

In [19]:
# Flatten the new-style pings into one collection of individual events, from
# which the enabled/disabled events are picked out below.
# A dict.get() default only applies when the key is absent; get_pings_properties
# can store None for a requested field that a ping lacks, in which case the key
# is present and flatMap would fail on None. `or []` treats a missing or None
# event list as "no events".
new_style_events = new_ping_subset.flatMap(lambda t: t.get('payload/events') or [])

In [20]:
# Tally the 'enabled' events per event 'object' (the identifier the counts are keyed by).
enable_events = (
    new_style_events
    .filter(lambda event: event.get('event') == 'enabled')
    .map(lambda event: event['object'])
    .countByValue()
)
enable_events

defaultdict(int,
            {u'@activity-streams': 26632,
             u'@foo-bar': 2,
             u'@testpilot-addon': 16786,
             u'@x16': 4,
             u'blok@mozilla.org': 66,
             u'tabcentertest1@mozilla.com': 11194,
             u'universal-search@mozilla.com': 4110,
             u'wayback_machine@mozilla.org': 17274})

In [21]:
# Tally the 'disabled' events per event 'object' (the identifier the counts are keyed by).
disable_events = (
    new_style_events
    .filter(lambda event: event.get('event') == 'disabled')
    .map(lambda event: event['object'])
    .countByValue()
)

In [22]:
# Negate the disable event counts so that they subtract when everything is
# summed together later on.
# -abs() rather than * -1 keeps this cell idempotent: running it a second time
# leaves the counts negative instead of flipping them back to positive (which
# would silently add the disables to the totals).
for k, v in disable_events.items():
    disable_events[k] = -abs(v)

disable_events

defaultdict(int,
            {u'@activity-streams': -1493,
             u'@foo-bar': -2,
             u'@testpilot-addon': -2156,
             u'@x16': -4,
             u'blok@mozilla.org': -47,
             u'tabcentertest1@mozilla.com': -2639,
             u'universal-search@mozilla.com': -1073,
             u'wayback_machine@mozilla.org': -756})

In [23]:
# Add everything together

def join_dicts(dicts):
    """Merge several dicts into one that maps each key to a list of its values.

    Values are collected in the order the dicts are given. A dict that does not
    contain a key contributes nothing to that key's list.

    :param dicts: iterable of dict-like objects
    :returns: defaultdict(list) mapping key -> [value, ...]
    """
    merged = defaultdict(list)
    for source in dicts:
        for key in source:
            merged[key].append(source[key])
    return merged

# Each key ends up with the list of its contributions in argument order:
# baseline (when there is one), enable count, then the negated disable count.
final_counts = join_dicts([baseline_installations, enable_events, disable_events])
final_counts

defaultdict(list,
            {u'@activity-streams': [22000, 26632, -1493],
             u'@foo-bar': [2, -2],
             u'@testpilot-addon': [30000, 16786, -2156],
             u'@x16': [4, -4],
             u'blok@mozilla.org': [66, -47],
             u'tabcentertest1@mozilla.com': [20000, 11194, -2639],
             u'universal-search@mozilla.com': [33500, 4110, -1073],
             u'wayback_machine@mozilla.org': [3400, 17274, -756]})

In [24]:
# Collapse each key's list of contributions into a single installation count.
for k, v in final_counts.items():
    # A value that is no longer a list was already summed by an earlier run of
    # this cell; skipping it keeps the cell safe to re-run (sum() of an int
    # would raise TypeError).
    if isinstance(v, list):
        # Clamp at zero: if the disables exceed baseline + enables, the total
        # would otherwise be published as a negative installation count.
        final_counts[k] = max(sum(v), 0)

final_counts

defaultdict(list,
            {u'@activity-streams': 47139,
             u'@foo-bar': 0,
             u'@testpilot-addon': 44630,
             u'@x16': 0,
             u'blok@mozilla.org': 19,
             u'tabcentertest1@mozilla.com': 28555,
             u'universal-search@mozilla.com': 36537,
             u'wayback_machine@mozilla.org': 19918})

In [25]:
# Serialize the final counts to a JSON string; the cells below write it to a
# local file and upload that file.
counts_json = json.dumps(final_counts)

In [26]:
# Not really necessary, but we're saving historical output to a timestamped file.
# The timestamp is the current Unix time truncated to whole seconds, taken when
# this cell runs; it is used below to build the timestamped file name.
timestamp = int(time.time())
timestamp

1473194862

In [27]:
# Destination bucket and key prefix for the published counts.
bucket = "telemetry-public-analysis-2"
path = "testpilot/data/installation-counts/"

# Two object names: a rolling "latest" copy and a timestamped historical copy.
latest_filename = "latest.json"
timestamped_filename = "{}.json".format(timestamp)

latest_s3_key = path + latest_filename
timestamped_s3_key = path + timestamped_filename

# Only one local file is written; it is the source for both uploads.
with open(latest_filename, 'w') as out_file:
    out_file.write(counts_json)

In [28]:
client = boto3.client('s3', 'us-west-2')
transfer = S3Transfer(client)

# The same local file is uploaded under both keys: the timestamped copy first,
# then latest.json. Both are tagged as JSON.
for s3_key in (timestamped_s3_key, latest_s3_key):
    transfer.upload_file(
        latest_filename,
        bucket,
        s3_key,
        extra_args={'ContentType': 'application/json'}
    )