In [1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py

from pprint import pprint
from moztelemetry.spark import get_pings
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client
from datetime import date, timedelta
from __future__ import division

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
sc.defaultParallelism

80

In [4]:
yesterday = (date.today() - timedelta(1)).strftime("%Y%m%d")
pings = get_pings(sc, app="Firefox", channel="release", submission_date=yesterday, fraction=1)
pings.count()

5373921

What keys are in a ping?

In [5]:
ping = json.loads(pings.first())
ping.keys()

[u'keyedHistograms',
 u'info',
 u'slowSQL',
 u'ver',
 u'log',
 u'fileIOReports',
 u'histograms',
 u'lateWrites',
 u'clientID',
 u'addonHistograms',
 u'UIMeasurements',
 u'threadHangStats',
 u'simpleMeasurements',
 u'chromeHangs',
 u'slowSQLStartup',
 u'addonDetails']

What's included in `addonDetails`?

In [6]:
ping['addonDetails']

{u'GMP': {u'gmp-gmpopenh264': {u'applyBackgroundUpdates': 1,
   u'userDisabled': False,
   u'version': u'1.3'}},
 u'XPI': {u'{23fcfd51-4958-4f00-80a3-ae97e717ed8b}': {u'location': u'app-system-local',
   u'scan_MS': 0},
  u'{972ce4c6-7e08-4474-a285-3208198ce6fd}': {u'location': u'app-global',
   u'scan_MS': 1,
   u'scan_items': 3}}}

We can get the list of enabled addons like so:

In [7]:
ping['addonDetails']['XPI']
ping['addonDetails']['XPI'].keys()

[u'{972ce4c6-7e08-4474-a285-3208198ce6fd}',
 u'{23fcfd51-4958-4f00-80a3-ae97e717ed8b}']

Let's filter down to those pings that actually have XPI addons.  What does one look like?

In [37]:
#sample = pings.sample(withReplacement=False, fraction=0.001)
#sample.count()

subset = get_pings_properties(pings, ["clientID", "info/OS", "addonDetails/XPI"])
#print subset.count() #e.g. 5373921 total pings
subset = get_one_ping_per_client(subset)
#print subset.count() #e.g. 1072825 total pings
#pprint(subset.first())

5373921
1072825
{'addonDetails/XPI': {u'langpack-ja@firefox.mozilla.org': {u'creator': u'Mozilla Japanese L10N Community',
                                                           u'location': u'app-profile',
                                                           u'name': u'Japanese Language Pack',
                                                           u'scan_MS': 0,
                                                           u'scan_items': 1,
                                                           u'shutdown_MS': 0,
                                                           u'startup_MS': 10},
                      u'tmbepff@trendmicro.com': {u'creator': u'Trend Micro',
                                                  u'location': u'winreg-app-global',
                                                  u'name': u'Trend Micro BEP Firefox Extension',
                                                  u'scan_MS': 8,
                                                  u'scan_item

In [46]:
cached = subset.cache()

Let's work with the following:

- Only check those pings with addon details
- Get a list of the addons per ping and tally them
- Combine tallies of all addons by key to get counts by addon


In [55]:
def hasAddons(ping):
    return "addonDetails/XPI" in ping

def getAddons(ping):
    return ping["addonDetails/XPI"].keys()

addon_list = cached.filter(hasAddons).flatMap(getAddons).map(lambda x: (x,1)).reduceByKey(lambda x, y: x + y)
addon_counts = addon_list.map(lambda x:(x[1],x[0])).sortByKey(ascending=False)
addon_counts.take(5)

[(1072718, u'{972ce4c6-7e08-4474-a285-3208198ce6fd}'),
 (267263, u'{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}'),
 (184000, u'{20a82645-c095-46ed-80e3-08825760534b}'),
 (165222, u'wrc@avast.com'),
 (145287, u'{82AF8DCA-6DE9-405D-BD5E-43525BDAD38A}')]

In [None]:
Finally, let's group by OS:

In [None]:
grouped = cached.map(lambda p: (p["info/OS"], p["simpleMeasurements/firstPaint"])).groupByKey().collectAsMap()

## What are the top 50 addons by count?

In [30]:
addon_counts.take(50)

[(5351021, u'{972ce4c6-7e08-4474-a285-3208198ce6fd}'),
 (1274646, u'{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}'),
 (896094, u'{20a82645-c095-46ed-80e3-08825760534b}'),
 (857040, u'wrc@avast.com'),
 (730088, u'{82AF8DCA-6DE9-405D-BD5E-43525BDAD38A}'),
 (688965, u'{e4f94d1e-2f53-401e-8885-681602c0ddd8}'),
 (349733, u'{b9db16a4-6edc-47ec-a1f4-b86292ed211d}'),
 (323195, u'mozilla_cc@internetdownloadmanager.com'),
 (274146, u'{2D3F3651-74B9-4795-BDEC-6DA2F431CB62}'),
 (244433, u'web2pdfextension@web2pdf.adobedotcom'),
 (243815, u'avg@toolbar'),
 (226237, u'smartwebprinting@hp.com'),
 (195623, u'abs@avira.com'),
 (194851, u'url_advisor@kaspersky.com'),
 (194842, u'virtual_keyboard@kaspersky.com'),
 (191252, u'content_blocker@kaspersky.com'),
 (152924, u'jqs@sun.com'),
 (144599, u'anti_banner@kaspersky.com'),
 (144452, u'online_banking@kaspersky.com'),
 (136718, u'{73a6fe31-595d-460b-a920-fcc0f8843232}'),
 (133506, u'{DDC359D1-844A-42a7-9AA1-88A850A938A8}'),
 (132700, u'{4ED1F68A-5463-4931-9384-8

In [34]:
## What are the top addons as a proportion of all submitted pings 

In [33]:
total = pings.count()
addon_props = addon_counts.map(lambda x: (x[0]/total, x[1]))
addon_props.take(50)

[(0.9957386794483953, u'{972ce4c6-7e08-4474-a285-3208198ce6fd}'),
 (0.237191056586057, u'{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}'),
 (0.1667486366100283, u'{20a82645-c095-46ed-80e3-08825760534b}'),
 (0.15948131727280696, u'wrc@avast.com'),
 (0.1358575982043651, u'{82AF8DCA-6DE9-405D-BD5E-43525BDAD38A}'),
 (0.12820527134656426, u'{e4f94d1e-2f53-401e-8885-681602c0ddd8}'),
 (0.06507966901634765, u'{b9db16a4-6edc-47ec-a1f4-b86292ed211d}'),
 (0.060141375357025156, u'mozilla_cc@internetdownloadmanager.com'),
 (0.051014147770315196, u'{2D3F3651-74B9-4795-BDEC-6DA2F431CB62}'),
 (0.0454850378336414, u'web2pdfextension@web2pdf.adobedotcom'),
 (0.04537003800390813, u'avg@toolbar'),
 (0.04209905579185105, u'smartwebprinting@hp.com'),
 (0.0364022842911163, u'abs@avira.com'),
 (0.03625862754588317, u'url_advisor@kaspersky.com'),
 (0.03625695279108122, u'virtual_keyboard@kaspersky.com'),
 (0.03558891170897376, u'content_blocker@kaspersky.com'),
 (0.028456689259108946, u'jqs@sun.com'),
 (0.026907541067