In [1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from moztelemetry.dataset import Dataset

%matplotlib inline

In [2]:
# Show the default parallelism of `sc`.
# NOTE(review): `sc` is not defined in this notebook; presumably it is the
# SparkContext injected by the notebook environment — confirm.
sc.defaultParallelism

32

We can look at the schema of the dataset we are interested in:

In [3]:
# List the dimensions of the 'telemetry' source. The dimensions used as
# `.where()` filters further down (docType, submissionDate, appUpdateChannel)
# all appear in this list.
Dataset.from_source('telemetry').schema

[u'submissionDate',
 u'sourceName',
 u'sourceVersion',
 u'docType',
 u'appName',
 u'appUpdateChannel',
 u'appVersion',
 u'appBuildId']

Let's create a Dataset of Telemetry submissions for a given submission date:

In [4]:
# 'main' pings from the nightly channel submitted on 20181020.
# The submissionDate value is converted with int() before it is compared
# (presumably the dimension arrives as a string).
pings_dataset = Dataset.from_source('telemetry').where(
    docType='main',
    submissionDate=lambda date: int(date) == 20181020,
    appUpdateChannel="nightly",
)

Grab all the samples, but keep only the properties that will be interesting to correlate (in particular, we omit 'payload').

In [5]:
# Fetch every matching ping (sample=1.0), keeping only four top-level
# properties; each selected property is stored under its own name.
pings = pings_dataset.select(
    **{section: section
       for section in ('environment', 'application', 'clientId', 'meta')}
).records(sc, sample=1.0)

fetching 6159.86539MB in 21560 files...


Filter to the pings enrolled in the "disabled" branch of the prefflip-webrender-v1-2-1492568 experiment:

In [6]:
# Keep a ping only when its environment carries an experiments section, that
# section lists the prefflip-webrender experiment, and the enrolled branch is
# "disabled".  The `and` chain short-circuits, so each lookup is attempted
# only after the membership test guarding it has passed.
exp_pings = pings.filter(
    lambda ping: (
        "experiments" in ping["environment"]
        and "prefflip-webrender-v1-2-1492568" in ping["environment"]["experiments"]
        and ping["environment"]["experiments"]["prefflip-webrender-v1-2-1492568"]["branch"] == "disabled"
    )
)

Keep only the pings where the `gfx.webrender.all.qualified` pref is missing from `userPrefs`:

In [None]:
# Disabled-branch pings whose userPrefs do not contain the
# gfx.webrender.all.qualified pref.
missing_pref = exp_pings.filter(
    lambda ping: not ("gfx.webrender.all.qualified" in ping["environment"]["settings"]["userPrefs"])
)

# Cache the filtered set for the cells that follow, then show its size.
cached = missing_pref.cache()
cached.count()

Inspect one of these pings:

In [None]:
# Take a single ping from the cached set and display it.
# NOTE(review): `inspect` is also the name of a standard-library module;
# nothing visible in this notebook imports it, but a more specific name
# would avoid confusion.
inspect = cached.take(1)
inspect

In [None]:
def interesting_prop(ping):
    """Return the compositor reported by a ping.

    The value is read from
    ping["environment"]["system"]["gfx"]["features"]["compositor"].
    A KeyError is raised if any level of that path is missing.
    """
    node = ping
    for key in ("environment", "system", "gfx", "features", "compositor"):
        node = node[key]
    return node

# Tally how many of the cached pings report each value of interesting_prop
# (the compositor).
cached.map(interesting_prop).countByValue()


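Just some fiddling: look at the complementary set (pings that do report the `gfx.webrender.all.qualified` pref) and at one of them whose compositor is WebRender.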

In [7]:
# Disabled-branch pings whose userPrefs do contain the
# gfx.webrender.all.qualified pref.
have_pref = exp_pings.filter(
    lambda ping: "gfx.webrender.all.qualified" in ping["environment"]["settings"]["userPrefs"]
)

# Cache the filtered set for the cells that follow, then show its size.
have_cached = have_pref.cache()
have_cached.count()

6347

In [11]:
# Show one ping that has the pref and whose compositor is "webrender".
(
    have_cached
    .filter(lambda ping: ping["environment"]["system"]["gfx"]["features"]["compositor"] == "webrender")
    .take(1)
)

[{'application': {u'architecture': u'x86-64',
   u'buildId': u'20181004224156',
   u'channel': u'nightly',
   u'displayVersion': u'64.0a1',
   u'name': u'Firefox',
   u'platformVersion': u'64.0a1',
   u'vendor': u'Mozilla',
   u'version': u'64.0a1',
   u'xpcomAbi': u'x86_64-msvc'},
  'clientId': u'2491447e-fb44-4c1b-b922-08228b622d32',
  'environment': {u'addons': {u'activeAddons': {u'adbhelper@mozilla.org': {u'appDisabled': False,
      u'blocklisted': False,
      u'description': u'An add-on to ease connecting to Firefox for Android.',
      u'foreignInstall': False,
      u'hasBinaryComponents': False,
      u'installDay': 17395,
      u'isSystem': False,
      u'isWebExtension': False,
      u'multiprocessCompatible': True,
      u'name': u'ADB Helper',
      u'scope': 1,
      u'signedState': 4,
      u'type': u'extension',
      u'updateDay': 17752,
      u'userDisabled': False,
      u'version': u'0.12.1'},
     u'firefox@getpocket.com': {u'appDisabled': False,
      u'blocklist

In [None]:
# Show one ping that has the pref and for which interesting_prop returns
# "webrender".
(
    have_cached
    .filter(lambda ping: interesting_prop(ping) == "webrender")
    .take(1)
)

In [None]:
# 'main' pings from the nightly channel with a submission date after
# 20181010.  The range has no upper bound, so it matches every later date
# available in the source.
client_dataset = Dataset.from_source('telemetry').where(
    docType='main',
    submissionDate=lambda date: int(date) > 20181010,
    appUpdateChannel="nightly",
)

In [None]:
# Pings in client_dataset that belong to one specific client, reduced to the
# client id and the experiments section of the environment.
#
# filter() (not map()) is required for the last step: map() would replace
# every ping with a True/False flag and keep all of them, so a later count()
# would report the total number of pings rather than this client's pings.
client_pings = (
    client_dataset
    .select(experiments='environment.experiments',
            clientId='clientId')
    .records(sc, sample=1.0)
    .filter(lambda p: p["clientId"] == "2491447e-fb44-4c1b-b922-08228b622d32")
)

In [None]:
# Display how many elements client_pings holds.
client_pings.count()