In [None]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from datetime import datetime, timedelta

from plotly.graph_objs import *
from moztelemetry import get_pings_properties, get_one_ping_per_client
from moztelemetry.dataset import Dataset

%matplotlib inline

In [None]:
# Display the default parallelism of `sc`. `sc` is not defined in the visible
# cells -- presumably the SparkContext injected by the notebook environment
# (TODO confirm).
sc.defaultParallelism

The FX_TAB_SWITCH_SPINNER_VISIBLE_LONG_MS probe was introduced and made opt-out in the September 8th, 2016 build. Let's make sure we only examine builds from that date onward.

In [None]:
# First build date on which the spinner probe exists.
probe_available = datetime(2016, 9, 8)
# Size of the look-back window, in days, counted back from today.
days_to_look_back = 180

# Start of the look-back window, clamped so that it never predates the probe.
earliest_lookback = datetime.today() - timedelta(days=days_to_look_back)
start_date = max(probe_available, earliest_lookback).strftime("%Y%m%d")
start_date

In [None]:
# Upper bound for the build-ID filter: today's date as a YYYYMMDD string.
end_date = datetime.today().strftime("%Y%m%d")
end_date

In [None]:
def build_id_in_range(b):
    """Return True when build ID `b` lies between start_date and end_date, inclusive.

    Build IDs are compared as plain strings against the YYYYMMDD bounds. The
    `startswith` checks keep IDs that begin with a bound but carry extra
    trailing characters; without the one on end_date, such an ID would sort
    after the bare date string and be rejected.
    """
    on_or_after_start = b.startswith(start_date) or b > start_date
    on_or_before_end = b.startswith(end_date) or b < end_date
    return on_or_after_start and on_or_before_end

# Main pings from the nightly channel whose build ID falls in the date window.
pings = (
    Dataset.from_source("telemetry")
    .where(docType='main')
    .where(appBuildId=build_id_in_range)
    .where(appUpdateChannel="nightly")
    .records(sc, sample=1.0)
)

In [None]:
# Ping fields to extract, written as slash-separated paths into each ping.
# The later cells index the extracted records by these exact strings.
properties = ["clientId",
              "payload/histograms/FX_TAB_SWITCH_SPINNER_VISIBLE_LONG_MS",
              "environment/system/os/name",
              "application/buildId",
              "environment/settings/e10sEnabled"]

In [None]:
# Extract the fields named in `properties` from every ping; later cells read
# each resulting record by indexing it with those same path strings.
ping_props = get_pings_properties(pings, properties)

For now, we're focusing on our Windows users, so let's filter out anything that's not Windows. We also keep only the pings that report e10s as enabled.

In [None]:
def windows_only(p):
    """Predicate: True when the ping's OS name is exactly "Windows_NT"."""
    os_name = p["environment/system/os/name"]
    return os_name == "Windows_NT"

# Keep only the pings for which windows_only returns True.
windows_pings_only = ping_props.filter(windows_only)

def e10s_enabled_only(p):
    """Return the ping's e10sEnabled setting unchanged, for use as a filter predicate.

    The raw value is returned rather than a coerced bool, so a missing
    setting (None) is passed through as-is and treated as falsy by a filter.
    """
    e10s_setting = p["environment/settings/e10sEnabled"]
    return e10s_setting

# Of the Windows pings, keep only those whose e10sEnabled value is truthy.
e10s_enabled_on_windows_pings_only = windows_pings_only.filter(e10s_enabled_only)

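To make it easier to group by both build ID and client ID, we'll map each ping to a key-value pair whose key is the tuple (build ID, client ID) and whose value is that ping's spinner histogram, and then reduce on that key to accumulate the histograms from all pings that came in for the same client and the same build. Only the first 8 characters of the build ID are used in the key.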

Note that if the client pinged but didn't have the histogram, the value in the key-value pair will be `None`.

In [None]:
# Peek at a single filtered record to see what the extracted fields look like.
e10s_enabled_on_windows_pings_only.first()

In [None]:
def keyed_by_build_and_client(ping):
    """Turn a ping into a ((build ID prefix, client ID), spinner histogram) pair.

    Only the first 8 characters of the build ID go into the key. The value is
    whatever the ping holds for the spinner histogram, which may be None.
    """
    build_prefix = ping["application/buildId"][:8]
    key = (build_prefix, ping["clientId"])
    spinner_hist = ping["payload/histograms/FX_TAB_SWITCH_SPINNER_VISIBLE_LONG_MS"]
    return (key, spinner_hist)

def none_is_subsumed(x, y):
    """Add two values, treating None as "nothing to add".

    Returns x + y when both are present, whichever one is present when the
    other is None, and None when both are None.
    """
    if x is not None and y is not None:
        return x + y
    return y if x is None else x

# Key every ping by (build ID prefix, client ID), then merge the histograms
# that share a key so each client has one combined histogram per build.
grouped_spinners = e10s_enabled_on_windows_pings_only.map(keyed_by_build_and_client).reduceByKey(none_is_subsumed)


The histogram for the spinner looks like this:

```
 0        1.0
 1000     0.0
 2297     0.0
 5277     0.0
 12124    0.0
 27856    0.0
 64000    0.0
 dtype: float64,
```

And we've combined all of the histograms for each client and build. What that means is that `grouped_spinners` is keyed on (build ID, client ID) and the value is the sum of all of the spinner histograms (or `None` if the client never saw a spinner for that build). Next, we'll look at each spinner histogram sum, and bucket each client by the highest histogram bucket in which we see a value > 0.

We'll just use a pandas `Series` for the bucket structure.

In [None]:
def bucket_by_severity_per_client(spinner_pair):
    """Classify one client's summed spinner histogram by its worst bucket.

    Args:
        spinner_pair: ((build ID, client ID), hist), where hist is either
            None or a histogram indexable by bucket lower bound (0, 1000,
            2297, 5277, 12124, 27856, 64000).

    Returns:
        (build ID, severity), where severity is a pandas Series over the
        named buckets with exactly one entry set to 1: the highest bucket
        that has a positive count, or "unaffected" when hist is None or has
        no positive count at all.
    """
    buildId = spinner_pair[0][0]
    hist = spinner_pair[1]
    named_index = ["unaffected",
                   "0ms - 999ms",
                   "1000ms - 2296ms",
                   "2297ms - 5276ms",
                   "5277ms - 12123ms",
                   "12124ms - 27855ms",
                   "27856ms - 63999ms",
                   "64000ms+"]
    # Lower bound of each histogram bucket, aligned with named_index[1:].
    bucket_starts = [0, 1000, 2297, 5277, 12124, 27856, 64000]

    severity = pd.Series([0] * len(named_index), index=named_index)

    # Start from "unaffected" so that every client lands in exactly one
    # bucket. Otherwise a histogram with no positive count would leave the
    # Series all zero and drop the client from the per-build totals.
    worst = named_index[0]
    if hist is not None:
        # Scan from the longest spinner bucket down; the first hit is the worst.
        for start, label in reversed(list(zip(bucket_starts, named_index[1:]))):
            if hist[start] > 0:
                worst = label
                break

    severity[worst] = 1
    return (buildId, severity)

# Replace each ((build ID, client ID), histogram) pair with a
# (build ID, severity Series) pair so the results can be summed per build.
bucketed_spinners_keyed_by_build = grouped_spinners.map(bucket_by_severity_per_client)

In [None]:
# Add up the per-client severity Series for each build, giving the number of
# clients that fell into each severity bucket on that build.
final_tally = bucketed_spinners_keyed_by_build.reduceByKey(lambda x, y: x + y)

In [None]:
def to_percentages(build_severities):
    """Normalise one build's severity tally so that its entries sum to 1.

    Takes a (build ID, tally Series) pair and returns (build ID, tally / total),
    i.e. each bucket's share of the counted clients as a fraction.
    """
    build_id = build_severities[0]
    tally = build_severities[1]
    return (build_id, tally / tally.sum())

# Normalise every build's tally and order the results by build ID.
percentages = final_tally.map(to_percentages).sortByKey()

In [None]:
# Materialise the sorted (build ID, severity fractions) pairs locally so they
# can be serialised below.
final_result = percentages.collect()

In [None]:
import os
import ujson as json

# Where the per-build severity fractions are written.
filename = "./output/severities_by_build_id.json"
final_result_json = json.dumps(final_result, ensure_ascii=False)

# open() does not create missing parent directories, so make sure ./output
# exists first; otherwise the write fails on a fresh working directory.
output_dir = os.path.dirname(filename)
if output_dir and not os.path.isdir(output_dir):
    os.makedirs(output_dir)

with open(filename, 'w') as f:
    f.write(final_result_json)

# Show what was written as the cell's output.
final_result