In [40]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from datetime import datetime, timedelta

from plotly.graph_objs import *
from moztelemetry import get_pings_properties, get_one_ping_per_client
from moztelemetry.dataset import Dataset

%matplotlib inline

In [41]:
sc.defaultParallelism

32

In [42]:
startDate = (datetime.today() - timedelta(days=3)).strftime("%Y%m%d")
startDate

'20170228'

In [43]:
endDate = datetime.today().strftime("%Y%m%d")
endDate

'20170303'

In [44]:
sample_size = 1.0

pings = Dataset.from_source("telemetry") \
    .where(docType='main') \
    .where(appBuildId=lambda b: (b.startswith(startDate) or b > startDate) and (b.startswith(endDate) or b < endDate)) \
    .where(appUpdateChannel="nightly") \
    .records(sc, sample=sample_size)

In [45]:
properties = ["clientId",
              "environment/system/os/name",
              "payload/info/subsessionLength",
              "payload/childPayloads"]

In [46]:
ping_props = get_pings_properties(pings, properties, with_processes=True)

In [47]:
def windows_only(p):
    return p["environment/system/os/name"] == "Windows_NT"

windows_pings_only = ping_props.filter(windows_only)

In [48]:
accum = sc.accumulator(0)
def calculate_total_sessions_length(ping):
    accum.add(ping['payload/info/subsessionLength'])

windows_pings_only.foreach(calculate_total_sessions_length)
total_sessions_length_s = accum.value
total_sessions_length_m = total_sessions_length_s / 60

total_sessions_length_m

17134329

In [49]:
def only_content_hangs(ping):
    result = []
    if ping['payload/childPayloads'] is None:
        return result
  
    for payload in ping['payload/childPayloads']:
        if 'threadHangStats' not in payload:
            return result
        for thread_hang in payload['threadHangStats']:
            if 'name' not in thread_hang:
                continue
            if thread_hang['name'] != 'Gecko_Child':
                continue
            if len(thread_hang['hangs']) > 0:
                result = result + thread_hang['hangs']
    return result
       
def only_parent_hangs(ping):
    result = []
  
    for thread_hang in ping['payload/threadHangStats']:
        if 'name' not in thread_hang:
            continue
        if thread_hang['name'] != 'Gecko':
            continue
        if len(thread_hang['hangs']) > 0:
            result = result + thread_hang['hangs']
    return result
    
content_hangs = windows_pings_only.flatMap(only_content_hangs)
parent_hangs = windows_pings_only.flatMap(only_parent_hangs)

In [50]:
# Scoring is total number of hangs > 100ms, divided by total
# sessionLength
def map_to_hang_sums(hang):
    hist_data = hang['histogram']['values']
    hist = pd.Series(hist_data.values(), index=map(int, hist_data.keys()))
    hang_sum = hist[hist.index >= 100].sum()
    return (tuple(hang['stack']), hang_sum)

grouped_content_hangs = content_hangs.map(map_to_hang_sums).reduceByKey(lambda a, b: a + b).collectAsMap()
grouped_parent_hangs = content_hangs.map(map_to_hang_sums).reduceByKey(lambda a, b: a + b).collectAsMap()

In [51]:
def group_by_top_frame(stacks):
    grand_total_hang_sum = 0
    top_frames = {}
    for stack, hang_sum in stacks.iteritems():
        if len(stack) == 0:
            continue
        stack_top_frame = stack[-1]
        if not stack_top_frame in top_frames:
            top_frames[stack_top_frame] = { "frame": stack_top_frame, "stacks": [], "hang_sum": 0 }

        top_frame = top_frames[stack_top_frame]

        # Keep stacks sorted by hits.
        top_frame["stacks"].append((stack, hang_sum))
        top_frame["stacks"].sort(key=lambda d: d[1], reverse=True)

        top_frame["hang_sum"] += hang_sum
        grand_total_hang_sum += hang_sum

    return top_frames, grand_total_hang_sum

content_top_frames, content_grand_total_hang_sum = group_by_top_frame(grouped_content_hangs)
parent_top_frames, parent_grand_total_hang_sum = group_by_top_frame(grouped_parent_hangs)

In [52]:
def score(grouping):
    grouping['hang_sum'] = float(grouping['hang_sum']) / float(total_sessions_length_m) * 1000
    scored_stacks = []
    for stack_tuple in grouping['stacks']:
        score = float(stack_tuple[1]) / float(total_sessions_length_m) * 1000
        scored_stacks.append((stack_tuple[0], score))
        
    grouping['stacks'] = scored_stacks
    return grouping


scored_content_top_frames = {k: score(g) for k, g in content_top_frames.iteritems()}
scored_parent_top_frames = {k: score(g) for k, g in parent_top_frames.iteritems()}


In [53]:
import ujson as json

def write_file(name, stuff):
    filename = "./output/%s-%s-%s.json" % (name, startDate, endDate)
    jsonblob = json.dumps(stuff, ensure_ascii=False)

    with open(filename, 'w') as f:
        f.write(jsonblob)

write_file('content', scored_content_top_frames)
write_file('parent', scored_parent_top_frames)
       
