In [1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import datetime
from __future__ import division

from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client

%pylab inline

Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/histogram-whitelists.json). Assuming all histograms are acceptable.
Populating the interactive namespace from numpy and matplotlib


In [2]:
# Length of the query window, in days. run_get_pings() requests the range
# from (TimeWindow + 2) days ago up to 2 days ago.
TimeWindow = 14

# Additional filters, forwarded as keyword arguments to get_pings().
Filters = {
    'app': 'Firefox',

    # We sample 0.5% of pings. For testing, it is better to use a small number
    # here (like 0.001) to speed up processing time.
    'fraction': 0.005,

    # Optionally restrict pings to a single channel.
    # 'channel': 'beta',
}

# In case you need them! Vendor name -> vendor ID string as it appears in the
# adapter's 'vendorID' field. AMD and ATI map to the same ID here.
VendorIDs = {
    'Intel': '0x8086',
    'NVIDIA': '0x10de',
    'AMD': '0x1002',
    'ATI': '0x1002'
}

In [3]:
###############################
# This section gathers pings. #
###############################

def run_get_pings():
    """Fetch the sampled pings for the configured time window.

    Builds a (start, end) pair of YYYYMMDD strings covering the range from
    (TimeWindow + 2) days ago to 2 days ago and hands it to get_pings() as
    the build_id filter, together with everything in Filters.

    Relies on the notebook globals `sc`, `TimeWindow` and `Filters`
    (`sc` is presumably the SparkContext injected by the environment).

    NOTE(review): the range is applied to build_id, not to a submission
    date -- confirm that is the intended meaning of "pings from the last
    TimeWindow days".

    Returns:
        Whatever get_pings() returns for those arguments.
    """
    def fmt_date(d):
        return d.strftime("%Y%m%d")

    # Read the clock once so both bounds are measured from the same instant;
    # two separate now() calls could land on different days around midnight.
    now = datetime.datetime.now()
    t1 = fmt_date(now - datetime.timedelta(TimeWindow + 2))  # window start
    t2 = fmt_date(now - datetime.timedelta(2))               # window end
    return get_pings(sc, build_id=(t1, t2), **Filters)

# Get pings for the parameters in the previous step.
raw_pings = run_get_pings()

In [4]:
######################################################################
# This section takes the raw ping list, then formats and filters it. #
######################################################################

# Project every raw ping onto the handful of properties this analysis
# reads (the result behaves like a dictionary keyed by these paths), then
# keep a single ping per client. To see what one looks like, execute
# "unique_pings.take(1)".
unique_pings = get_one_ping_per_client(
    get_pings_properties(
        raw_pings,
        [
            "clientId",
            "environment/build/version",
            "environment/system/os/name",
            "environment/system/os/version",
            "environment/system/os/servicePackMajor",
            "environment/system/os/servicePackMinor",
            "environment/system/gfx/adapters",
            "environment/system/cpu",
            "payload/histograms/CANVAS_WEBGL_SUCCESS",
        ],
    )
)

In [5]:
# We add two extra steps. The first rewrites the ping to have some
# information more easily accessible (like the primary adapter),
# and the second step removes any pings that don't have adapter
# information.
def rewrite_ping(p):
    """Promote the primary graphics adapter to top-level keys of the ping.

    Args:
        p: ping dictionary; the adapter list is read from the
            'environment/system/gfx/adapters' key.

    Returns:
        The same dictionary, mutated in place:
          * p['adapter'] is the first entry of the adapter list;
          * p['driverVersion'] is added as a list of ints when the adapter
            reports a driver version.
        Returns None when the adapter list is missing or empty.
    """
    adapters = p.get('environment/system/gfx/adapters', None)
    if not adapters:
        return None

    adapter = adapters[0]
    p['adapter'] = adapter

    # Convert the dotted version string to a list of integers, silently
    # dropping any component that is not purely numeric. The field may be
    # present but set to None, so test the value rather than the key.
    version = adapter.get('driverVersion')
    if version is not None:
        p['driverVersion'] = [int(n) for n in version.split('.') if n.isdigit()]
    return p

def filter_ping(p):
    """Keep only pings that carry a primary 'adapter' entry.

    rewrite_ping() yields None for pings without adapter information, so
    None is rejected explicitly; evaluating `'adapter' in None` would raise
    TypeError and abort the whole job.

    Returns:
        True when `p` is a mapping containing the 'adapter' key, else False.
    """
    return p is not None and 'adapter' in p
# Annotate each ping with its primary adapter, discard the ones that have
# none, and mark the result for caching since the cells below reuse it.
pings = unique_pings.map(rewrite_ping).filter(filter_ping).cache()

In [6]:
# Observe the format of a random ping. This may take some time since it has to
# execute the pipeline.
pings.take(1)

[{'adapter': {u'GPUActive': True,
   u'description': u'Intel(R) HD Graphics 3000',
   u'deviceID': u'0x0116',
   u'driver': u'igdumd32 igd10umd32 igd10umd32',
   u'driverDate': u'5-27-2015',
   u'driverVersion': u'9.17.10.4229',
   u'subsysID': u'04b01028',
   u'vendorID': u'0x8086'},
  'clientId': u'6fedc967-eb24-4146-9c50-bd723807d64d',
  'driverVersion': [9, 17, 10, 4229],
  'environment/build/version': u'48.0',
  'environment/system/cpu': {u'cores': 2,
   u'count': 4,
   u'extensions': [u'hasMMX',
    u'hasSSE',
    u'hasSSE2',
    u'hasSSE3',
    u'hasSSSE3',
    u'hasSSE4_1',
    u'hasSSE4_2'],
   u'family': 6,
   u'l2cacheKB': 256,
   u'l3cacheKB': 3072,
   u'model': 42,
   u'speedMHz': 2095,
   u'stepping': 7,
   u'vendor': u'GenuineIntel'},
  'environment/system/gfx/adapters': [{u'GPUActive': True,
    u'description': u'Intel(R) HD Graphics 3000',
    u'deviceID': u'0x0116',
    u'driver': u'igdumd32 igd10umd32 igd10umd32',
    u'driverDate': u'5-27-2015',
    u'driverVersion'

In [7]:
# Count the total number of sessions in the dataset. `pings` holds one ping
# per client and only those with adapter information, so this is really a
# count of such clients.
TotalSessions = pings.count()
print('Number of sessions: {0}'.format(TotalSessions))

Number of sessions: 115249


In [8]:
import contextlib
# NOTE: this rebinds `json`, which the first cell bound to ujson, to the
# standard-library module for the rest of the session.
import json
import urllib2

# Download the Intel GPU database. urllib2 responses are not context
# managers, so contextlib.closing() guarantees the connection is released
# even if parsing fails.
with contextlib.closing(urllib2.urlopen(
        'https://raw.githubusercontent.com/jrmuizel/gpu-db/master/intel.json')) as response:
    gpu_db = json.load(response)

# Build a lookup from numeric device ID to the top-level key it is filed
# under within gpu_db['8086'] (presumably the GPU generation name -- confirm
# against the gpu-db repository). The intermediate level of keys is ignored.
devices = {}
for generation, chipsets in gpu_db['8086'].items():
    for device_ids in chipsets.values():
        for dev in device_ids:
            # Device IDs are stored as hexadecimal strings.
            devices[int(dev, 16)] = generation

In [9]:
def match_count(a, b):
    """Print how large `a` is relative to `b`, as counts and a percentage.

    Args:
        a: object exposing count(); the matching subset.
        b: object exposing count(); the population `a` is compared against.

    Prints a line of the form
    'A out of B sessions matched. (P%)' with P rounded to two decimals.
    Each count() is evaluated exactly once (the callers in this notebook
    pass Spark RDDs, where a count may be expensive). When `b` is empty the
    percentage is reported as 0.00 instead of raising ZeroDivisionError.
    """
    matched = a.count()
    total = b.count()
    # float() keeps this a true division even where the notebook-wide
    # `from __future__ import division` is not in effect.
    percent = (matched / float(total)) * 100 if total else 0.0
    print('{0} out of {1} sessions matched. ({2:.2f}%)'.format(
        matched,
        total,
        percent))

In [11]:
# Sample filter #1 - keep the sessions that can be analysed for WebGL results.
# NOTE(review): this cell was labelled as the "Intel devices with a driver
# less than 8.15.10.2622" filter (bug 1175366), but no vendor or driver
# version comparison is performed here.
def sample_filter_1(p):
    """Tell whether a ping carries every field the WebGL analysis reads.

    Returns True only when the primary adapter has both a 'vendorID' and a
    'driver' entry and the CANVAS_WEBGL_SUCCESS histogram is not None;
    False otherwise.
    """
    adapter = p['adapter']
    for required in ('vendorID', 'driver'):
        if required not in adapter:
            return False
    return p['payload/histograms/CANVAS_WEBGL_SUCCESS'] is not None

def _webgl_failure_recorded(p):
    """True when bucket 0 of the ping's CANVAS_WEBGL_SUCCESS histogram is non-zero.

    Bucket 0 presumably counts failed WebGL context creations -- confirm
    against the histogram definition.
    """
    return p['payload/histograms/CANVAS_WEBGL_SUCCESS'][0] > 0


def _is_os_version_5_1(p):
    """True when the reported OS version parses to 5.1.

    The xp_* names below treat 5.1 as Windows XP. A version that is missing
    or is not a plain decimal number (for example one with three dotted
    components) counts as "no match" instead of raising and failing the job.

    NOTE(review): the OS name is not checked, only the version number.
    """
    try:
        return float(p['environment/system/os/version']) == 5.1
    except (TypeError, ValueError):
        return False


# Sessions that have the adapter fields and a WebGL histogram.
interesting_pings = pings.filter(sample_filter_1)

fail_pings = interesting_pings.filter(_webgl_failure_recorded)
xp_pings = interesting_pings.filter(_is_os_version_5_1)
xp_fail_pings = xp_pings.filter(_webgl_failure_recorded)

# Share of version-5.1 sessions that recorded a failure, then share of all
# failing sessions that are version 5.1.
match_count(xp_fail_pings, xp_pings)
match_count(xp_fail_pings, fail_pings)

997 out of 1608 sessions matched. (62.00%)
997 out of 2088 sessions matched. (47.75%)


In [15]:
def _has_cpu_extension(extension):
    """Build a ping predicate testing for `extension` in the CPU extension list."""
    def predicate(p):
        return extension in p['environment/system/cpu']['extensions']
    return predicate

# Split the failing 5.1 sessions by the SIMD extensions their CPU reports.
ssse3_pings = xp_fail_pings.filter(_has_cpu_extension('hasSSSE3'))
sse2_pings = xp_fail_pings.filter(_has_cpu_extension('hasSSE2'))
sse41_pings = xp_fail_pings.filter(_has_cpu_extension('hasSSE4_1'))

# Report in order: SSE2, SSSE3, SSE4.1.
match_count(sse2_pings, xp_fail_pings)
match_count(ssse3_pings, xp_fail_pings)
match_count(sse41_pings, xp_fail_pings)



990 out of 997 sessions matched. (99.30%)
579 out of 997 sessions matched. (58.07%)
127 out of 997 sessions matched. (12.74%)


In [32]:
# Per-session CPU figures for the failing 5.1 sessions: the 'cores' and
# 'count' fields of the cpu block (presumably physical cores vs. logical
# processors -- confirm against the telemetry environment documentation).
cores = xp_fail_pings.map(lambda p: p['environment/system/cpu']['cores'])
count = xp_fail_pings.map(lambda p: p['environment/system/cpu']['count'])

def percentByValue(c):
    """Return each distinct value's share of `c` as a formatted percentage.

    Args:
        c: object exposing countByValue(), returning a mapping of
            value -> number of occurrences.

    Returns:
        A list of (value, 'NN.NN%') tuples, in the mapping's iteration
        order. An empty input yields an empty list.
    """
    counts = c.countByValue()
    # The occurrences add up to the number of elements, so the total comes
    # from the tally itself rather than a c.count() call for every distinct
    # value. float() keeps the division exact without relying on
    # `from __future__ import division`.
    total = float(sum(counts.values()))
    return [(value, '{0:.2f}%'.format(100 * n / total)) for value, n in counts.items()]
# Distribution of the 'cores' field, then of the 'count' field.
print(percentByValue(cores))
print(percentByValue(count))

[(1, '35.41%'), (2, '63.09%'), (3, '0.20%'), (4, '1.30%')]
[(1, '22.47%'), (2, '75.43%'), (3, '0.20%'), (4, '1.91%')]


In [None]:


def webgl_fail(p):
    """Return True when bucket 0 of CANVAS_WEBGL_SUCCESS is above zero.

    Bucket 0 presumably holds the number of failed WebGL context creations
    (confirm against the histogram definition).
    """
    failures = p['payload/histograms/CANVAS_WEBGL_SUCCESS'][0]
    return True if failures > 0 else False
# Rebinds fail_pings; an earlier cell already derives it from
# interesting_pings with the same "bucket 0 > 0" test.
fail_pings = interesting_pings.filter(webgl_fail)

def vga_fail(p):
    """Return True when the primary adapter's driver field contains 'vga'."""
    return 'vga' in p['adapter']['driver']
# Failing sessions whose primary adapter driver string mentions 'vga'.
vga_pings = fail_pings.filter(vga_fail)

def not_vga_fail(p):
    """Return True when the primary adapter's driver field does not contain 'vga'."""
    return 'vga' not in p['adapter']['driver']
# Failing sessions whose primary adapter driver string does not mention 'vga'.
not_vga_pings = fail_pings.filter(not_vga_fail)


#TDRResults = sample_result_1.map(lambda p: p[DeviceResetReasonKey]).reduce(lambda x, y: x + y)
#help(pd.Series(TDRResults.values, DeviceResetReason))
#print(pd.Series(TDRResults.values[1:8],DeviceResetReason))
#pd.Series(TDRResults.values[1:8],DeviceResetReason).plot.bar()
#reduce(lambda x, y: sample_result_1.take(2))
#sample_result_1.take(1)[0]['payload/histograms/DEVICE_RESET_REASON'])
#fail_pings.take(1)




def map_x_to_count(data, sourceKey):
    """Count how many pings in `data` share each value of an adapter field.

    Args:
        data: object exposing map() whose result exposes countByValue()
            (the caller in this notebook passes a Spark RDD of pings).
        sourceKey: name of the field to read from each ping's 'adapter'.

    Returns:
        The mapping produced by countByValue(): field value -> number of
        pings. Pings whose adapter lacks `sourceKey` are tallied under None
        rather than raising KeyError and failing the whole job.
    """
    def extract(p):
        return p['adapter'].get(sourceKey)
    # Counting the extracted values directly is equivalent to wrapping each
    # one in a 1-tuple and calling countByKey().
    return data.map(extract).countByValue()
# Report how each filtering stage narrows the data set.
match_count(interesting_pings, pings)    
match_count(fail_pings, interesting_pings)
match_count(vga_pings, fail_pings)
# Last expression of the cell, so the notebook displays the tally of
# adapter 'driverVersion' values among the failing non-VGA sessions.
map_x_to_count(not_vga_pings, 'driverVersion')