In [1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import datetime
from __future__ import division

from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client

%pylab inline

Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/histogram-whitelists.json). Assuming all histograms are acceptable.
Populating the interactive namespace from numpy and matplotlib


In [4]:
# We grab pings starting from 2 days ago until [TimeWindow] days ago.
TimeWindow = 14

# Additional filters.
Filters = {
    'app': 'Firefox',
    
    # We sample 0.5% of pings. For testing, it is beter to use a small number
    # here (like 0.001) to speed up processing time.
    'fraction': 0.001,
    
    # Optionally restrict pings to a single channel.
    # 'channel': 'nightly',
}

# In case you need them!
VendorIDs = {
    'Intel': '0x8086',
    'NVIDIA': '0x10de',
    'AMD': '0x1002',
    'ATI': '0x1002'
}

In [5]:
###############################
# This section gathers pings. #
###############################

def run_get_pings():
    def fmt_date(d):
        return d.strftime("%Y%m%d")
    t1 = fmt_date(datetime.datetime.now() - datetime.timedelta(TimeWindow + 2)) # go back 16 days
    t2 = fmt_date(datetime.datetime.now() - datetime.timedelta(2)) # go back 2 days
    return get_pings(sc, build_id=(t1, t2), **Filters)

# Get pings for the parameters in the previous step.
raw_pings = run_get_pings()

In [6]:
######################################################################
# This section takes the raw ping list, then formats and filters it. #
######################################################################

# Map the pings into a more readable dictionary-like form. To see
# what these look like, execute "pings.take(1)".
unique_pings = get_pings_properties(raw_pings, [
  "clientId",
  "environment/build/version",
  "environment/system/os/name",
  "environment/system/os/version",
  "environment/system/os/servicePackMajor",
  "environment/system/os/servicePackMinor",
  "environment/system/gfx/adapters",
  "payload/histograms/CANVAS_WEBGL_SUCCESS"
])
unique_pings = get_one_ping_per_client(unique_pings)

In [21]:
# We add two extra steps. The first rewrites the ping to have some
# information more easily accessible (like the primary adapter),
# and the second step removes any pings that don't have adapter
# information.
def rewrite_ping(p):
    adapters = p.get('environment/system/gfx/adapters', None)
    if not adapters:
        return None
    adapter = adapters[0]
            
    p['adapter'] = adapter
            
    # Convert the version to a tuple of integers.
    if 'driverVersion' in adapter:
        p['driverVersion'] = [int(n) for n in adapter['driverVersion'].split('.') if n.isdigit()]
    #else:
    #    return None
    return p

def filter_ping(p):
    return 'adapter' in p and 'driverVersion' in p
pings = unique_pings.map(rewrite_ping).filter(filter_ping)
pings = pings.cache()

#pings = pings.cache()

In [22]:
# Observe the format of a random ping. This may take some time since it has to
# execute the pipeline.
#pings.take(1)
#pings.take(1)
#new_pings = pings.map(rewrite_ping).filter(filter_ping)
pings.take(1)

[{'adapter': {u'GPUActive': True,
   u'description': u'Intel(R) HD Graphics',
   u'deviceID': u'0x22b1',
   u'driver': u'igdumdim64 igd10iumd64 igd10iumd64 igdumdim32 igd10iumd32 igd10iumd32',
   u'driverDate': u'3-19-2015',
   u'driverVersion': u'10.18.14.4175',
   u'subsysID': u'09531025',
   u'vendorID': u'0x8086'},
  'clientId': u'2fa8737c-b1a1-42f1-a3e2-9872eac591bd',
  'driverVersion': [10, 18, 14, 4175],
  'environment/build/version': u'48.0',
  'environment/system/gfx/adapters': [{u'GPUActive': True,
    u'description': u'Intel(R) HD Graphics',
    u'deviceID': u'0x22b1',
    u'driver': u'igdumdim64 igd10iumd64 igd10iumd64 igdumdim32 igd10iumd32 igd10iumd32',
    u'driverDate': u'3-19-2015',
    u'driverVersion': u'10.18.14.4175',
    u'subsysID': u'09531025',
    u'vendorID': u'0x8086'}],
  'environment/system/os/name': u'Windows_NT',
  'environment/system/os/servicePackMajor': 0,
  'environment/system/os/servicePackMinor': 0,
  'environment/system/os/version': u'6.3',
  'payl

In [23]:
# Count the total number of sessions in the dataset.
TotalSessions = pings.count()
print('Number of sessions: {0}'.format(TotalSessions))

Number of sessions: 25437


In [24]:
import json
import urllib2

gpu_db = json.load(urllib2.urlopen('https://raw.githubusercontent.com/jrmuizel/gpu-db/master/intel.json'))
devices = {}
for gen in gpu_db['8086'].items():
    for chipset in gen[1].items():
        for dev in chipset[1]:
            #print dev, gen[0]
            devices[int(dev,16)] = gen[0]

In [67]:
# Sample filter #1 - how many people are using Intel devices
# with a driver less than 8.15.10.2622? (bug 1175366).
def sample_filter_1(p):
    if not 'vendorID' in p['adapter']:
        return False
    if not 'driverVersion' in p['adapter']:
        return False

    return True

interesting_pings = pings.filter(sample_filter_1)
all_amd_pings = interesting_pings.filter(lambda p: p['adapter']['vendorID'] == '0x1002')
all_amd_pings.count()
#all_amd_pings.map(lambda p : tuple(p['driverVersion'])).countByValue()

3726

In [101]:
import json
import urllib2

gpu_db = json.load(urllib2.urlopen('https://raw.githubusercontent.com/jrmuizel/gpu-db/master/amd.json'))
devices = {}
for gen in gpu_db['1002'].items():
    for chipset in gen[1].items():
        for dev in chipset[1]:
            #print dev, gen[0]
            devices[int(dev,16)] = gen[0]
            
def do_gen(p):
    p['gen'] = devices[int(p['adapter']['deviceID'],16)]
    return p
    
all_amd_pings = all_amd_pings.map(do_gen)
gpu_db['1002']['R100']

{u'R100': [u'4336',
  u'4337',
  u'4c57',
  u'4c58',
  u'4c59',
  u'5157',
  u'5144',
  u'5159',
  u'515a',
  u'515e']}

In [102]:
def pad_number(x):
    if x <= 0:
        return x
    while x < 1000:
        x *= 10
    return x
def padDriver(p):
    for i in range(1,4):
        p['driverVersion'][i] = pad_number(p['driverVersion'][i])
    return p
all_amd_pings = all_amd_pings.map(padDriver)
all_amd_pings.count()
#all_amd_pings.map(lambda p : tuple(p['driverVersion'])).countByValue()

3726

In [121]:
blocked_amd = all_amd_pings.filter(lambda p: tuple(p['driverVersion']) < (8,5600,1000,1500))
blocked_amd = blocked_amd.filter(lambda p: 'vga' not in p['adapter']['driver'] and 'Unknown' not in p['adapter']['driver'])
print blocked_amd.count()/all_amd_pings.count()
import dateutil.parser
blocked_amd.map(lambda p: dateutil.parser.parse(p['adapter']['driverDate']).year).countByValue()

0.0391841116479


defaultdict(int,
            {2001: 6,
             2003: 2,
             2004: 18,
             2005: 21,
             2006: 31,
             2007: 28,
             2008: 40})

146

In [116]:
blocked_amd.map(lambda p: p['gen']).countByValue()
#blocked_amd.filter(lambda p: p['gen'] == 'CIK').take(1)

defaultdict(int,
            {u'R100': 14,
             u'R200': 13,
             u'R300': 73,
             u'R400': 11,
             u'R500': 9,
             u'R600': 19,
             u'R700': 1,
             u'RAGE': 6})

In [117]:
blocked_amd.map(lambda p: p['environment/system/os/version']).countByValue()

defaultdict(int, {u'5.1': 92, u'5.2': 1, u'6.0': 23, u'6.1': 29, u'6.3': 1})

In [23]:
import dateutil.parser
def before_2006(p):
    d = dateutil.parser.parse(p['adapter']['driverDate'])
    return d > dateutil.parser.parse('2006')
amd_pings = all_amd_pings.filter(before_2006)

In [12]:
amd_pings.map(lambda p: p['driverVersion'][2]).countByValue()

defaultdict(int,
            {0: 2,
             4: 10,
             5: 2,
             8: 8,
             9: 1,
             10: 68,
             11: 1237,
             12: 548,
             13: 5865,
             17: 1,
             24: 6})

In [13]:
xp_amd_pings = amd_pings.filter(lambda p: p['environment/system/os/version'] == '5.1')
xp_amd_pings.count()
not_xp_amd_pings = amd_pings.filter(lambda p: p['environment/system/os/version'] != '5.1')
not_xp_amd_pings.count()
xp_715_amd_pings = xp_amd_pings.filter(lambda p: p['driverVersion'][0] == 7 and p['driverVersion'][1] == 15)
xp_715_amd_pings = amd_pings.filter(lambda p: p['driverVersion'][0] == 7 and p['driverVersion'][1] == 15)

In [14]:
blocked = not_xp_amd_pings.filter(lambda p: p['driverVersion'][1] < 17 and (p['driverVersion'][2] * 10000 + p['driverVersion'][3]) > 118265)
blocked.count()

803

In [42]:
blocked.map(lambda p: dateutil.parser.parse(p['adapter']['driverDate'])).countByValue()

defaultdict(int,
            {datetime.datetime(2009, 4, 30, 0, 0): 14,
             datetime.datetime(2009, 5, 1, 0, 0): 2,
             datetime.datetime(2009, 5, 12, 0, 0): 4,
             datetime.datetime(2009, 5, 14, 0, 0): 2292,
             datetime.datetime(2009, 5, 24, 0, 0): 1,
             datetime.datetime(2009, 6, 3, 0, 0): 3,
             datetime.datetime(2009, 6, 4, 0, 0): 1,
             datetime.datetime(2009, 6, 8, 0, 0): 1,
             datetime.datetime(2009, 6, 10, 0, 0): 12,
             datetime.datetime(2009, 6, 11, 0, 0): 6,
             datetime.datetime(2009, 6, 16, 0, 0): 14,
             datetime.datetime(2009, 6, 23, 0, 0): 1,
             datetime.datetime(2009, 6, 26, 0, 0): 12,
             datetime.datetime(2009, 7, 1, 0, 0): 23,
             datetime.datetime(2009, 7, 4, 0, 0): 2,
             datetime.datetime(2009, 7, 8, 0, 0): 35,
             datetime.datetime(2009, 7, 11, 0, 0): 1,
             datetime.datetime(2009, 7, 14, 0, 0): 113,
       

In [40]:

blocked.map(lambda p: str([p['driverVersion'][0]] + [pad_number(x) for x in p['driverVersion'][1:4]])).countByValue()

defaultdict(int,
            {'[6, 1400, 1100, 8618]': 1,
             '[6, 1400, 1100, 8634]': 1,
             '[6, 1400, 1100, 9156]': 1,
             '[6, 1400, 1100, 9197]': 1,
             '[6, 1400, 1100, 9713]': 1,
             '[6, 1400, 1200, 5896]': 1,
             '[6, 1400, 1200, 6779]': 1,
             '[6, 1400, 1300, 2049]': 1,
             '[6, 1400, 1300, 2101]': 1,
             '[6, 1400, 1300, 4052]': 1,
             '[6, 1400, 1300, 4411]': 1,
             '[6, 1400, 1300, 6810]': 1,
             '[6, 1400, 1300, 7830]': 2,
             '[6, 1400, 1300, 7900]': 1,
             '[6, 1400, 1300, 9000]': 1,
             '[7, 1500, 1100, 8267]': 4,
             '[7, 1500, 1100, 8586]': 2,
             '[8, 1500, 1100, 8585]': 14,
             '[8, 1500, 1100, 8593]': 2292,
             '[8, 1500, 1100, 8599]': 1,
             '[8, 1500, 1100, 8610]': 3,
             '[8, 1500, 1100, 8613]': 1,
             '[8, 1500, 1100, 8618]': 11,
             '[8, 1500, 1100, 8619]

In [47]:
100*(all_amd_pings.count()/interesting_pings.count())

13.162465222156646

In [50]:
interesting_pings.map(lambda p: (p['adapter']['vendorID'])).countByValue()

defaultdict(int,
            {u'0x000b': 4,
             u'0x00ac': 3,
             u'0x00ba': 150,
             u'0x1002': 31866,
             u'0x1013': 3,
             u'0x1022': 1,
             u'0x1023': 4,
             u'0x102b': 119,
             u'0x1039': 653,
             u'0x10de': 31224,
             u'0x1106': 902,
             u'0x126f': 5,
             u'0x12d2': 1,
             u'0x1414': 48,
             u'0x15ad': 87,
             u'0x18ca': 12,
             u'0x1a03': 3,
             u'0x1ab8': 14,
             u'0x1b36': 2,
             u'0x300b': 379,
             u'0x5333': 54,
             u'0x5853': 1,
             u'0x8086': 171169,
             u'0x80ee': 30,
             u'ATI Technologies Inc.': 11,
             u'Humper': 1,
             u'Intel Open Source Technology Center': 275,
             u'Mesa Project': 3,
             u'Mesa project: www.mesa3d.org': 1,
             u'NVIDIA Corporation': 92,
             u'Nouveau': 1,
             u'Tungsten Grap

In [51]:
interesting_pings.count()

237220