Skip to content

Commit

Permalink
Bug 1168214 - Fix various problems with series signature calculation
Browse files Browse the repository at this point in the history
* We were not sorting subtest signatures before making them a property,
  causing duplicate signatures
* We had tons of redundant properties
  • Loading branch information
wlach committed Jun 2, 2015
1 parent 81c5c67 commit 8ff0096
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 23 deletions.
47 changes: 32 additions & 15 deletions treeherder/etl/perf_data_adapters.py
Expand Up @@ -148,7 +148,7 @@ def avg(s):
return series_data

@staticmethod
def _get_series_signature(signature_properties):
def get_series_signature(signature_properties):
signature_prop_values = signature_properties.keys()
signature_prop_values.extend(signature_properties.values())

Expand Down Expand Up @@ -185,6 +185,10 @@ def _add_performance_artifact(self, job_id, series_signature,

class TalosDataAdapter(PerformanceDataAdapter):

# keys useful for creating a non-redundant performance signature
SIGNIFICANT_REFERENCE_DATA_KEYS = ['option_collection_hash',
'machine_platform']

def __init__(self):

super(TalosDataAdapter, self).__init__()
Expand Down Expand Up @@ -221,7 +225,30 @@ def _get_base_perf_obj(job_guid, name, type, talos_datum, series_signature,

return obj

@staticmethod
def _transform_signature_properties(properties, significant_keys=None):
    """Reduce a property dict to the entries that define a signature.

    :param properties: mapping of property name to value.
    :param significant_keys: names to keep; when ``None`` the class-level
        ``TalosDataAdapter.SIGNIFICANT_REFERENCE_DATA_KEYS`` list is used.
    :returns: a new dict holding only the kept entries, plus a
        ``test_options`` entry (JSON-encoded ``["e10s"]``) whenever the
        ``job_group_symbol`` property contains ``e10s``. ``properties``
        itself is not modified.
    """
    wanted = significant_keys
    if wanted is None:
        wanted = TalosDataAdapter.SIGNIFICANT_REFERENCE_DATA_KEYS

    reduced = dict(
        (name, properties[name])
        for name in properties.keys()
        if name in wanted)

    # HACK: e10s runs are only recognisable through their job group symbol,
    # so translate that into an explicit "e10s" entry in 'test_options'
    # (talos should probably emit this itself so that it can simply be
    # consumed here in the future)
    group_symbol = properties.get('job_group_symbol', '')
    if 'e10s' in group_symbol:
        reduced['test_options'] = json.dumps(['e10s'])

    return reduced

def adapt_and_load(self, reference_data, job_data, datum):
# transform the reference data so it only contains what we actually
# care about
reference_data = self._transform_signature_properties(reference_data)

# Get just the talos datazilla structure for treeherder
target_datum = json.loads(datum['blob'])
for talos_datum in target_datum['talos_data']:
Expand All @@ -247,7 +274,7 @@ def adapt_and_load(self, reference_data, job_data, datum):
}
signature_properties.update(reference_data)

series_signature = self._get_series_signature(
series_signature = self.get_series_signature(
signature_properties)

series_data = {
Expand Down Expand Up @@ -296,7 +323,7 @@ def adapt_and_load(self, reference_data, job_data, datum):
}
signature_properties.update(reference_data)

series_signature = self._get_series_signature(
series_signature = self.get_series_signature(
signature_properties)
subtest_signatures.append(series_signature)

Expand All @@ -322,10 +349,10 @@ def adapt_and_load(self, reference_data, job_data, datum):
# summary series
summary_properties = {
'suite': _suite,
'subtest_signatures': json.dumps(subtest_signatures)
'subtest_signatures': json.dumps(sorted(subtest_signatures))
}
summary_properties.update(reference_data)
summary_signature = self._get_series_signature(
summary_signature = self.get_series_signature(
summary_properties)

summary_data = self._calculate_summary_data(
Expand All @@ -342,16 +369,6 @@ def adapt_and_load(self, reference_data, job_data, datum):
summary_properties, obj,
_name, 'summary', summary_data)

def get_series_signature(self, signature_values):
    """Return the SHA-1 hex digest identifying a series.

    The values are sorted, converted with ``str`` and concatenated before
    hashing, so the digest does not depend on the order in which the
    values are supplied.

    :param signature_values: iterable of mutually comparable values.
    :returns: 40-character hexadecimal digest string.
    """
    joined = ''.join(map(str, sorted(signature_values)))

    # hashlib only accepts byte strings. A native Python 2 ``str`` already
    # is one, so the encode step only runs where ``str`` is a text type.
    if not isinstance(joined, bytes):
        joined = joined.encode('utf-8')

    return sha1(joined).hexdigest()

def submit_tasks(self, project):

from treeherder.model.tasks import populate_performance_series
Expand Down
11 changes: 8 additions & 3 deletions treeherder/model/derived/jobs.py
Expand Up @@ -136,6 +136,11 @@ class JobsModel(TreeherderModelBase):
"jobs.deletes.cycle_result_set"
]

PERFORMANCE_SERIES_JSON_KEYS = [
"subtest_signatures",
"test_options"
]

@classmethod
def create(cls, project, host=None, read_only_host=None):
"""
Expand Down Expand Up @@ -418,7 +423,7 @@ def get_performance_series_summary(self, interval_seconds):
series_summary = defaultdict(dict)
for datum in data:
key, val = datum['property'], datum['value']
if key == 'subtest_signatures':
if key in self.PERFORMANCE_SERIES_JSON_KEYS:
val = json.loads(val)
series_summary[datum['signature']][key] = val

Expand Down Expand Up @@ -1995,14 +2000,14 @@ def get_signature_properties(self, signatures):
sigdict[signature] = {}

(key, val) = (property['property'], property['value'])
if key == 'subtest_signatures':
if key in self.PERFORMANCE_SERIES_JSON_KEYS:
val = json.loads(val)

sigdict[signature][key] = val

ret = []
for signature in signatures:
if not sigdict[signature]:
if not sigdict.get(signature):
return ObjectNotFoundException("signature", id=signature)
ret.append(sigdict[signature])

Expand Down
98 changes: 98 additions & 0 deletions treeherder/model/management/commands/rewrite_perf_data.py
@@ -0,0 +1,98 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at http://mozilla.org/MPL/2.0/.

from django.core.management.base import BaseCommand
from optparse import make_option
from treeherder.client import PerformanceTimeInterval
from treeherder.model.derived.jobs import JobsModel
from treeherder.model.models import Datasource
from treeherder.etl.perf_data_adapters import TalosDataAdapter


class Command(BaseCommand):
    """Rewrite stored performance series under reduced signatures.

    For every series whose signature carries properties outside
    ``SIGNIFICANT_KEYS``, a new signature is computed from the reduced
    property set, the series blobs are stored again under it and the old
    series and signature rows are deleted.
    """

    help = """
    Merge and update performance signatures to a minimal subset, concatenating
    identical series
    """

    # the only properties allowed to take part in a rewritten signature
    SIGNIFICANT_KEYS = (['suite', 'test', 'subtest_signatures', 'test_options'] +
                        TalosDataAdapter.SIGNIFICANT_REFERENCE_DATA_KEYS)

    option_list = BaseCommand.option_list + (
        make_option('--project',
                    action='store',
                    help='Only merge data on specified project (defaults to all)'),
        make_option('--mysql-debug',
                    action='store_true',
                    dest='mysql_debug',
                    default=False),
    )

    @classmethod
    def _needs_rewrite(cls, signature_properties):
        """Return True if the signature has properties outside SIGNIFICANT_KEYS."""
        return not set(signature_properties.keys()).issubset(
            cls.SIGNIFICANT_KEYS)

    def _rewrite_series(self, jm, signature_hash, signature_properties,
                        subtest_signature_mapping):
        """Store one series under its reduced signature and drop the old one.

        :param jm: open ``JobsModel`` for the project being rewritten.
        :param signature_hash: signature the series is currently stored under.
        :param signature_properties: properties currently stored for it.
        :param subtest_signature_mapping: old -> new signature hash of the
            series already rewritten; only read when the reduced properties
            contain ``subtest_signatures`` (callers pass ``None`` otherwise).
        :returns: the new signature hash.
        :raises KeyError: if a referenced subtest signature is missing from
            ``subtest_signature_mapping``.
        """
        new_props = TalosDataAdapter._transform_signature_properties(
            signature_properties,
            significant_keys=Command.SIGNIFICANT_KEYS)
        if 'subtest_signatures' in new_props:
            # a summary series refers to its subtests by signature, so point
            # it at the rewritten ones; sorted so that the same set of
            # subtests always produces the same summary signature
            # NOTE(review): a missing mapping entry raises KeyError here,
            # which presumably can happen when a previous run was
            # interrupted -- confirm how that case should be recovered
            new_props['subtest_signatures'] = sorted(
                subtest_signature_mapping[old_signature]
                for old_signature in new_props['subtest_signatures'])
        # NOTE(review): 'subtest_signatures' is a list at this point, while
        # the ingestion path hashes a JSON string -- confirm that
        # get_series_signature yields the same hash for both forms
        new_hash = TalosDataAdapter.get_series_signature(new_props)
        # single-argument call form prints the same text on Python 2 and 3
        print("%s -> %s" % (signature_hash, new_hash))
        jm.set_series_signature(new_hash, new_props)
        for time_interval in PerformanceTimeInterval.all_valid_time_intervals():
            series_list = jm.get_performance_series_from_signatures(
                [signature_hash], time_interval)
            if not series_list:
                # nothing is stored for this interval, so there is nothing
                # to copy; indexing [0] here would abort the whole rewrite
                # after the new signature has already been written
                continue

            jm.store_performance_series(time_interval, 'talos_data',
                                        str(new_hash),
                                        series_list[0]['blob'])

        # the data now lives under new_hash; remove what was stored under
        # the old signature
        jm.jobs_execute(proc='jobs.deletes.delete_performance_series',
                        placeholders=[signature_hash])
        jm.jobs_execute(proc='jobs.deletes.delete_series_signature',
                        placeholders=[signature_hash])

        return new_hash

    def _rewrite_data(self, project, mysql_debug):
        """Rewrite every non-minimal series signature of one project.

        Runs in two passes because a summary series embeds the signatures
        of its subtests: the plain series are rewritten first and their
        old -> new hashes recorded, then the summary series are rewritten
        using that mapping.

        :param project: project name passed to ``JobsModel``.
        :param mysql_debug: value assigned to the model's ``DEBUG`` flag.
        """
        signature_mapping = {}

        with JobsModel(project) as jm:
            jm.DEBUG = mysql_debug
            summary = jm.get_performance_series_summary(
                max(PerformanceTimeInterval.all_valid_time_intervals()))

            # signatures that are already minimal are left untouched
            stale = [(signature_hash, signature_properties)
                     for (signature_hash, signature_properties)
                     in summary.items()
                     if self._needs_rewrite(signature_properties)]

            # first pass: rewrite non-summary tests
            for (signature_hash, signature_properties) in stale:
                if not signature_properties.get('subtest_signatures'):
                    signature_mapping[signature_hash] = self._rewrite_series(
                        jm, signature_hash, signature_properties, None)

            # second pass: rewrite summary tests
            for (signature_hash, signature_properties) in stale:
                if signature_properties.get('subtest_signatures'):
                    self._rewrite_series(jm, signature_hash,
                                         signature_properties,
                                         signature_mapping)

    def handle(self, *args, **options):
        """Rewrite the given ``--project``, or every known project if unset."""
        if options['project']:
            projects = [options['project']]
        else:
            projects = Datasource.objects.values_list(
                'project', flat=True).distinct()

        for project in projects:
            self._rewrite_data(project, options['mysql_debug'])
8 changes: 8 additions & 0 deletions treeherder/model/sql/jobs.json
Expand Up @@ -8,6 +8,14 @@
"sql":"DELETE FROM job_note WHERE id = ?",
"host_type": "master_host"
},
"delete_series_signature": {
"sql": "DELETE FROM series_signature WHERE signature = ?",
"host_type": "master_host"
},
"delete_performance_series": {
"sql": "DELETE FROM performance_series WHERE series_signature = ?",
"host_type": "master_host"
},
"cycle_job_artifact":{

"sql":"DELETE FROM job_artifact WHERE job_id IN (REP0)",
Expand Down
8 changes: 3 additions & 5 deletions ui/js/perf.js
Expand Up @@ -9,9 +9,7 @@ var perf = angular.module("perf", ['ui.router', 'ui.bootstrap', 'treeherder']);
perf.factory('PhSeries', ['$http', 'thServiceDomain', function($http, thServiceDomain) {

var _getSeriesSummary = function(signature, signatureProps, optionCollectionMap) {
var platform = signatureProps.machine_platform + " " +
signatureProps.machine_architecture;
var e10s = (signatureProps.job_group_symbol === "T-e10s");
var platform = signatureProps.machine_platform;
var testName = signatureProps.test;
var subtestSignatures;
if (testName === undefined) {
Expand All @@ -20,8 +18,8 @@ perf.factory('PhSeries', ['$http', 'thServiceDomain', function($http, thServiceD
}
var name = signatureProps.suite + " " + testName;
var options = [ optionCollectionMap[signatureProps.option_collection_hash] ];
if (e10s) {
options.push("e10s");
if (signatureProps.test_options) {
options = options.concat(signatureProps.test_options);
}
name = name + " " + options.join(" ");

Expand Down

0 comments on commit 8ff0096

Please sign in to comment.