Skip to content
Permalink
Browse files

Auto merge of #24981 - servo:wpt-unexpected, r=<try>

Improve diagnostics for WPT failures

* Include the full output (including stdout/stderr) in the intermittent-filtered log
* Print the intermittent-filtered log at the end of the main log (which is one less click to reach from Taskcluster’s task view, compared to other task artifacts)
* Fail with a specific message when a reftest screenshot is entirely white, to help recognize instances of #24726
  • Loading branch information
bors-servo committed Dec 1, 2019
2 parents 40fd791 + 10a3572 commit e0124781b0cd3d8a7cde319aace7fb57b713cbf7
@@ -748,12 +748,12 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes,
| cat
time ./mach test-wpt --release --processes $PROCESSES --timeout-multiplier=4 \
--headless --log-raw test-wdspec.log \
--log-errorsummary wdspec-errorsummary.log \
--log-servojson wdspec-jsonsummary.log \
--always-succeed \
webdriver \
| cat
./mach filter-intermittents \
wdspec-errorsummary.log \
wdspec-jsonsummary.log \
--log-intermittents intermittents.log \
--log-filteredsummary filtered-wdspec-errorsummary.log \
--tracker-api default \
@@ -768,11 +768,11 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes,
--total-chunks "$TOTAL_CHUNKS" \
--this-chunk "$THIS_CHUNK" \
--log-raw test-wpt.log \
--log-errorsummary wpt-errorsummary.log \
--log-servojson wpt-jsonsummary.log \
--always-succeed \
| cat
./mach filter-intermittents \
wpt-errorsummary.log \
wpt-jsonsummary.log \
--log-intermittents intermittents.log \
--log-filteredsummary filtered-wpt-errorsummary.log \
--tracker-api default \
@@ -68,6 +68,7 @@


def create_parser_wpt():
import mozlog.commandline
parser = wptcommandline.create_parser()
parser.add_argument('--release', default=False, action="store_true",
help="Run with a release build of servo")
@@ -77,6 +78,8 @@ def create_parser_wpt():
help="Pass preferences to servo")
parser.add_argument('--layout-2020', default=False, action="store_true",
help="Use expected results for the 2020 layout engine")
parser.add_argument('--log-servojson', action="append", type=mozlog.commandline.log_file,
help="Servo's JSON logger of unexpected results")
parser.add_argument('--always-succeed', default=False, action="store_true",
help="Always yield exit code of zero")
return parser
@@ -511,7 +514,7 @@ def update_wpt(self, **kwargs):
description='Given a WPT error summary file, filter out intermittents and other cruft.',
category='testing')
@CommandArgument('summary',
help="Error summary log to take un")
help="Error summary log to take in")
@CommandArgument('--log-filteredsummary', default=None,
help='Print filtered log to file')
@CommandArgument('--log-intermittents', default=None,
@@ -529,10 +532,7 @@ def filter_intermittents(self, summary, log_filteredsummary, log_intermittents,
encoded_auth = base64.encodestring(file.read().strip()).replace('\n', '')
failures = []
with open(summary, "r") as file:
for line in file:
line_json = json.loads(line)
if 'status' in line_json:
failures += [line_json]
failures = [json.loads(line) for line in file]
actual_failures = []
intermittents = []
for failure in failures:
@@ -546,10 +546,7 @@ def filter_intermittents(self, summary, log_filteredsummary, log_intermittents,
request = urllib.request.Request("%s/query.py?name=%s" % (tracker_api, query))
search = urllib.request.urlopen(request)
data = json.load(search)
if len(data) == 0:
actual_failures += [failure]
else:
intermittents += [failure]
is_intermittent = len(data) > 0
else:
qstr = "repo:servo/servo+label:I-intermittent+type:issue+state:open+%s" % failure['test']
# we want `/` to get quoted, but not `+` (github's API doesn't like that), so we set `safe` to `+`
@@ -559,28 +556,30 @@ def filter_intermittents(self, summary, log_filteredsummary, log_intermittents,
request.add_header("Authorization", "Basic %s" % encoded_auth)
search = urllib.request.urlopen(request)
data = json.load(search)
if data['total_count'] == 0:
actual_failures += [failure]
else:
intermittents += [failure]
is_intermittent = data['total_count'] > 0

if is_intermittent:
intermittents.append(failure["output"])
else:
actual_failures.append(failure["output"])

def format(outputs, description, file=sys.stdout):
print(len(outputs), description + ":\n", file=file)
file.write('\n'.join(outputs).encode("utf-8"))

if log_intermittents:
with open(log_intermittents, "w") as intermittents_file:
for intermittent in intermittents:
json.dump(intermittent, intermittents_file, indent=4)
print("\n", end='', file=intermittents_file)
with open(log_intermittents, "wb") as file:
format(intermittents, "known-intermittent unexpected results", file)

output = open(log_filteredsummary, "w") if log_filteredsummary else sys.stdout
for failure in actual_failures:
json.dump(failure, output, indent=4)
print("\n", end='', file=output)
description = "unexpected results that are NOT known-intermittents"
if log_filteredsummary:
with open(log_filteredsummary, "wb") as file:
format(actual_failures, description, file)

if output is not sys.stdout:
output.close()
if actual_failures:
format(actual_failures, description)

if len(actual_failures) == 0:
return 0
return 1
return bool(actual_failures)

@Command('test-android-startup',
description='Extremely minimal testing of Servo for Android',
@@ -4,6 +4,7 @@

from mozlog.formatters import base
import collections
import json
import os
import sys
import subprocess
@@ -14,7 +15,7 @@
DEFAULT_CLEAR_EOL_CODE = u"\x1b[K"


class GroupingFormatter(base.BaseFormatter):
class ServoFormatter(base.BaseFormatter):
"""Formatter designed to produce unexpected test results grouped
together in a readable format."""
def __init__(self):
@@ -77,7 +78,7 @@ def text_to_erase_display(self):
return ((self.move_up + self.clear_eol) *
self.current_display.count('\n'))

def generate_output(self, text=None, new_display=None):
def generate_output(self, text=None, new_display=None, unexpected_in_test=None):
if not self.interactive:
return text

@@ -146,10 +147,11 @@ def get_lines_for_unexpected_result(self,

lines = [u"%s%s %s" % (status, expected_text, test_name)]
if message:
lines.append(u" \u2192 %s" % message)
for message_line in message.splitlines():
lines.append(u" \u2192 %s" % message_line)
if stack:
lines.append("")
lines += [stackline for stackline in stack.splitlines()]
lines.extend(stack.splitlines())
return lines

def get_output_for_unexpected_subtests(self, test_name, unexpected_subtests):
@@ -230,7 +232,8 @@ def test_end(self, data):
subtest_failures)
self.test_failure_text += output

return self.generate_output(text=output, new_display=new_display)
return self.generate_output(text=output, new_display=new_display,
unexpected_in_test=test_name)

def test_status(self, data):
if "expected" in data:
@@ -289,3 +292,16 @@ def log(self, data):

if data['level'] in ('CRITICAL', 'ERROR'):
return self.generate_output(text=data['message'] + "\n")


class ServoJsonFormatter(ServoFormatter):
    """Variant of ServoFormatter that emits one JSON object per
    unexpected test result instead of human-readable console output."""

    def suite_start(self, data):
        # Run the parent's bookkeeping, but swallow its return value so
        # no suite-start banner text is emitted into the JSON log.
        super().suite_start(data)

    def generate_output(self, text=None, new_display=None, unexpected_in_test=None):
        # Only unexpected results produce a line; everything else is dropped.
        if not unexpected_in_test:
            return None
        record = {"test": unexpected_in_test, "output": text}
        return json.dumps(record) + "\n"

    def log(self, _):
        # Plain log messages carry no test result; emit nothing.
        return None
@@ -34,7 +34,9 @@ def run_tests(**kwargs):
set_defaults(kwargs)

mozlog.commandline.log_formatters["servo"] = \
(grouping_formatter.GroupingFormatter, "A grouping output formatter")
(grouping_formatter.ServoFormatter, "Servo’s grouping output formatter")
mozlog.commandline.log_formatters["servojson"] = \
(grouping_formatter.ServoJsonFormatter, "Servo's JSON logger of unexpected results")

use_mach_logging = False
if len(kwargs["test_list"]) == 1:
@@ -358,17 +358,17 @@ def get_hash(self, test, viewport_size, dpi):
def reset(self):
    """Drop all cached screenshots (e.g. between test runs)."""
    # Clears the dict in place, so any external references to the cache
    # observe the reset too.
    self.screenshot_cache.clear()

def is_pass(self, hashes, screenshots, relation, fuzzy):
def is_pass(self, hashes, screenshots, urls, relation, fuzzy):
assert relation in ("==", "!=")
if not fuzzy or fuzzy == ((0,0), (0,0)):
equal = hashes[0] == hashes[1]
# sometimes images can have different hashes, but pixels can be identical.
if not equal:
self.logger.info("Image hashes didn't match, checking pixel differences")
max_per_channel, pixels_different = self.get_differences(screenshots)
max_per_channel, pixels_different = self.get_differences(screenshots, urls)
equal = pixels_different == 0 and max_per_channel == 0
else:
max_per_channel, pixels_different = self.get_differences(screenshots)
max_per_channel, pixels_different = self.get_differences(screenshots, urls)
allowed_per_channel, allowed_different = fuzzy
self.logger.info("Allowed %s pixels different, maximum difference per channel %s" %
("-".join(str(item) for item in allowed_different),
@@ -379,11 +379,13 @@ def is_pass(self, hashes, screenshots, relation, fuzzy):
allowed_different[0] <= pixels_different <= allowed_different[1]))
return equal if relation == "==" else not equal

def get_differences(self, screenshots):
def get_differences(self, screenshots, urls):
from PIL import Image, ImageChops, ImageStat

lhs = Image.open(io.BytesIO(base64.b64decode(screenshots[0]))).convert("RGB")
rhs = Image.open(io.BytesIO(base64.b64decode(screenshots[1]))).convert("RGB")
self.check_if_solid_color(lhs, urls[0])
self.check_if_solid_color(rhs, urls[1])
diff = ImageChops.difference(lhs, rhs)
minimal_diff = diff.crop(diff.getbbox())
mask = minimal_diff.convert("L", dither=None)
@@ -394,6 +396,12 @@ def get_differences(self, screenshots):
(count, per_channel))
return per_channel, count

def check_if_solid_color(self, image, url):
    """Record a diagnostic message if *image* is a single solid color.

    An entirely uniform screenshot (e.g. all-white) usually means the
    page failed to render at all, so flagging it helps recognize such
    failures from the reftest log.

    image: a PIL-style image; only ``getextrema()`` is used.
    url: the URL the screenshot was taken from, included in the message.
    """
    # getextrema() yields one (lo, hi) pair per channel; the image is a
    # solid color exactly when every channel's range collapses to a point.
    # (Renamed from the original's `min`/`max`, which shadowed builtins.)
    extrema = image.getextrema()
    if all(lo == hi for lo, hi in extrema):
        # Build an RRGGBB-style hex string from the single per-channel value.
        color = ''.join('%02X' % lo for lo, _ in extrema)
        # NOTE(review): assumes self.message is list-like — confirm with
        # the enclosing executor class, which is not visible here.
        self.message.append("Screenshot is solid color 0x%s for %s\n" % (color, url))

def run_test(self, test):
viewport_size = test.viewport_size
dpi = test.dpi
@@ -406,6 +414,7 @@ def run_test(self, test):
while stack:
hashes = [None, None]
screenshots = [None, None]
urls = [None, None]

nodes, relation = stack.pop()
fuzzy = self.get_fuzzy(test, nodes, relation)
@@ -416,8 +425,9 @@ def run_test(self, test):
return {"status": data[0], "message": data[1]}

hashes[i], screenshots[i] = data
urls[i] = node.url

if self.is_pass(hashes, screenshots, relation, fuzzy):
if self.is_pass(hashes, screenshots, urls, relation, fuzzy):
fuzzy = self.get_fuzzy(test, nodes, relation)
if nodes[1].references:
stack.extend(list(((nodes[1], item[0]), item[1]) for item in reversed(nodes[1].references)))

0 comments on commit e012478

Please sign in to comment.
You can’t perform that action at this time.