Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Useful scripts for interacting with WPT logs #24841

Merged
merged 10 commits into from Nov 23, 2019
@@ -0,0 +1,46 @@
#!/usr/bin/env python

# Copyright 2019 The Servo Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

# Usage: python wpt-summarize.py /path/to/wpt.log /wpt/test/url.html [--full]
#
# Extract all log lines for a particular test file from a WPT
# log, outputting individual JSON objects that can be manipulated
# with tools like jq. If a particular URL results in no output,
# the URL is likely used as a reference test's reference file,
# so passing `--full` will find any output from Servo process
# command lines that include the URL.

import sys
import json

def extract_test_log(log_lines, test_url, full_search=False):
    """Collect every structured-log entry belonging to a single WPT test.

    A test's output is attributed to the thread that emitted its
    ``test_start`` entry, so once that thread is identified every entry
    it logs is kept until its ``test_end`` arrives.

    log_lines: iterable of raw JSON strings, one structured log entry each.
    test_url: the WPT test URL whose output should be extracted.
    full_search: when True, also latch onto entries whose "command" field
        mentions the URL. Reference files of reftests never get their own
        ``test_start``, so this is how their Servo process output is found.

    Returns the matching entries as parsed dicts, in log order.
    """
    matches = []
    thread = None
    for line in log_lines:
        entry = json.loads(line)
        if thread and "thread" in entry:
            # We are inside the test: echo everything from its thread.
            if entry["thread"] == thread:
                matches.append(entry)
                if entry.get("action") == "test_end":
                    # Test finished; stop following this thread. Other
                    # threads' test_start events were never ours to track.
                    thread = None
        elif (entry.get("action") == "test_start" and
              entry["test"] == test_url):
            thread = entry["thread"]
            matches.append(entry)
        elif (full_search and
              "command" in entry and
              test_url in entry["command"]):
            thread = entry["thread"]
            matches.append(entry)
    return matches


if __name__ == "__main__":
    full_search = len(sys.argv) > 3 and sys.argv[3] == '--full'
    with open(sys.argv[1]) as f:
        for entry in extract_test_log(f.readlines(), sys.argv[2], full_search):
            print(json.dumps(entry))
@@ -0,0 +1,95 @@
#!/usr/bin/env python

# Copyright 2019 The Servo Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

# Usage: python wpt-timing.py [path/to/wpt.log] ...
#
# Given a series of WPT log files as arguments, this script
# extracts the status of each test file (ok; error; timeout; etc.)
# and how long it took to run, then creates three CSV files, each
# sorted by runtime:
#
# - longest_ok.csv: all tests that passed
# - longest_err.csv: all tests that failed or had an error
# - timeouts.csv: all tests that timed out
#
# This information can be used to quickly determine the longest-running
# tests in the WPT testsuite in order to improve the overall testsuite
# runtime on CI.

import sys
import json
import collections
import csv


def process_log(data):
    """Bucket one WPT structured log's test durations by final status.

    data: iterable of raw JSON strings, one log entry per line.

    Returns a ``defaultdict(list)`` mapping status ("OK", "PASS",
    "TIMEOUT", ...) to a list of ``(test url, duration in ms)`` tuples,
    where the duration is the gap between the test's ``test_start`` and
    ``test_end`` timestamps.
    """
    tests = {}
    test_results = collections.defaultdict(list)

    for line in data:
        entry = json.loads(line)
        action = entry.get("action")
        if action == "test_start":
            tests[entry["test"]] = {
                "start": int(entry["time"]),
                "end": 0,
            }
        elif action == "test_end":
            test = tests.get(entry["test"])
            if test is None:
                # test_end with no matching test_start (e.g. a truncated
                # log); no duration can be computed, so skip the entry
                # instead of raising KeyError.
                continue
            test["end"] = int(entry["time"])
            test_results[entry["status"]] += [
                (entry["test"], test["end"] - test["start"])
            ]

    return test_results

def _write_sorted_csv(path, results):
    """Write (test path, milliseconds) rows to `path`, longest first."""
    rows = sorted(results, key=lambda x: x[1], reverse=True)
    with open(path, 'w') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerows([['Test path', 'Milliseconds']] + rows)


def main():
    """Merge the per-status results of every log given on the command
    line, print summary statistics, and emit the three CSV reports."""
    test_results = {
        "SKIP": [],
        "OK": [],
        "PASS": [],
        "ERROR": [],
        "FAIL": [],
        "CRASH": [],
        "TIMEOUT": [],
    }
    for log_path in sys.argv[1:]:
        with open(log_path) as f:
            for status, results in process_log(f.readlines()).items():
                # setdefault keeps statuses outside the known set (e.g.
                # PRECONDITION_FAILED) from raising KeyError.
                test_results.setdefault(status, [])
                test_results[status] += results

    print("Skipped %d tests." % len(test_results["SKIP"]))
    print("%d tests timed out." % len(test_results["TIMEOUT"]))

    # Guard the empty case: a run with no crashing tests previously hit
    # an IndexError on longest_crash[0].
    if test_results["CRASH"]:
        longest_crash = max(test_results["CRASH"], key=lambda x: x[1])
        print("Longest CRASH test took %dms (%s)"
              % (longest_crash[1], longest_crash[0]))

    _write_sorted_csv('longest_ok.csv',
                      test_results["PASS"] + test_results["OK"])
    _write_sorted_csv('longest_err.csv',
                      test_results["ERROR"] + test_results["FAIL"])
    _write_sorted_csv('timeouts.csv', test_results["TIMEOUT"])


if __name__ == "__main__":
    main()
@@ -0,0 +1,68 @@
#!/usr/bin/env python

# Copyright 2019 The Servo Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

# Usage: python etc/wpt_result_analyzer.py
#
# Analyze the state of WPT tests in Servo by walking all of the
# test directories, counting the number of tests present, and
# counting the number of ini files present in the corresponding
# test result directory. Prints out a list of directories that
# have non-zero failure counts, ordered by overall number of tests
# and percentage of tests that fail.

import os

test_root = os.path.join('tests', 'wpt', 'web-platform-tests')
meta_root = os.path.join('tests', 'wpt', 'metadata')

# File extensions that mark a file as a WPT test. None of these is a
# suffix of another, so each file matches at most one entry.
TEST_EXTS = ('.html', '.htm', '.xht', '.xhtml',
             '.window.js', '.worker.js', '.any.js')

# rel_dir -> number of test files found under the test root.
test_counts = {}
# rel_dir -> number of .ini (expected-failure) files under the metadata root.
meta_counts = {}

for base_dir, dir_names, files in os.walk(test_root):
    if base_dir == test_root:
        continue

    rel_base = os.path.relpath(base_dir, test_root)
    # Only directories with a corresponding metadata directory are
    # interesting; a missing one means no recorded failures at all.
    if not os.path.exists(os.path.join(meta_root, rel_base)):
        continue

    # str.endswith accepts a tuple of suffixes, so a single pass replaces
    # the nested extension loop.
    test_counts[rel_base] = sum(1 for f in files if f.endswith(TEST_EXTS))

for base_dir, dir_names, files in os.walk(meta_root):
    if base_dir == meta_root:
        continue

    rel_base = os.path.relpath(base_dir, meta_root)
    num_files = len(files)
    # __dir__.ini holds directory-wide configuration, not a test failure.
    if '__dir__.ini' in files:
        num_files -= 1
    meta_counts[rel_base] = num_files

final_counts = []
for test_dir, test_count in test_counts.items():
    if not test_count:
        continue
    final_counts += [(test_dir, test_count, meta_counts.get(test_dir, 0))]

print('Test counts')
print('dir: %% failed (num tests / num failures)')
# Force float division: under a Python 2 interpreter (plain `python`
# shebang) `/` on ints truncates, collapsing the sort key and the
# printed percentage to 0.
s = sorted(final_counts, key=lambda x: float(x[2]) / x[1])
for test_dir, test_count, meta_count in reversed(sorted(s, key=lambda x: x[2])):
    if not meta_count:
        continue
    print('%s: %.2f%% (%d / %d)'
          % (test_dir, meta_count * 100.0 / test_count, test_count, meta_count))
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.