Auto merge of #19207 - asajeffrey:test-perf-add-base-url-option, r=edunham

Add --base option to test-perf.


Add a `--base <URL>` option to test-perf, which is handy for two reasons: a) it reduces randomness in tests by allowing tests to be file URLs, and b) it avoids having to run the test suite as root (the tp5n manifest hardwires tests served from port 80 on localhost).
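
A sketch of the intended usage (the `file://` path is the example used in this commit's README change; any local copy of the suite would do):

```
# Default: testcase URLs are rewritten against http://localhost:8000/
./mach test-perf

# Load testcases from a local directory instead, so no root-owned
# server on port 80 is needed
./mach test-perf --base file:///run/user/1000/performance/
```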

---
- [X] `./mach build -d` does not report any errors
- [X] `./mach test-tidy` does not report any errors
- [X] These changes do not require tests because this is test infrastructure


bors-servo committed Nov 14, 2017
2 parents 0085666 + 109df33 commit 5f33d35
Showing 6 changed files with 91 additions and 31 deletions.
34 changes: 34 additions & 0 deletions etc/ci/performance/README.md
@@ -66,6 +66,40 @@ Individual test can be run by `python -m pytest <filename>`:


# Advanced Usage


## Reducing variance

Running the same performance test repeatedly results in a lot of variance,
caused by the OS the test is running on. Experimentally, the things which
seem to tame randomness the most are a) disabling CPU frequency
changes, b) increasing the priority of the tests, c) running on one
CPU core, d) loading files directly rather than via localhost http,
and e) serving files from memory rather than from disk.

First run the performance tests normally (this downloads the test suite):
```
./mach test-perf
```
Disable CPU frequency changes, e.g. on Linux:
```
sudo cpupower frequency-set --min 3.5GHz --max 3.5GHz
```
Copy the test files to a `tmpfs` directory such as `/run/user/`,
for example if your `uid` is `1000`:
```
cp -r etc/ci/performance /run/user/1000
```
Then run the test suite on one core, at high priority, using a `file://` base URL:
```
sudo nice --20 chrt -r 99 sudo -u *userid* taskset 1 ./mach test-perf --base file:///run/user/1000/performance/
```
These fixes seem to bring the variance down to under 5% for individual
tests, and under 0.5% in total.

(IRC logs:
[2017-11-09](https://mozilla.logbot.info/servo/20171109#c13829674) |
[2017-11-10](https://mozilla.logbot.info/servo/20171110#c13835736)
)

## Test Perfherder Locally


If you want to test the data submission code in `submit_to_perfherder.py` without getting a credential for the production server, you can setup a local treeherder VM. If you don't need to test `submit_to_perfherder.py`, you can skip this step.
4 changes: 2 additions & 2 deletions etc/ci/performance/gecko_driver.py
@@ -71,11 +71,11 @@ def generate_placeholder(testcase):
     return [timings]


-def run_gecko_test(testcase, timeout, is_async):
+def run_gecko_test(testcase, url, timeout, is_async):
     with create_gecko_session() as driver:
         driver.set_page_load_timeout(timeout)
         try:
-            driver.get(testcase)
+            driver.get(url)
         except TimeoutException:
             print("Timeout!")
             return generate_placeholder(testcase)
57 changes: 41 additions & 16 deletions etc/ci/performance/runner.py
@@ -11,6 +11,7 @@
 import subprocess
 from functools import partial
 from statistics import median, StatisticsError
+from urllib.parse import urlsplit, urlunsplit, urljoin


 def load_manifest(filename):
@@ -31,6 +32,17 @@ def parse_manifest(text):
     return output


+def testcase_url(base, testcase):
+    # The tp5 manifest hardwires http://localhost/ as the base URL,
+    # which requires running the server as root in order to open
+    # the server on port 80. To allow non-root users to run the test
+    # case, we take the URL to be relative to a base URL.
+    (scheme, netloc, path, query, fragment) = urlsplit(testcase)
+    relative_url = urlunsplit(('', '', '.' + path, query, fragment))
+    absolute_url = urljoin(base, relative_url)
+    return absolute_url
+
+
 def execute_test(url, command, timeout):
     try:
         return subprocess.check_output(
@@ -46,11 +58,11 @@ def execute_test(url, command, timeout):
     return ""


-def run_servo_test(url, timeout, is_async):
+def run_servo_test(testcase, url, timeout, is_async):
     if is_async:
         print("Servo does not support async test!")
         # Return a placeholder
-        return parse_log("", url)
+        return parse_log("", testcase, url)

     ua_script_path = "{}/user-agent-js".format(os.getcwd())
     command = [
@@ -71,11 +83,11 @@ def run_servo_test(url, timeout, is_async):
             ' '.join(command)
         ))
     except subprocess.TimeoutExpired:
-        print("Test FAILED due to timeout: {}".format(url))
-    return parse_log(log, url)
+        print("Test FAILED due to timeout: {}".format(testcase))
+    return parse_log(log, testcase, url)


-def parse_log(log, testcase):
+def parse_log(log, testcase, url):
     blocks = []
     block = []
     copy = False
@@ -112,11 +124,11 @@ def parse_block(block):
     return timing


-def valid_timing(timing, testcase=None):
+def valid_timing(timing, url=None):
     if (timing is None or
             testcase is None or
             timing.get('title') == 'Error response' or
-            timing.get('testcase') != testcase):
+            timing.get('testcase') != url):
         return False
     else:
         return True
@@ -152,8 +164,15 @@ def create_placeholder(testcase):
             "domComplete": -1,
         }

-    valid_timing_for_case = partial(valid_timing, testcase=testcase)
-    timings = list(filter(valid_timing_for_case, map(parse_block, blocks)))
+    # Set the testcase field to contain the original testcase name,
+    # rather than the url.
+    def set_testcase(timing, testcase=None):
+        timing['testcase'] = testcase
+        return timing
+
+    valid_timing_for_case = partial(valid_timing, url=url)
+    set_testcase_for_case = partial(set_testcase, testcase=testcase)
+    timings = list(map(set_testcase_for_case, filter(valid_timing_for_case, map(parse_block, blocks))))

     if len(timings) == 0:
         print("Didn't find any perf data in the log, test timeout?")
@@ -167,10 +186,11 @@ def create_placeholder(testcase):
     return timings


-def filter_result_by_manifest(result_json, manifest):
+def filter_result_by_manifest(result_json, manifest, base):
     filtered = []
     for name, is_async in manifest:
-        match = [tc for tc in result_json if tc['testcase'] == name]
+        url = testcase_url(base, name)
+        match = [tc for tc in result_json if tc['testcase'] == url]
         if len(match) == 0:
             raise Exception(("Missing test result: {}. This will cause a "
                              "discontinuity in the treeherder graph, "
@@ -201,9 +221,9 @@ def take_result_median(result_json, expected_runs):
     return median_results


-def save_result_json(results, filename, manifest, expected_runs):
+def save_result_json(results, filename, manifest, expected_runs, base):

-    results = filter_result_by_manifest(results, manifest)
+    results = filter_result_by_manifest(results, manifest, base)
     results = take_result_median(results, expected_runs)

     if len(results) == 0:
@@ -245,6 +265,10 @@ def main():
                         help="the test manifest in tp5 format")
     parser.add_argument("output_file",
                         help="filename for the output json")
+    parser.add_argument("--base",
+                        type=str,
+                        default='http://localhost:8000/',
+                        help="the base URL for tests. Default: http://localhost:8000/")
     parser.add_argument("--runs",
                         type=int,
                         default=20,
@@ -270,18 +294,19 @@ def main():
         testcases = load_manifest(args.tp5_manifest)
         results = []
         for testcase, is_async in testcases:
+            url = testcase_url(args.base, testcase)
             for run in range(args.runs):
                 print("Running test {}/{} on {}".format(run + 1,
                                                         args.runs,
-                                                        testcase))
+                                                        url))
                 # results will be a mixure of timings dict and testcase strings
                 # testcase string indicates a failed test
-                results += run_test(testcase, args.timeout, is_async)
+                results += run_test(testcase, url, args.timeout, is_async)
         print("Finished")
         # TODO: Record and analyze other performance.timing properties

         print(format_result_summary(results))
-        save_result_json(results, args.output_file, testcases, args.runs)
+        save_result_json(results, args.output_file, testcases, args.runs, args.base)

     except KeyboardInterrupt:
         print("Test stopped by user, saving partial result")
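
As a sanity check of the `testcase_url` helper added above, here is a small standalone sketch of the rewriting it performs (the manifest entry below is a made-up example; the real tp5n manifest hardwires `http://localhost/` URLs):

```
from urllib.parse import urljoin, urlsplit, urlunsplit


def testcase_url(base, testcase):
    # Same logic as in runner.py: keep only the path/query/fragment of the
    # manifest entry and re-root it onto the user-supplied base URL.
    (scheme, netloc, path, query, fragment) = urlsplit(testcase)
    relative_url = urlunsplit(('', '', '.' + path, query, fragment))
    return urljoin(base, relative_url)


# A made-up manifest entry in the tp5n style.
testcase = "http://localhost/page_load_test/example.com/index.html"

print(testcase_url("http://localhost:8000/", testcase))
# http://localhost:8000/page_load_test/example.com/index.html

print(testcase_url("file:///run/user/1000/performance/", testcase))
# file:///run/user/1000/performance/page_load_test/example.com/index.html
```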
8 changes: 7 additions & 1 deletion etc/ci/performance/test_all.sh
@@ -8,6 +8,8 @@ set -o errexit
 set -o nounset
 set -o pipefail

+base="http://localhost:8000"
+
 while (( "${#}" ))
 do
 case "${1}" in
@@ -22,6 +24,10 @@
     --submit)
         submit=1
         ;;
+    --base)
+        base="${2}"
+        shift
+        ;;
     *)
         echo "Unknown option ${1}."
         exit
@@ -46,7 +52,7 @@ MANIFEST="page_load_test/example.manifest" # A manifest that excludes
 PERF_FILE="output/perf-$(date +%s).json"

 echo "Running tests"
-python3 runner.py ${engine} --runs 3 --timeout "${timeout}" \
+python3 runner.py ${engine} --runs 4 --timeout "${timeout}" --base "${base}" \
     "${MANIFEST}" "${PERF_FILE}"

 if [[ "${submit:-}" ]];
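
With the option parsing above, the harness script can also be driven directly; a hedged example, assuming it is run from `etc/ci/performance` with the suite already downloaded (`--servo` is the engine flag that `test_perf.sh` passes through):

```
# --base consumes the following argument as the base URL (hence the extra shift)
./test_all.sh --servo --base "file:///run/user/1000/performance/"
```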
11 changes: 1 addition & 10 deletions etc/ci/performance/test_perf.sh
@@ -35,13 +35,4 @@ mkdir -p servo
 mkdir -p output # Test result will be saved to output/perf-<timestamp>.json
 ./git_log_to_json.sh > servo/revision.json

-if [[ "${#}" -eq 1 ]]; then
-    if [[ "${1}" = "--submit" ]]; then
-        ./test_all.sh --servo --submit
-    else
-        echo "Unrecognized argument: ${1}; Ignore and proceed without submitting"
-        ./test_all.sh --servo
-    fi
-else
-    ./test_all.sh --servo
-fi
+./test_all.sh --servo ${*}
8 changes: 6 additions & 2 deletions python/servo/testing_commands.py
@@ -173,14 +173,18 @@ def suite_for_path(self, path_arg):
     @Command('test-perf',
              description='Run the page load performance test',
              category='testing')
-    @CommandArgument('--submit', default=False, action="store_true",
+    @CommandArgument('--base', default=None,
+                     help="the base URL for testcases")
+    @CommandArgument('-submit', '-a', default=False, action="store_true",
                      help="submit the data to perfherder")
-    def test_perf(self, submit=False):
+    def test_perf(self, base=None, submit=False):
         self.set_software_rendering_env(True)

         self.ensure_bootstrapped()
         env = self.build_env()
         cmd = ["bash", "test_perf.sh"]
+        if base:
+            cmd += ["--base", base]
         if submit:
             if not ("TREEHERDER_CLIENT_ID" in os.environ and
                     "TREEHERDER_CLIENT_SECRET" in os.environ):
