From b860c6c8ab9dd5c74d5a21916b35ee9593a18bde Mon Sep 17 00:00:00 2001
From: Alex Hoppen
Date: Mon, 17 May 2021 16:19:18 +0200
Subject: [PATCH] [Stress Tester] Use new duration measurement capability of stress tester to analyze code completion durations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the new capability of the stress tester to measure the time it
takes to execute code completion requests, gather that information, and
report an analysis of how long all code completion requests executed
during the stress tester run took.

This should allow us to verify that changes to code completion don’t
introduce significant regressions and will hopefully give us a way to
find situations in which code completion is slower than it should be.
---
 analyze_request_durations.py | 140 +++++++++++++++++++++++++++++++++++
 run_sk_stress_test           |  10 ++-
 2 files changed, 148 insertions(+), 2 deletions(-)
 create mode 100755 analyze_request_durations.py
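For reference, a sketch of the input file the new script consumes. The
shape below is inferred from this patch (print_stats documents the
files.<file name>.<request name> path); the file name and all numbers
are invented for illustration:

    import json

    # Hypothetical request-durations file, shaped like the input that
    # analyze_request_durations.py below expects to read.
    example = {
        'files': {
            '/sources/MyProject/Foo.swift': {
                'CodeComplete': {
                    'totalInstructions': 13386177098,
                    'logHistogram': {'32': 1, '33': 1},
                    'requestsExecuted': 2,
                }
            }
        }
    }
    with open('stress-tester-request-durations.json', 'w') as json_file:
        json.dump(example, json_file)

    # ./analyze_request_durations.py stress-tester-request-durations.json
    # then reports 2 requests with an average of 7e+09 instructions
    # (ca. 1.000s, given the INSTRUCTIONS_PER_SECOND constant).

In the normal flow, run_sk_stress_test passes the path via
SK_STRESS_REQUEST_DURATIONS_FILE, the stress tester writes the file
during the run, and analyze_request_durations() is invoked on it
afterwards.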
diff --git a/analyze_request_durations.py b/analyze_request_durations.py
new file mode 100755
index 00000000..918ddf77
--- /dev/null
+++ b/analyze_request_durations.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import argparse
+import json
+import os
+
+# This value was empirically determined on an iMac Pro with a fairly small
+# sample size. All measurements in seconds should be taken with extreme care.
+INSTRUCTIONS_PER_SECOND = 6693088549.09358
+
+# The length of the bars in the histograms
+HISTOGRAM_BAR_LENGTH = 40
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('request_durations_file')
+    return parser.parse_args()
+
+
+def print_stats(entries):
+    """
+    entries is a list of dictionaries from the JSON file under the
+    files.<file name>.<request name> path, containing the following keys:
+    - totalInstructions
+    - logHistogram
+    - requestsExecuted
+    """
+
+    # Create a log histogram for all entries
+    aggregated_completion_log_histogram = {}
+    # Adds totalInstructions for each entry
+    total_instructions = 0
+    # Adds requestsExecuted for each entry
+    total_requests_executed = 0
+    for entry in entries:
+        total_instructions += int(entry['totalInstructions'])
+        total_requests_executed += int(entry['requestsExecuted'])
+        for (bucket, count) in entry['logHistogram'].items():
+            bucket = int(bucket)
+            count = int(count)
+            if bucket not in aggregated_completion_log_histogram:
+                aggregated_completion_log_histogram[bucket] = 0
+            aggregated_completion_log_histogram[bucket] += count
+
+    if total_requests_executed == 0:
+        print('No requests executed')
+        return
+
+    print('Requests: {}'.format(total_requests_executed))
+
+    average_instructions_per_request = total_instructions / total_requests_executed
+    seconds_estimate = average_instructions_per_request / INSTRUCTIONS_PER_SECOND
+    print('Average: {:.0e} instr (ca. {:.3f}s)'
+          .format(average_instructions_per_request, seconds_estimate))
+
+    print('\nHistogram:')
+
+    min_bucket = min(aggregated_completion_log_histogram)
+    max_bucket = max(aggregated_completion_log_histogram)
+    max_count = max(aggregated_completion_log_histogram.values())
+
+    # Sums up the percentages for each entry to make it easier to spot the
+    # n-th percentile in the histogram
+    running_percentage_sum = 0
+    for bucket in range(min_bucket, max_bucket + 1):
+        count = aggregated_completion_log_histogram.get(bucket, 0)
+        executed_instructions = pow(2, bucket)
+        estimated_seconds = executed_instructions / INSTRUCTIONS_PER_SECOND
+        bar_fill = count * HISTOGRAM_BAR_LENGTH // max_count
+        bar = '*' * bar_fill + ' ' * (HISTOGRAM_BAR_LENGTH - bar_fill)
+        percentage = float(count) / total_requests_executed * 100
+        running_percentage_sum += percentage
+        print('< {:5.0e} instr (ca. {:6.3f}s): {} {:4.1f}% (< {:5.1f}%)'
+              .format(executed_instructions, estimated_seconds, bar, percentage,
+                      running_percentage_sum))
+
+
+def print_stats_for_all_files(request_durations, request_name):
+    print('\n---------- All {} requests ----------\n'.format(request_name))
+    all_request_entries = []
+    for (file_name, file_stats) in request_durations['files'].items():
+        all_request_entries.append(file_stats[request_name])
+    print_stats(all_request_entries)
+
+
+def print_slowest_files(request_durations, request_name, num_files):
+    print('\n---------- {} slowest files for {} requests ----------\n'
+          .format(num_files, request_name))
+
+    file_and_average = []
+    for (file_name, file_stats) in request_durations['files'].items():
+        instr_executed_in_file = int(file_stats[request_name]['totalInstructions'])
+        requests_in_file = int(file_stats[request_name]['requestsExecuted'])
+        if requests_in_file > 0:
+            average = instr_executed_in_file / requests_in_file
+            file_and_average.append((file_name, average))
+    file_and_average = sorted(file_and_average, key=lambda x: x[1], reverse=True)
+
+    for (file_name, average_request_duration) in file_and_average[0:num_files]:
+        print('-- {} --\n'.format(file_name))
+        print_stats([request_durations['files'][file_name][request_name]])
+        print('\n')
+
+
+def print_seconds_disclaimer():
+    print('''IMPORTANT:
+All measurements printed by this script were done in numbers of executed
+instructions. Measurements in seconds are computed assuming that all machines
+execute the same number of instructions per second at all times, which is not
+correct. Thus all measurements in seconds should be taken with extreme care.
+As they scale linearly with the number of instructions, it is safe to compare
+values in seconds output by this script.
+''')
+
+
+def analyze_request_durations(file_path):
+    if not os.path.exists(file_path):
+        print('{} does not exist. No request durations to analyze.'.format(file_path))
+        return
+    print_seconds_disclaimer()
+    print('\n---------- Raw durations for further analysis ----------\n')
+    with open(file_path, 'r') as json_file:
+        print(json_file.read())
+
+    with open(file_path, 'r') as json_file:
+        request_durations = json.load(json_file)
+    print_stats_for_all_files(request_durations, 'CodeComplete')
+    print_slowest_files(request_durations, 'CodeComplete', 10)
+
+
+def main():
+    args = parse_args()
+    analyze_request_durations(args.request_durations_file)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/run_sk_stress_test b/run_sk_stress_test
index 03795c12..2c4815f1 100755
--- a/run_sk_stress_test
+++ b/run_sk_stress_test
@@ -22,6 +22,7 @@ import subprocess
 import argparse
 import platform
 import codecs
+from analyze_request_durations import analyze_request_durations
 
 import common
 
@@ -230,9 +231,10 @@ class StressTesterRunner(object):
         # temporary file paths
         filtered_projects = os.path.join(script_dir, 'stress-tester-projects.json')
         results = os.path.join(script_dir, 'stress-tester-results.json')
+        request_durations = os.path.join(script_dir, 'stress-tester-request-durations.json')
 
         # remove temporary files if they already exist
-        self._cleanup([filtered_projects, results])
+        self._cleanup([filtered_projects, results, request_durations])
 
         run_env = {
             'SK_STRESS_TEST': self.stress_tester,
@@ -243,7 +245,9 @@
             'SK_XFAILS_PATH': self.xfails_path,
             'SK_STRESS_ACTIVE_CONFIG': self.swift_branch,
             'SK_STRESS_SUPPRESS_OUTPUT': 'true',
-            'SK_STRESS_REWRITE_MODES': 'none concurrent insideout'}
+            'SK_STRESS_REWRITE_MODES': 'none concurrent insideout',
+            'SK_STRESS_REQUEST_DURATIONS_FILE': request_durations,
+        }
         run_env.update(os.environ)
         run_cmd = ['./runner.py',
                    '--projects', filtered_projects,
@@ -264,6 +268,8 @@
         except common.ExecuteCommandFailure:
             self.compat_runner_failed = True
 
+        analyze_request_durations(request_durations)
+
         success = self._process_output(results, self.xfails_path)
 
         return success
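For reference, the seconds estimates in the histogram come straight
from the power-of-two instruction buckets. A minimal sketch of the
conversion (bucket values picked for illustration):

    # INSTRUCTIONS_PER_SECOND as defined in analyze_request_durations.py
    INSTRUCTIONS_PER_SECOND = 6693088549.09358

    for bucket in (31, 32, 33):
        instructions = 2 ** bucket
        seconds = instructions / INSTRUCTIONS_PER_SECOND
        print('< 2^{} instr ~ {:.3f}s'.format(bucket, seconds))
    # Prints:
    #   < 2^31 instr ~ 0.321s
    #   < 2^32 instr ~ 0.642s
    #   < 2^33 instr ~ 1.283s

Since every bucket is converted with the same constant, ratios between
buckets stay exact even though the absolute values in seconds are only
rough estimates, which is what the disclaimer printed by the script is
getting at.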