[Reland][lit] Use sharding for GoogleTest format
This helps lit unit test performance a lot, especially on Windows. The performance gain comes from launching one GoogleTest executable for many subtests instead of one process per subtest (the current situation).

The shards are executed by the test runner and the results are stored in the
JSON format supported by GoogleTest. Later, in the test reporting stage, all
test results are read back from the JSON files to produce the test results
summary and the rest of the reporting.

On my Win10 desktop, before this patch: `check-clang-unit`: 177s, `check-llvm-unit`: 38s; after this patch: `check-clang-unit`: 37s, `check-llvm-unit`: 11s.
On my Linux machine, before this patch: `check-clang-unit`: 46s, `check-llvm-unit`: 8s; after this patch: `check-clang-unit`: 7s, `check-llvm-unit`: 4s.
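
For context, a minimal sketch (not part of this patch) of how one shard of a GoogleTest binary is driven purely through environment variables; the executable name `./FooTests` and the shard numbers are hypothetical:

```python
# Launch one shard of a GoogleTest binary. GoogleTest itself selects the
# subset of tests belonging to this shard, so a single process runs many
# subtests instead of one process per subtest.
import os
import subprocess

env = dict(os.environ)
env.update({
    'GTEST_COLOR': 'no',
    'GTEST_TOTAL_SHARDS': '25',           # total number of shards
    'GTEST_SHARD_INDEX': '3',             # index of this shard, 0-based
    'GTEST_OUTPUT': 'json:shard-3.json',  # write per-test results as JSON
})
subprocess.run(['./FooTests'], env=env, check=False)
```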

Reviewed By: yln, rnk, abrachet

Differential Revision: https://reviews.llvm.org/D122251
Yuanfang Chen committed Apr 12, 2022
1 parent 163a9f4 commit cd0a588
Showing 23 changed files with 526 additions and 251 deletions.
5 changes: 5 additions & 0 deletions llvm/unittests/Support/CrashRecoveryTest.cpp
@@ -178,6 +178,11 @@ TEST(CrashRecoveryTest, UnixCRCReturnCode) {
int Res = setenv("LLVM_CRC_UNIXCRCRETURNCODE", "1", 0);
ASSERT_EQ(Res, 0);

Res = unsetenv("GTEST_SHARD_INDEX");
ASSERT_EQ(Res, 0);
Res = unsetenv("GTEST_TOTAL_SHARDS");
ASSERT_EQ(Res, 0);

std::string Error;
bool ExecutionFailed;
int RetCode = ExecuteAndWait(Executable, argv, {}, {}, 0, 0, &Error,
4 changes: 3 additions & 1 deletion llvm/unittests/Support/ProgramTest.cpp
@@ -95,7 +95,9 @@ class ProgramEnvTest : public testing::Test {
};

while (*EnvP != nullptr) {
EnvTable.emplace_back(prepareEnvVar(*EnvP));
auto S = prepareEnvVar(*EnvP);
if (!StringRef(S).startswith("GTEST_"))
EnvTable.emplace_back(S);
++EnvP;
}
}
3 changes: 2 additions & 1 deletion llvm/utils/lit/lit/LitConfig.py
@@ -22,7 +22,7 @@ class LitConfig(object):

def __init__(self, progname, path, quiet,
useValgrind, valgrindLeakCheck, valgrindArgs,
noExecute, debug, isWindows,
noExecute, debug, isWindows, order,
params, config_prefix = None,
maxIndividualTestTime = 0,
parallelism_groups = {},
@@ -38,6 +38,7 @@ def __init__(self, progname, path, quiet,
self.noExecute = noExecute
self.debug = debug
self.isWindows = bool(isWindows)
self.order = order
self.params = dict(params)
self.bashPath = None

1 change: 1 addition & 0 deletions llvm/utils/lit/lit/LitTestCase.py
@@ -52,6 +52,7 @@ def load_test_suite(inputs):
noExecute=False,
debug=False,
isWindows=windows,
order='smart',
params={})

# Perform test discovery.
5 changes: 3 additions & 2 deletions llvm/utils/lit/lit/Test.py
@@ -219,11 +219,12 @@ def getExecPath(self, components):
class Test:
"""Test - Information on a single test instance."""

def __init__(self, suite, path_in_suite, config, file_path = None):
def __init__(self, suite, path_in_suite, config, file_path = None, gtest_json_file = None):
self.suite = suite
self.path_in_suite = path_in_suite
self.config = config
self.file_path = file_path
self.gtest_json_file = gtest_json_file

# A list of conditions under which this test is expected to fail.
# Each condition is a boolean expression of features and target
@@ -258,7 +259,7 @@ def __init__(self, suite, path_in_suite, config, file_path = None):
# The previous test elapsed time, if applicable.
self.previous_elapsed = 0.0

if '/'.join(path_in_suite) in suite.test_times:
if suite.test_times and '/'.join(path_in_suite) in suite.test_times:
time = suite.test_times['/'.join(path_in_suite)]
self.previous_elapsed = abs(time)
self.previous_failure = time < 0
2 changes: 1 addition & 1 deletion llvm/utils/lit/lit/TestingConfig.py
@@ -28,7 +28,7 @@ def fromdefaults(litConfig):
'TMPDIR', 'TMP', 'TEMP', 'TEMPDIR', 'AVRLIT_BOARD',
'AVRLIT_PORT', 'FILECHECK_OPTS', 'VCINSTALLDIR',
'VCToolsinstallDir', 'VSINSTALLDIR', 'WindowsSdkDir',
'WindowsSDKLibVersion', 'SOURCE_DATE_EPOCH']
'WindowsSDKLibVersion', 'SOURCE_DATE_EPOCH', 'GTEST_FILTER']

if sys.platform == 'win32':
pass_vars.append('COMSPEC')
254 changes: 170 additions & 84 deletions llvm/utils/lit/lit/formats/googletest.py
@@ -1,6 +1,7 @@
from __future__ import absolute_import
import json
import math
import os
import re
import shlex
import subprocess
import sys
@@ -25,74 +26,25 @@ def __init__(self, test_sub_dirs, test_suffix, run_under = []):
self.test_suffixes = {exe_suffix, test_suffix + '.py'}
self.run_under = run_under

def getGTestTests(self, path, litConfig, localConfig):
"""getGTestTests(path) - [name]
Return the tests available in gtest executable.
Args:
path: String path to a gtest executable
litConfig: LitConfig instance
localConfig: TestingConfig instance"""

list_test_cmd = self.prepareCmd([path, '--gtest_list_tests'])

def get_num_tests(self, path, litConfig, localConfig):
list_test_cmd = self.prepareCmd(
[path, '--gtest_list_tests', '--gtest_filter=-*DISABLED_*'])
try:
output = subprocess.check_output(list_test_cmd,
env=localConfig.environment)
out = subprocess.check_output(list_test_cmd,
env=localConfig.environment)
except subprocess.CalledProcessError as exc:
litConfig.warning(
"unable to discover google-tests in %r: %s. Process output: %s"
% (path, sys.exc_info()[1], exc.output))
# This doesn't look like a valid gtest file. This can
# have a number of causes, none of them good. For
# instance, we could have created a broken executable.
# Alternatively, someone has cruft in their test
# directory. If we don't return a test here, then no
# failures will get reported, so return a dummy test name
# so that the failure is reported later.
yield 'failed_to_discover_tests_from_gtest'
return

upstream_prefix = re.compile('Running main\(\) from .*gtest_main\.cc')
nested_tests = []
for ln in output.splitlines(False): # Don't keep newlines.
ln = lit.util.to_string(ln)

if upstream_prefix.fullmatch(ln):
# Upstream googletest prints this to stdout prior to running
# tests. LLVM removed that print statement in r61540, but we
# handle it here in case upstream googletest is being used.
continue

# The test name list includes trailing comments beginning with
# a '#' on some lines, so skip those. We don't support test names
# that use escaping to embed '#' into their name as the names come
# from C++ class and method names where such things are hard and
# uninteresting to support.
ln = ln.split('#', 1)[0].rstrip()
if not ln.lstrip():
continue

index = 0
while ln[index*2:index*2+2] == '  ':
index += 1
while len(nested_tests) > index:
nested_tests.pop()

ln = ln[index*2:]
if ln.endswith('.'):
nested_tests.append(ln)
elif any([name.startswith('DISABLED_')
for name in nested_tests + [ln]]):
# Gtest will internally skip these tests. No need to launch a
# child process for it.
continue
else:
yield ''.join(nested_tests) + ln

def getTestsInDirectory(self, testSuite, path_in_suite,
litConfig, localConfig):
return None
return sum(
map(lambda line: lit.util.to_string(line).startswith('  '),
out.splitlines(False)))

def getTestsInDirectory(self, testSuite, path_in_suite, litConfig,
localConfig):
init_shard_size = 512 # number of tests in a shard
core_count = lit.util.usable_core_count()
source_path = testSuite.getSourcePath(path_in_suite)
for subdir in self.test_sub_dirs:
dir_path = os.path.join(source_path, subdir)
@@ -102,52 +54,124 @@ def getTestsInDirectory(self, testSuite, path_in_suite,
suffixes=self.test_suffixes):
# Discover the tests in this executable.
execpath = os.path.join(source_path, subdir, fn)
testnames = self.getGTestTests(execpath, litConfig, localConfig)
for testname in testnames:
testPath = path_in_suite + (subdir, fn, testname)
yield lit.Test.Test(testSuite, testPath, localConfig,
num_tests = self.get_num_tests(execpath, litConfig,
localConfig)
if num_tests is not None:
# Compute the number of shards.
shard_size = init_shard_size
nshard = int(math.ceil(num_tests / shard_size))
while nshard < core_count and shard_size > 1:
shard_size = shard_size // 2
nshard = int(math.ceil(num_tests / shard_size))

# Create one lit test for each shard.
for idx in range(nshard):
testPath = path_in_suite + (subdir, fn, str(idx),
str(nshard))
json_file = '-'.join([
execpath, testSuite.config.name,
str(os.getpid()),
str(idx),
str(nshard)
]) + '.json'
yield lit.Test.Test(testSuite,
testPath,
localConfig,
file_path=execpath,
gtest_json_file=json_file)
else:
# This doesn't look like a valid gtest file. This can
# have a number of causes, none of them good. For
# instance, we could have created a broken executable.
# Alternatively, someone has cruft in their test
# directory. If we don't return a test here, then no
# failures will get reported, so return a dummy test name
# so that the failure is reported later.
testPath = path_in_suite + (
subdir, fn, 'failed_to_discover_tests_from_gtest')
yield lit.Test.Test(testSuite,
testPath,
localConfig,
file_path=execpath)
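
As a rough illustration of the shard-count heuristic in `getTestsInDirectory` above, here is a standalone sketch (not part of the patch), assuming 16 usable cores and a binary with 800 tests:

```python
# Shrink the shard size from 512 until there are at least as many shards as
# usable cores (or each shard holds a single test), so every core stays busy.
import math

def compute_nshard(num_tests, core_count, init_shard_size=512):
    shard_size = init_shard_size
    nshard = math.ceil(num_tests / shard_size)
    while nshard < core_count and shard_size > 1:
        shard_size //= 2
        nshard = math.ceil(num_tests / shard_size)
    return nshard

print(compute_nshard(800, 16))  # -> 25 shards of at most 32 tests each
```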

def execute(self, test, litConfig):
if test.gtest_json_file is None:
return lit.Test.FAIL, ''

testPath,testName = os.path.split(test.getSourcePath())
while not os.path.exists(testPath):
# Handle GTest parametrized and typed tests, whose name includes
# some '/'s.
testPath, namePrefix = os.path.split(testPath)
testName = namePrefix + '/' + testName

cmd = [testPath, '--gtest_filter=' + testName]
testName,total_shards = os.path.split(testName)
testName,shard_idx = os.path.split(testName)
from lit.cl_arguments import TestOrder
use_shuffle = TestOrder(litConfig.order) == TestOrder.RANDOM
shard_env = {
'GTEST_COLOR': 'no',
'GTEST_SHUFFLE': '1' if use_shuffle else '0',
'GTEST_TOTAL_SHARDS': total_shards,
'GTEST_SHARD_INDEX': shard_idx,
'GTEST_OUTPUT': 'json:' + test.gtest_json_file
}
test.config.environment.update(shard_env)

cmd = [testPath]
cmd = self.prepareCmd(cmd)
if litConfig.useValgrind:
cmd = litConfig.valgrindArgs + cmd

if litConfig.noExecute:
return lit.Test.PASS, ''

header = f"Script:\n--\n{' '.join(cmd)}\n--\n"
def get_shard_header(shard_env):
shard_envs = '\n'.join([k + '=' + v for k, v in shard_env.items()])
return f"Script(shard):\n--\n%s\n%s\n--\n" % (shard_envs, ' '.join(cmd))

shard_header = get_shard_header(shard_env)

try:
out, err, exitCode = lit.util.executeCommand(
_, _, exitCode = lit.util.executeCommand(
cmd, env=test.config.environment,
timeout=litConfig.maxIndividualTestTime)
except lit.util.ExecuteCommandTimeoutException:
return (lit.Test.TIMEOUT,
f'{header}Reached timeout of '
return (lit.Test.TIMEOUT, f'{shard_header}Reached timeout of '
f'{litConfig.maxIndividualTestTime} seconds')

if exitCode:
return lit.Test.FAIL, header + out + err

if '[ SKIPPED ] 1 test,' in out:
return lit.Test.SKIPPED, ''
if not os.path.exists(test.gtest_json_file):
errmsg = f"shard JSON output does not exist: %s" % (
test.gtest_json_file)
return lit.Test.FAIL, shard_header + errmsg

passing_test_line = '[ PASSED ] 1 test.'
if passing_test_line not in out:
return (lit.Test.UNRESOLVED,
f'{header}Unable to find {passing_test_line} '
f'in gtest output:\n\n{out}{err}')
if exitCode == 0:
return lit.Test.PASS, ''

return lit.Test.PASS,''
with open(test.gtest_json_file, encoding='utf-8') as f:
jf = json.load(f)

if use_shuffle:
shard_env['GTEST_RANDOM_SEED'] = str(jf['random_seed'])
output = get_shard_header(shard_env) + '\n'

for testcase in jf['testsuites']:
for testinfo in testcase['testsuite']:
result = testinfo['result']
if result == 'SUPPRESSED' or result == 'SKIPPED':
continue
testname = testcase['name'] + '.' + testinfo['name']
header = f"Script:\n--\n%s --gtest_filter=%s\n--\n" % (
' '.join(cmd), testname)
if 'failures' in testinfo:
output += header
for fail in testinfo['failures']:
output += fail['failure'] + '\n'
output += '\n'
elif result != 'COMPLETED':
output += header
output += 'unresolved test result\n'
return lit.Test.FAIL, output

def prepareCmd(self, cmd):
"""Insert interpreter if needed.
@@ -166,3 +190,65 @@ def prepareCmd(self, cmd):
else:
cmd = shlex.split(self.run_under) + cmd
return cmd

@staticmethod
def post_process_shard_results(selected_tests, discovered_tests):
def remove_gtest(tests):
return [t for t in tests if t.gtest_json_file is None]

discovered_tests = remove_gtest(discovered_tests)
gtests = [t for t in selected_tests if t.gtest_json_file]
selected_tests = remove_gtest(selected_tests)
for test in gtests:
# In case gtest has bugs such that no JSON file was emitted.
if not os.path.exists(test.gtest_json_file):
selected_tests.append(test)
discovered_tests.append(test)
continue

start_time = test.result.start

# Load json file to retrieve results.
with open(test.gtest_json_file, encoding='utf-8') as f:
testsuites = json.load(f)['testsuites']
for testcase in testsuites:
for testinfo in testcase['testsuite']:
# Ignore disabled tests.
if testinfo['result'] == 'SUPPRESSED':
continue

testPath = test.path_in_suite[:-2] + (testcase['name'],
testinfo['name'])
subtest = lit.Test.Test(test.suite, testPath,
test.config, test.file_path)

testname = testcase['name'] + '.' + testinfo['name']
header = f"Script:\n--\n%s --gtest_filter=%s\n--\n" % (
test.file_path, testname)

output = ''
if testinfo['result'] == 'SKIPPED':
returnCode = lit.Test.SKIPPED
elif 'failures' in testinfo:
returnCode = lit.Test.FAIL
output = header
for fail in testinfo['failures']:
output += fail['failure'] + '\n'
elif testinfo['result'] == 'COMPLETED':
returnCode = lit.Test.PASS
else:
returnCode = lit.Test.UNRESOLVED
output = header + 'unresolved test result\n'

elapsed_time = float(testinfo['time'][:-1])
res = lit.Test.Result(returnCode, output, elapsed_time)
res.pid = test.result.pid
res.start = start_time
start_time = start_time + elapsed_time
subtest.setResult(res)

selected_tests.append(subtest)
discovered_tests.append(subtest)
os.remove(test.gtest_json_file)

return selected_tests, discovered_tests
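
For reference, a minimal sketch (not part of the patch) of the GoogleTest JSON fields that `execute` and `post_process_shard_results` read; the suite and test names below are invented for illustration:

```python
# Traverse a sample gtest JSON report the same way the code above does:
# 'SUPPRESSED' marks a disabled test, a 'failures' list marks a failed test,
# and the 'time' value carries a trailing 's'.
import json

sample = json.loads('''
{
  "random_seed": 12345,
  "testsuites": [
    {
      "name": "ExampleSuite",
      "testsuite": [
        {"name": "Passes", "result": "COMPLETED", "time": "0.002s"},
        {"name": "Fails", "result": "COMPLETED", "time": "0.010s",
         "failures": [{"failure": "example.cpp:42: expected 1, got 2"}]},
        {"name": "DISABLED_Skipped", "result": "SUPPRESSED", "time": "0s"}
      ]
    }
  ]
}
''')

for testcase in sample['testsuites']:
    for testinfo in testcase['testsuite']:
        name = testcase['name'] + '.' + testinfo['name']
        print(name, testinfo['result'], float(testinfo['time'][:-1]))
```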
