/
extract_resultstore_links.py
294 lines (259 loc) · 10.9 KB
/
extract_resultstore_links.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Extracts ResultStore links from a log containing Bazel invocations.
The links and the invocations' status can then be printed out, or output in the
form of JUnit-based XML.
"""
import argparse
import datetime
import os
import re
from typing import Dict, Union
import xml.etree.ElementTree as ElemTree
# Shape of the value returned by `parse_log`: a dict keyed by string (the
# ResultStore URL in `parse_log`), whose values are dicts of string keys to
# str or int values.
ResultDictType = Dict[str, Dict[str, Union[str, int]]]
# Matches a log line that starts with Bazel's "Streaming build results to:"
# notice; group 1 captures the https URL.
RESULT_STORE_LINK_RE = re.compile(
    r'^INFO: Streaming build results to: (https://[\w./\-]+)')
# Prefix of the log line that `parse_log` treats as a failed build.
FAILED_BUILD_LINE = 'FAILED: Build did NOT complete successfully'
# Prefix a line must have before `parse_log` tests it with TESTS_FAILED_RE.
BUILD_STATUS_LINE = 'INFO: Build'
# Matches a "Build completed, N test(s) FAILED" line at the start of a line.
TESTS_FAILED_RE = re.compile(r'^INFO: Build completed, \d+ tests? FAILED')
# Matches a `bazel ... test|build ...` command that begins the line or follows
# a space. Named groups: `command` (the command text up to the end of the
# line) and `type` ('test' or 'build').
BAZEL_COMMAND_RE = re.compile(
    r'(^| )(?P<command>bazel (.*? )?(?P<type>test|build) .+)')
class InvokeStatus:
  """Namespace for the status strings attached to a Bazel invocation."""

  # Default status: no failure line was found for the invocation.
  passed = 'passed'
  # A 'FAILED: Build did NOT complete successfully' line was found.
  build_failed = 'build_failed'
  # A 'Build completed, N test(s) FAILED' line was found.
  tests_failed = 'tests_failed'
def parse_args() -> argparse.Namespace:
  """Builds the command-line parser and parses `sys.argv`.

  Returns:
    The parsed namespace with `build_log`, `xml_out_path`, `print` and
    `verbose` attributes.

  Raises:
    TypeError: If neither `--print` nor `--xml-out-path` was given, since the
      script would then have nothing to output.
  """
  description = ('Extracts ResultStore links from a build log.\n'
                 'These can be then printed out, and/or output into a '
                 'JUnit-based XML file inside a specified directory.')
  cli = argparse.ArgumentParser(description=description)
  cli.add_argument('build_log', help='Path to a build log.')
  cli.add_argument(
      '--xml-out-path',
      help='Path to which to output '
           'the JUnit-based XML with ResultStore links.')
  # `store_true` already implies default=False and a dest derived from the
  # long option name, so neither needs to be spelled out.
  cli.add_argument(
      '--print',
      action='store_true',
      help='Whether to print out a short summary with the '
           'found ResultStore links (if any).')
  cli.add_argument(
      '-v', '--verbose',
      action='store_true',
      help='Prints out lines helpful for debugging.')

  namespace = cli.parse_args()
  if not (namespace.print or namespace.xml_out_path):
    raise TypeError('`--print` or `--xml-out-path` must be specified')
  return namespace
def parse_log(file_path: str,
              verbose: bool = False) -> ResultDictType:
  """Finds ResultStore links in a build log and determines their status.

  Args:
    file_path: Path to the build log. It is read as UTF-8 and undecodable
      bytes are ignored.
    verbose: If True, prints the log fragment of every failed invocation.

  Returns:
    A dict keyed by ResultStore URL, in order of first appearance. Each value
    is a dict holding:
      'start': index of the line that first announced the URL.
      'end': index of the last line that repeated the same URL (if any).
      'next_url': index of the line where a different URL was announced
        while this one was the current URL (if any).
      'status': one of the `InvokeStatus` values; defaults to passed.
      'log_fragment': up to 20 lines before the failure line through the
        end of the invocation (only for failed invocations).
      'command', 'command_type': the bazel command and whether it was a
        'test' or 'build' (only if a command line was found).
  """
  with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
    log_lines = f.read().splitlines()

  result_store_links: ResultDictType = {}
  current_url = None
  for i, line in enumerate(log_lines):
    link_match = RESULT_STORE_LINK_RE.search(line)
    if not link_match:
      continue

    url = link_match.group(1)
    url_lines = result_store_links.setdefault(url, {})
    # Each bazel RBE invocation should produce two
    # 'Streaming build results to: ...' lines, one at the start, and one at
    # the end of the invocation.
    # If there's a failure message, it will be found in-between these two.
    if not current_url:
      url_lines['start'] = i
    elif current_url == url:
      url_lines['end'] = i
    else:
      # A new URL showed up before the current one was closed; remember where
      # so the unclosed invocation's range can still be bounded.
      result_store_links[current_url]['next_url'] = i
      url_lines['start'] = i
    current_url = url

  previous_end_line = None
  for url, lines in result_store_links.items():
    lines['status'] = InvokeStatus.passed  # default to passed
    start_line = lines['start']
    # Last line that belongs to this invocation: the one before its closing
    # marker, before the next invocation's marker, or the end of the log.
    end_line = lines.get('end', lines.get('next_url', len(log_lines))) - 1

    # Walk backwards from the end of the invocation looking for a failure.
    k = end_line
    while k > start_line:
      backtrack_line = log_lines[k]
      build_failed = backtrack_line.startswith(FAILED_BUILD_LINE)
      tests_failed = (not build_failed
                      and backtrack_line.startswith(BUILD_STATUS_LINE)
                      and TESTS_FAILED_RE.search(backtrack_line) is not None)
      if build_failed or tests_failed:
        # The slice end is exclusive and slicing clamps to the list length,
        # so `end_line + 1` keeps the invocation's final line even when the
        # invocation runs to the very end of the log.
        log_fragment = '\n'.join(log_lines[max(k - 20, 0):end_line + 1])
        lines['log_fragment'] = log_fragment
        lines['status'] = (InvokeStatus.build_failed if build_failed
                           else InvokeStatus.tests_failed)
        if verbose:
          print(f'Found failed invocation: {url.rsplit("/")[-1]}\n'
                f'Log fragment:\n'
                f'```\n{log_fragment}\n```\n'
                f'{"=" * 140}')
        break
      k -= 1

    # A low-effort attempt to find the bazel command that triggered the
    # invocation. The search continues backwards from wherever the failure
    # scan stopped, and must not cross into the previous invocation. For the
    # first invocation the floor is -1 so that line 0 is inspected too.
    bazel_comm_min_line_i = (previous_end_line if previous_end_line is not None
                             else -1)
    while k > bazel_comm_min_line_i:
      backtrack_line = log_lines[k]
      # Don't attempt to parse multi-line commands broken up by backslashes
      if 'bazel ' in backtrack_line and not backtrack_line.endswith('\\'):
        bazel_line = BAZEL_COMMAND_RE.search(backtrack_line)
        if bazel_line:
          lines['command'] = bazel_line.group('command')
          lines['command_type'] = bazel_line.group('type')
          break
      k -= 1
    previous_end_line = lines.get('end') or start_line

  return result_store_links
def indent_xml(elem, level=0) -> None:
  """Inserts newline/indent whitespace into an element tree, in place.

  Only `text` and `tail` values that are empty or whitespace-only are
  overwritten, so real character data is left alone.

  Args:
    elem: The element whose subtree should be indented.
    level: Nesting depth of `elem`; 0 for the root.
  """
  own_indent = '\n' + level * ' '
  tail_is_blank = not elem.tail or not elem.tail.strip()

  # `not elem` is not a valid emptiness test for elements, hence len().
  if not len(elem):  # pylint: disable=g-explicit-length-test
    # Leaf: only a non-root element gets a tail.
    if level and tail_is_blank:
      elem.tail = own_indent
    return

  if not elem.text or not elem.text.strip():
    # Whitespace before the first child sits one step deeper.
    elem.text = own_indent + ' '
  if tail_is_blank:
    elem.tail = own_indent

  last_child = None
  for child in elem:
    indent_xml(child, level + 1)
    last_child = child
  # The last child's tail precedes this element's closing tag, so it is
  # pulled back out to this element's own indentation.
  if not last_child.tail or not last_child.tail.strip():
    last_child.tail = own_indent
def create_xml_file(result_store_dict: ResultDictType,
                    output_path: str,
                    verbose: bool = False):
  """Creates a JUnit-based XML file, with each invocation as a testcase.

  Invocations whose tests failed are reported as JUnit `failure` elements and
  invocations whose build failed as `error` elements; the rest get a
  `properties` element with a description and, if known, the bazel command.

  Args:
    result_store_dict: Mapping of ResultStore URL to invocation details. Each
      entry needs a 'status', needs a 'log_fragment' when the status is a
      failure, and may carry 'command' and 'command_type'.
    output_path: Path of the XML file to write. Missing parent directories
      are created.
    verbose: If True, prints progress messages.
  """
  # `dirname` is '' for a bare file name and `os.makedirs('')` raises
  # FileNotFoundError, so only create directories when there is one.
  output_dir = os.path.dirname(output_path)
  if output_dir:
    os.makedirs(output_dir, exist_ok=True)
  failure_count = 0
  error_count = 0

  # `datetime.utcnow()` is deprecated; dropping tzinfo from an aware UTC time
  # yields the same naive timestamp, so the ISO string keeps its format.
  utc_now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
  attrib = {'name': 'Bazel Invocations', 'time': '0.0',
            'timestamp': utc_now.isoformat()}
  testsuites = ElemTree.Element('testsuites')
  testsuite = ElemTree.SubElement(testsuites, 'testsuite')
  for url, invocation_results in result_store_dict.items():
    invocation_id = url.rsplit('/', 1)[-1]
    if verbose:
      print(f'Creating testcase for invocation {invocation_id}')
    status = invocation_results['status']
    command = invocation_results.get('command')
    command_type = invocation_results.get('command_type')

    case_attrib = attrib.copy()
    if command_type:
      command_type = command_type.title()
      case_name = f'{command_type} invocation {invocation_id}'
    else:
      case_name = f' Invocation {invocation_id}'
    case_attrib.update({'name': case_name,
                        'status': 'run', 'result': 'completed'})
    testcase = ElemTree.SubElement(testsuite, 'testcase', attrib=case_attrib)

    if status in (InvokeStatus.tests_failed, InvokeStatus.build_failed):
      if status == InvokeStatus.tests_failed:
        failure_count += 1
        elem_name = 'failure'
      else:
        error_count += 1
        elem_name = 'error'
      if command:
        failure_msg = (f'\nThe command was:\n\n'
                       f'{command}\n\n')
      else:
        failure_msg = ('\nCouldn\'t parse a bazel command '
                       'matching the invocation, inside the log. '
                       'Please look for it in the build log.\n\n')
      failure_msg += (
          f'See the ResultStore link for a detailed view of failed targets:\n'
          f'{url}\n\n')
      failure_msg += (
          f'Here\'s a fragment of the log containing the failure:\n\n'
          f'[ ... TRUNCATED ... ]\n\n'
          f'{invocation_results["log_fragment"]}\n'
          f'\n[ ... TRUNCATED ... ]\n'
      )
      failure = ElemTree.SubElement(
          testcase, elem_name,
          message=f'Bazel invocation {invocation_id} failed.')
      failure.text = failure_msg
    else:
      properties = ElemTree.SubElement(testcase, 'properties')
      success_msg = 'Build completed successfully.\n' f'See {url} for details.'
      ElemTree.SubElement(properties, 'property',
                          name='description',
                          value=success_msg)
      if command:
        ElemTree.SubElement(properties, 'property',
                            name='bazel_command',
                            value=command)

  # The totals are only known after the loop, so the suite attributes are
  # filled in last.
  suite_attrib = attrib.copy()
  suite_attrib.update({'tests': str(len(result_store_dict)),
                       'errors': str(error_count),
                       'failures': str(failure_count)})
  testsuites.attrib = suite_attrib
  # Separate copy so the two elements don't alias one mutable dict.
  testsuite.attrib = suite_attrib.copy()
  indent_xml(testsuites)
  tree = ElemTree.ElementTree(testsuites)
  with open(output_path, 'wb') as f:
    f.write(b'<?xml version="1.0"?>\n')
    tree.write(f)
  if verbose:
    print(f'\nWrote XML with Bazel invocation results to {output_path}')
def print_invocation_results(result_store_dict: ResultDictType):
  """Prints a numbered summary of every ResultStore link that was found.

  Args:
    result_store_dict: Mapping of ResultStore URL to invocation details; each
      entry must carry a 'status' and may carry a 'command'.
  """
  print()
  if not result_store_dict:
    print('Found no ResultStore links for Bazel build/test invocations.')
    return

  print(f'Found {len(result_store_dict)} ResultStore link(s) for '
        f'Bazel invocations.\n'
        f'ResultStore contains individual representations of each target '
        f'that were run/built during the invocation.\n'
        f'These results are generally easier to read than looking through '
        f'the entire build log:\n')
  numbered = enumerate(result_store_dict.items(), start=1)
  for number, (url, details) in numbered:
    # Fall back to a hint when no bazel command could be tied to the link.
    command_text = details.get('command') or (
        'Couldn\'t parse the bazel command, '
        'check inside the build log instead')
    summary = ''.join([
        f'Invocation #{number} ({details["status"]}):\n',
        command_text,
        f'\n{url}\n',
    ])
    print(summary)
def main():
  """Parses the build log named on the command line and emits the results.

  Environment variables in both the log path and the XML output path are
  expanded. The XML file is written when `--xml-out-path` is given, and the
  summary is printed when `--print` is given.
  """
  args = parse_args()
  links = parse_log(os.path.expandvars(args.build_log), verbose=args.verbose)
  if args.xml_out_path:
    create_xml_file(links,
                    os.path.expandvars(args.xml_out_path),
                    verbose=args.verbose)
  if args.print:
    print_invocation_results(links)
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
  main()