-
-
Notifications
You must be signed in to change notification settings - Fork 605
/
workunit.py
288 lines (231 loc) · 9.16 KB
/
workunit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
# coding=utf-8
# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)
import os
import re
import time
import uuid
from collections import namedtuple
from six.moves import range
from pants.util.dirutil import safe_mkdir_for
from pants.util.memo import memoized_method
from pants.util.rwbuf import FileBackedRWBuf
class WorkUnitLabel(object):
    """Namespace of string labels that can be attached to a workunit.

    Reporting code can inspect a workunit's labels to decide how to display
    information about it. A single workunit may carry several labels where
    that makes sense, e.g., TOOL, COMPILER and NAILGUN together.

    :API: public
    """

    SETUP = 'SETUP'          # Parsing build files etc.
    GOAL = 'GOAL'            # Executing a goal.
    TASK = 'TASK'            # Executing a task within a goal.
    GROUP = 'GROUP'          # Executing a group.

    BOOTSTRAP = 'BOOTSTRAP'  # Invocation of code to fetch a tool.
    TOOL = 'TOOL'            # Single invocations of a tool.
    MULTITOOL = 'MULTITOOL'  # Multiple consecutive invocations of the same tool.
    COMPILER = 'COMPILER'    # Invocation of a compiler.

    TEST = 'TEST'            # Running a test.
    JVM = 'JVM'              # Running a tool via the JVM.
    NAILGUN = 'NAILGUN'      # Running a tool via nailgun.
    RUN = 'RUN'              # Running a binary.
    REPL = 'REPL'            # Running a repl.
    PREP = 'PREP'            # Running a prep command
    LINT = 'LINT'            # Running a lint or static analysis tool.

    # Do not attempt to print workunit's label upon invocation
    # This has nothing to do with a process's own stderr/stdout.
    SUPPRESS_LABEL = 'SUPPRESS_LABEL'

    @classmethod
    @memoized_method
    def keys(cls):
        """Returns the names of all label constants declared on this class.

        A name qualifies when it does not start with an underscore and is
        entirely upper-case; names come back in the order `dir()` yields them.

        :API: public
        """
        label_names = []
        for attr_name in dir(cls):
            if attr_name.startswith('_'):
                continue
            if attr_name.isupper():
                label_names.append(attr_name)
        return label_names
class WorkUnit(object):
    """A hierarchical unit of work, for the purpose of timing and reporting.

    A WorkUnit can be subdivided into further WorkUnits. The WorkUnit concept is deliberately
    decoupled from the goal/task hierarchy. This allows some flexibility in having, say,
    sub-units inside a task. E.g., there might be one WorkUnit representing an entire pants run,
    and that can be subdivided into WorkUnits for each goal. Each of those can be subdivided into
    WorkUnits for each task, and a task can subdivide that into further work units, if
    finer-grained timing and reporting is needed.

    :API: public
    """

    # The outcome of a workunit.
    # It can only be set to a new value <= the old one.
    ABORTED = 0
    FAILURE = 1
    WARNING = 2
    SUCCESS = 3
    UNKNOWN = 4

    # Generic workunit log config.
    #   level: Display log messages up to this level.
    #   colors: log color settings.
    LogConfig = namedtuple('LogConfig', ['level', 'colors'])

    @staticmethod
    def outcome_string(outcome):
        """Returns a human-readable string describing the outcome.

        - outcome: One of the integer outcome constants defined on this class.

        :API: public
        """
        return ['ABORTED', 'FAILURE', 'WARNING', 'SUCCESS', 'UNKNOWN'][outcome]

    def __init__(self, run_info_dir, parent, name, labels=None, cmd='', log_config=None):
        """
        - run_info_dir: The path of the run_info_dir from the RunTracker that tracks this
                        WorkUnit.
        - parent: The containing workunit, if any. E.g., 'compile' might contain 'java',
                  'scala' etc., 'scala' might contain 'compile', 'split' etc.
        - name: A short name for this work. E.g., 'resolve', 'compile', 'scala', 'zinc'.
        - labels: An optional iterable of labels. The reporters can use this to decide how to
                  display information about this work.
        - cmd: An optional longer string representing this work.
               E.g., the cmd line of a compiler invocation.
        - log_config: An optional tuple of registered options affecting reporting output.
                      When falsy and a parent is given, the parent's log_config is used.
        """
        self._outcome = WorkUnit.UNKNOWN

        self.run_info_dir = run_info_dir
        self.parent = parent
        self.children = []

        self.name = name
        self.labels = set(labels or ())
        self.cmd = cmd
        self.id = uuid.uuid4()
        self.log_config = log_config

        # In seconds since the epoch. Doubles, to account for fractional seconds.
        # An end_time of 0 means the workunit has not ended yet (see duration()).
        self.start_time = 0
        self.end_time = 0

        # A workunit may have multiple outputs, which we identify by a name.
        # E.g., a tool invocation may have 'stdout', 'stderr', 'debug_log' etc.
        self._outputs = {}  # name -> output buffer.
        self._output_paths = {}  # name -> path of the file backing that buffer.

        # Do this last, as the parent's _self_time() might get called before we're
        # done initializing ourselves.
        # TODO: Ensure that a parent can't be ended before all its children are.
        if self.parent:
            if not log_config:
                self.log_config = self.parent.log_config
            self.parent.children.append(self)

    def has_label(self, label):
        """Returns True if the given label was attached to this workunit.

        :API: public
        """
        return label in self.labels

    def start(self, start_time=None):
        """Mark the time at which this workunit started.

        - start_time: Optional explicit start time, in seconds since the epoch. Defaults to
                      the current time.
        """
        # Compare against None rather than relying on truthiness, so that an explicitly
        # supplied timestamp of 0 is honored instead of being replaced by "now".
        self.start_time = time.time() if start_time is None else start_time

    def end(self):
        """Mark the time at which this workunit ended, and close all of its outputs.

        Returns a tuple of (path, duration, self time, whether this workunit is labelled TOOL).
        """
        self.end_time = time.time()

        for output in self._outputs.values():
            output.close()

        return self.path(), self.duration(), self._self_time(), self.has_label(WorkUnitLabel.TOOL)

    def outcome(self):
        """Returns the outcome of this workunit.

        :API: public
        """
        return self._outcome

    def set_outcome(self, outcome):
        """Set the outcome of this work unit.

        We can set the outcome on a work unit directly, but that outcome will also be affected by
        those of its subunits. The right thing happens: The outcome of a work unit is the
        worst outcome of any of its subunits and any outcome set on it directly.

        Raises an Exception if outcome is not one of the known outcome values.
        """
        if outcome not in range(0, 5):
            raise Exception('Invalid outcome: {}'.format(outcome))

        # Lower values are worse; an outcome may only ever get worse, and a worsened
        # outcome is pushed up to the enclosing workunit.
        if outcome < self._outcome:
            self._outcome = outcome
            if self.parent:
                self.parent.set_outcome(self._outcome)

    _valid_name_re = re.compile(r'\w+')

    def output(self, name):
        """Returns the output buffer for the specified output name (e.g., 'stdout'), creating it
        if necessary.

        Raises an Exception if the name does not consist solely of word characters.

        :API: public
        """
        m = WorkUnit._valid_name_re.match(name)
        if not m or m.group(0) != name:
            raise Exception('Invalid output name: {}'.format(name))

        if name not in self._outputs:
            # Non-word characters in the workunit name are replaced so that it can be
            # embedded in a file name.
            workunit_name = re.sub(r'\W', '_', self.name)
            path = os.path.join(self.run_info_dir,
                                'tool_outputs', '{workunit_name}-{id}.{output_name}'
                                .format(workunit_name=workunit_name,
                                        id=self.id,
                                        output_name=name))
            safe_mkdir_for(path)
            self._outputs[name] = FileBackedRWBuf(path)
            self._output_paths[name] = path

        return self._outputs[name]

    def outputs(self):
        """Returns the map of output name -> output buffer.

        :API: public
        """
        return self._outputs

    def output_paths(self):
        """Returns the map of output name -> path of the output file.

        :API: public
        """
        return self._output_paths

    def duration(self):
        """Returns the time (in fractional seconds) spent in this workunit and its children.

        If the workunit has not ended yet, the time elapsed so far is returned.

        :API: public
        """
        return (self.end_time or time.time()) - self.start_time

    @property
    def start_time_string(self):
        """A convenient string representation of start_time.

        :API: public
        """
        return time.strftime('%H:%M:%S', time.localtime(self.start_time))

    @property
    def start_delta_string(self):
        """A convenient string representation of how long after the run started we started.

        :API: public
        """
        delta = int(self.start_time) - int(self.root().start_time)
        return '{:02}:{:02}'.format(int(delta / 60), delta % 60)

    def root(self):
        """Returns the outermost workunit enclosing this one (possibly this workunit itself).

        :API: public
        """
        ret = self
        while ret.parent is not None:
            ret = ret.parent
        return ret

    def ancestors(self):
        """Returns a list consisting of this workunit and those enclosing it, up to the root.

        :API: public
        """
        ret = []
        workunit = self
        while workunit is not None:
            ret.append(workunit)
            workunit = workunit.parent
        return ret

    def path(self):
        """Returns a path string for this workunit, E.g., 'all:compile:jvm:scalac'.

        :API: public
        """
        return ':'.join(reversed([w.name for w in self.ancestors()]))

    def unaccounted_time(self):
        """Returns non-leaf time spent in this workunit.

        This assumes that all major work should be done in leaves.
        TODO: Is this assumption valid?

        :API: public
        """
        return self._self_time() if self.children else 0

    def to_dict(self):
        """Useful for providing arguments to templates.

        Returns a dict of this workunit's basic attributes, with the enclosing workunit's
        dict (or None at the root) nested under the 'parent' key.

        :API: public
        """
        ret = {}
        for key in ['name', 'cmd', 'id', 'start_time', 'end_time',
                    'outcome', 'start_time_string', 'start_delta_string']:
            val = getattr(self, key)
            # Some of these are plain attributes or properties, others (e.g. outcome)
            # are methods that must be invoked to obtain the value.
            ret[key] = val() if callable(val) else val
        ret['parent'] = self.parent.to_dict() if self.parent else None
        return ret

    def _self_time(self):
        """Returns the time spent in this workunit outside of any children."""
        return self.duration() - sum(child.duration() for child in self.children)