-
Notifications
You must be signed in to change notification settings - Fork 5.5k
/
test_bazel_sharding.py
248 lines (200 loc) · 8.19 KB
/
test_bazel_sharding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
from typing import List
import pytest
import os
import shutil
import sys
import tempfile
# Required for bazel
file_parent = os.path.dirname(__file__)
sys.path.append(os.path.join(file_parent, "../"))
import bazel_sharding # noqa: E402
WORKSPACE_KEY = "workspace"
def _prefix_rules(rules):
return list({f"//{WORKSPACE_KEY}:{rule}" for rule in rules})
# Expected labels of the mock targets, split into the groups the tests below
# combine. NOTE(review): the grouping presumably mirrors which attributes each
# target sets in mock_BUILD -- confirm against that file.
size_rules = _prefix_rules(
    [
        "test_small",
        "test_medium",
        "test_large",
        "test_enormous",
    ]
)
timeout_rules = _prefix_rules(
    [
        "test_short",
        "test_moderate",
        "test_long",
        "test_eternal",
    ]
)
size_and_timeout_rules = _prefix_rules(["test_both_size_and_timeout"])
# The group the end-to-end test expects to disappear with exclude_manual=True.
manual_rules = _prefix_rules(["test_default"])

# Every label, in the same group order the tests have always used.
all_rules = [
    *size_rules,
    *timeout_rules,
    *manual_rules,
    *size_and_timeout_rules,
]
@pytest.fixture
def mock_build_dir():
    """Create a mock bazel workspace and run the test from inside it.

    Setup:
      * makes a temporary directory and writes a minimal ``WORKSPACE`` file,
      * copies ``mock_BUILD`` (located next to this test file) to
        ``<tmp>/<WORKSPACE_KEY>/BUILD``,
      * changes the current working directory to ``<tmp>/<WORKSPACE_KEY>``.

    Teardown restores the previous working directory and removes the
    temporary directory. Both steps run in a ``finally`` block so that a
    failure during setup (for example a missing ``mock_BUILD``) neither leaks
    the temporary directory nor leaves the process in a different cwd.
    """
    # Capture the cwd first so there is nothing to clean up if this fails.
    cwd = os.getcwd()
    tmpdir = tempfile.mkdtemp()
    try:
        with open(os.path.join(tmpdir, "WORKSPACE"), "w") as f:
            f.write(
                """
workspace(name = "fake_workspace")
"""
            )
        os.makedirs(os.path.join(tmpdir, WORKSPACE_KEY), exist_ok=True)
        shutil.copyfile(
            os.path.join(file_parent, "mock_BUILD"),
            os.path.join(tmpdir, WORKSPACE_KEY, "BUILD"),
        )
        os.chdir(os.path.join(tmpdir, WORKSPACE_KEY))
        yield
    finally:
        # Leave the directory before deleting it.
        os.chdir(cwd)
        shutil.rmtree(tmpdir, ignore_errors=True)
def test_actual_timeouts(mock_build_dir):
    """Test that size and timeout attrs are mapped to seconds correctly.

    Queries every target in the mock workspace (tests and non-tests, manual
    included) and asserts that each rule's ``actual_timeout_s`` matches the
    expected number of seconds.
    """
    query = bazel_sharding.get_target_expansion_query(
        ["..."], tests_only=False, exclude_manual=False
    )
    xml_output = bazel_sharding.run_bazel_query(query, debug=False)
    rules = set(bazel_sharding.extract_rules_from_xml(xml_output))

    expected_timeouts = {
        "test_default": 60 * 5,
        "test_small": 60,
        "test_medium": 60 * 5,
        "test_large": 60 * 15,
        "test_enormous": 60 * 60,
        "test_short": 60,
        "test_moderate": 60 * 5,
        "test_long": 60 * 15,
        "test_eternal": 60 * 60,
        "test_both_size_and_timeout": 60 * 15,
    }
    assert len(rules) == len(expected_timeouts)

    # Asserting a bare generator expression always passes (a generator object
    # is truthy), so build the real mapping and compare it as a whole. That
    # also yields a readable diff when a timeout is wrong.
    # NOTE(review): rule.name may be a full label ("//pkg:target") rather than
    # a bare target name; keeping only the part after the last ":" makes the
    # comparison work in either case -- confirm against BazelRule.
    actual_timeouts = {
        rule.name.rsplit(":", 1)[-1]: rule.actual_timeout_s for rule in rules
    }
    assert actual_timeouts == expected_timeouts
def test_add_rule_to_best_shard():
    """Check which shard ``add_rule_to_best_shard`` picks in three setups.

    1. Four empty shards: the rule is expected in the first shard.
    2. Three shards holding a medium rule (optimum equal to that rule's
       timeout) and an empty fourth shard: the rule is expected in the fourth.
    3. Three shards holding a large rule (optimum equal to that rule's
       timeout) and a fourth holding a medium rule: the rule is expected to be
       appended to the fourth shard.
    """
    # --- 1: everything empty -> first shard gets the rule
    empty_shards: List[List[bazel_sharding.BazelRule]] = [[], [], [], []]
    medium_rule = bazel_sharding.BazelRule("mock", "medium")
    bazel_sharding.add_rule_to_best_shard(medium_rule, empty_shards, 600)
    first_shard, *other_shards = empty_shards
    assert first_shard[0] == medium_rule
    assert all(len(shard) == 0 for shard in other_shards)

    # --- 2: only the last shard is empty -> it gets the rule
    filler = bazel_sharding.BazelRule("mock", "medium")
    partly_full: List[List[bazel_sharding.BazelRule]] = [
        [filler],
        [filler],
        [filler],
        [],
    ]
    small_rule = bazel_sharding.BazelRule("mock", "small")
    bazel_sharding.add_rule_to_best_shard(
        small_rule, partly_full, filler.actual_timeout_s
    )
    assert partly_full[3][0] == small_rule
    for shard in partly_full[:3]:
        assert shard[-1] == filler

    # --- 3: the last shard carries the lightest load -> the rule is appended
    # to it and the other shards stay untouched
    large_filler = bazel_sharding.BazelRule("mock", "large")
    medium_filler = bazel_sharding.BazelRule("mock", "medium")
    loaded: List[List[bazel_sharding.BazelRule]] = [
        [large_filler],
        [large_filler],
        [large_filler],
        [medium_filler],
    ]
    another_small = bazel_sharding.BazelRule("mock", "small")
    bazel_sharding.add_rule_to_best_shard(
        another_small, loaded, large_filler.actual_timeout_s
    )
    assert loaded[3][0] == medium_filler
    assert loaded[3][-1] == another_small
    for shard in loaded[:3]:
        assert shard[-1] == large_filler
def test_bazel_sharding_end_to_end(mock_build_dir):
    """Run the script end to end with a single shard (i.e. no sharding).

    With one shard, every rule must be returned, the default strategy and the
    "naive" strategy must return the same set, and ``exclude_manual=True``
    must drop exactly the manual rules.
    """
    everything = set(all_rules)

    default_output = set(bazel_sharding.main(["..."], index=0, count=1))
    assert default_output == everything

    naive_output = set(
        bazel_sharding.main(["..."], index=0, count=1, sharding_strategy="naive")
    )
    assert default_output == naive_output

    without_manual = set(
        bazel_sharding.main(["..."], index=0, count=1, exclude_manual=True)
    )
    assert without_manual == everything - set(manual_rules)
def test_bazel_sharding_with_filters(mock_build_dir):
    """Run the script end to end with a single shard and ``tag_filters`` set.

    Each case pairs a ``tag_filters`` string with the rules expected to
    survive it; cases run in order and the first mismatch fails the test.
    """
    cases = [
        ("size", size_rules + size_and_timeout_rules),
        ("-timeout", size_rules + manual_rules),
        ("size,timeout", size_rules + timeout_rules + size_and_timeout_rules),
        ("size,-timeout", size_rules),
        ("-size,-timeout", manual_rules),
    ]
    for tag_filters, expected in cases:
        selected = bazel_sharding.main(
            ["..."], index=0, count=1, tag_filters=tag_filters
        )
        assert set(selected) == set(expected)
def test_bazel_sharding_two_shards(mock_build_dir):
    """Run the script end to end with two shards.

    For both the default and the "naive" strategy, the two shards together
    must cover every rule, and each shard's exact contents (including order)
    are pinned because the output is expected to be deterministic.
    """

    def labels(*names):
        # Expand bare target names to labels, keeping the given order.
        return [f"//{WORKSPACE_KEY}:{name}" for name in names]

    everything = set(all_rules)

    # Default strategy.
    first_shard = bazel_sharding.main(["..."], index=0, count=2)
    second_shard = bazel_sharding.main(["..."], index=1, count=2)
    assert set(first_shard) | set(second_shard) == everything
    # We should be deterministic, therefore we can hardcode this
    assert first_shard == labels(
        "test_eternal",
        "test_large",
        "test_long",
        "test_small",
    )
    assert second_shard == labels(
        "test_both_size_and_timeout",
        "test_default",
        "test_enormous",
        "test_medium",
        "test_moderate",
        "test_short",
    )

    # Naive strategy.
    first_naive = bazel_sharding.main(
        ["..."], index=0, count=2, sharding_strategy="naive"
    )
    second_naive = bazel_sharding.main(
        ["..."], index=1, count=2, sharding_strategy="naive"
    )
    assert set(first_naive) | set(second_naive) == everything
    # We should be deterministic, therefore we can hardcode this
    assert first_naive == labels(
        "test_both_size_and_timeout",
        "test_enormous",
        "test_large",
        "test_medium",
        "test_short",
    )
    assert second_naive == labels(
        "test_default",
        "test_eternal",
        "test_long",
        "test_moderate",
        "test_small",
    )
@pytest.mark.parametrize("sharding_strategy", ("optimal", "naive"))
def test_bazel_sharding_optimal_too_many_shards(mock_build_dir, sharding_strategy):
    """Run the script end to end with one more shard than there are rules.

    For each strategy, the first shard must contain exactly one rule and the
    last shard must be empty.
    """
    shard_count = len(all_rules) + 1
    last_index = shard_count - 1

    first_shard = set(
        bazel_sharding.main(
            ["..."],
            index=0,
            count=shard_count,
            sharding_strategy=sharding_strategy,
        )
    )
    last_shard = set(
        bazel_sharding.main(
            ["..."],
            index=last_index,
            count=shard_count,
            sharding_strategy=sharding_strategy,
        )
    )

    assert len(first_shard) == 1
    assert len(last_shard) == 0
if __name__ == "__main__":
    # Running the file directly hands it to pytest (verbose, output not
    # captured) and uses pytest's result as the process exit status.
    exit_code = pytest.main(["-v", "-s", __file__])
    sys.exit(exit_code)