#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
"""
This file contains basic infrastructure for running the integration test cases.
Most test cases are in v1_integration.py and v2_integration.py. There are a few
exceptions: Test cases that don't test either the v1 or v2 API are in this file,
and test cases that have to run at a specific point in the cycle (e.g. after all
other test cases) are also in this file.
"""
import argparse
import atexit
import datetime
import inspect
import json
import os
import random
import re
import requests
import subprocess
import signal
import time
import startservers
import chisel
from chisel import auth_and_issue
import v1_integration
import v2_integration
from helpers import *
from acme import challenges
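
# Typical invocations (illustrative; see main() below for the full set of
# flags; --filter takes a regex matched against test function names):
#
#   ./integration-test.py --chisel
#   ./integration-test.py --chisel --filter test_single_ocsp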

def run_client_tests():
    root = os.environ.get("CERTBOT_PATH")
    assert root is not None, (
        "Please set CERTBOT_PATH env variable to point at "
        "initialized (virtualenv) client repo root")
    cmd = os.path.join(root, 'tests', 'boulder-integration.sh')
    run(cmd, cwd=root)

def run_expired_authz_purger():
    # Note: This test must be run after all other tests that depend on
    # authorizations added to the database during setup
    # (e.g. test_expired_authzs_404).

    def expect(target_time, num, table):
        if CONFIG_NEXT:
            tool = "expired-authz-purger2"
            out = get_future_output("./bin/expired-authz-purger2 --single-run --config cmd/expired-authz-purger2/config.json", target_time)
        else:
            tool = "expired-authz-purger"
            out = get_future_output("./bin/expired-authz-purger --config cmd/expired-authz-purger/config.json", target_time)
        if 'via FAKECLOCK' not in out:
            raise Exception("%s was not built with `integration` build tag" % (tool))
        if num is None:
            return
        if CONFIG_NEXT:
            expected_output = 'deleted %d expired authorizations' % (num)
        else:
            expected_output = 'Deleted a total of %d expired authorizations from %s' % (num, table)
        if expected_output not in out:
            raise Exception("%s did not print '%s'. Output:\n%s" % (
                tool, expected_output, out))

    now = datetime.datetime.utcnow()

    # Run the purger once to clear out any backlog so we have a clean slate.
    expect(now+datetime.timedelta(days=+365), None, "")

    # Make an authz, but don't attempt its challenges.
    chisel.make_client().request_domain_challenges("eap-test.com")
    # Run the purger twice: once immediately, expecting nothing to be purged,
    # and once as if it were the future, expecting one purged authz.
    after_grace_period = now + datetime.timedelta(days=+14, minutes=+3)
    expect(now, 0, "pendingAuthorizations")
    expect(after_grace_period, 1, "pendingAuthorizations")

    auth_and_issue([random_domain()])
    after_grace_period = now + datetime.timedelta(days=+67, minutes=+3)
    expect(now, 0, "authz")
    expect(after_grace_period, 1, "authz")

def run_janitor():
    # Set the fake clock to a year in the future such that all of the database
    # rows created during the integration tests are older than the grace period.
    now = datetime.datetime.utcnow()
    target_time = now+datetime.timedelta(days=+365)

    e = os.environ.copy()
    e.setdefault("GORACE", "halt_on_error=1")
    e.setdefault("FAKECLOCK", fakeclock(target_time))

    # Note: Must use exec here so that killing this process kills the command.
    cmdline = "exec ./bin/boulder-janitor --config test/config-next/janitor.json"
    p = subprocess.Popen(cmdline, shell=True, env=e)

    # Wait for the janitor to come up
    waitport(8014, "boulder-janitor", None)

    def statline(statname, table):
        # NOTE: we omit the trailing "}}" to make this match general enough to
        # permit new labels in the future.
        return "janitor_{0}{{table=\"{1}\"".format(statname, table)

    def get_stat_line(port, stat):
        url = "http://localhost:%d/metrics" % port
        response = requests.get(url)
        for l in response.content.split("\n"):
            if l.strip().startswith(stat):
                return l
        return None
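
    # A line returned by get_stat_line looks roughly like the following
    # (the exact label set and value are illustrative):
    #   janitor_workbatch{table="certificates"} 0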

    def stat_value(line):
        parts = line.split(" ")
        if len(parts) != 2:
            raise Exception("stat line {0} was missing required parts".format(line))
        return parts[1]

    # Wait for the janitor to report it isn't finding new work
    print("waiting for boulder-janitor work to complete...\n")
    workDone = False
    for i in range(10):
        certStatusWorkbatch = get_stat_line(8014, statline("workbatch", "certificateStatus"))
        certsWorkBatch = get_stat_line(8014, statline("workbatch", "certificates"))
        certsPerNameWorkBatch = get_stat_line(8014, statline("workbatch", "certificatesPerName"))

        allReady = True
        for line in [certStatusWorkbatch, certsWorkBatch, certsPerNameWorkBatch]:
            if stat_value(line) != "0":
                allReady = False

        if allReady is False:
            print("not done after check {0}. Sleeping".format(i))
            time.sleep(2)
        else:
            workDone = True
            break

    if workDone is False:
        raise Exception("Timed out waiting for janitor to report all work completed\n")

    # Check deletion stats are not empty/zero
    for i in range(10):
        certStatusDeletes = get_stat_line(8014, statline("deletions", "certificateStatus"))
        certsDeletes = get_stat_line(8014, statline("deletions", "certificates"))
        certsPerNameDeletes = get_stat_line(8014, statline("deletions", "certificatesPerName"))

        if certStatusDeletes is None or certsDeletes is None or certsPerNameDeletes is None:
            print("delete stats not present after check {0}. Sleeping".format(i))
            time.sleep(2)
            continue

        for l in [certStatusDeletes, certsDeletes, certsPerNameDeletes]:
            if stat_value(l) == "0":
                raise Exception("Expected a non-zero number of deletes to be performed. Found {0}".format(l))

    # Check that all error stats are empty
    errorStats = [
        statline("errors", "certificateStatus"),
        statline("errors", "certificates"),
        statline("errors", "certificatesPerName"),
    ]
    for eStat in errorStats:
        actual = get_stat_line(8014, eStat)
        if actual is not None:
            raise Exception("Expected to find no error stat lines but found {0}\n".format(eStat))

    # Terminate the janitor
    p.terminate()

def test_single_ocsp():
    """Run the single-ocsp command, which is used to generate OCSP responses for
    intermediate certificates on a manual basis. Then start up an
    ocsp-responder configured to respond using the output of single-ocsp,
    check that it successfully answers OCSP requests, and shut the responder
    back down.

    This is a non-API test.
    """
    run("./bin/single-ocsp -issuer test/test-root.pem \
            -responder test/test-root.pem \
            -target test/test-ca2.pem \
            -pkcs11 test/test-root.key-pkcs11.json \
            -thisUpdate 2016-09-02T00:00:00Z \
            -nextUpdate 2020-09-02T00:00:00Z \
            -status 0 \
            -out /tmp/issuer-ocsp-responses.txt")

    p = subprocess.Popen(
        './bin/ocsp-responder --config test/issuer-ocsp-responder.json', shell=True)
    waitport(4003, './bin/ocsp-responder --config test/issuer-ocsp-responder.json')

    # Verify that the static OCSP responder, which answers with a
    # pre-signed, long-lived response for the CA cert, works.
    verify_ocsp("test/test-ca2.pem", "test/test-root.pem", "http://localhost:4003", "good")

    p.send_signal(signal.SIGTERM)
    p.wait()

def test_stats():
    """Fetch Prometheus metrics from a sample of Boulder components to check
    they are present.

    This is a non-API test.
    """
    def expect_stat(port, stat):
        url = "http://localhost:%d/metrics" % port
        response = requests.get(url)
        if stat not in response.content:
            print(response.content)
            raise Exception("%s not present in %s" % (stat, url))

    expect_stat(8000, "\nresponse_time_count{")
    expect_stat(8000, "\ngo_goroutines ")
    expect_stat(8000, '\ngrpc_client_handling_seconds_count{grpc_method="NewRegistration",grpc_service="ra.RegistrationAuthority",grpc_type="unary"} ')
    expect_stat(8002, '\ngrpc_server_handling_seconds_sum{grpc_method="PerformValidation",grpc_service="ra.RegistrationAuthority",grpc_type="unary"} ')
    expect_stat(8001, "\ngo_goroutines ")

exit_status = 1

def main():
    parser = argparse.ArgumentParser(description='Run integration tests')
    parser.add_argument('--certbot', dest='run_certbot', action='store_true',
                        help="run the certbot integration tests")
    parser.add_argument('--chisel', dest="run_chisel", action="store_true",
                        help="run integration tests using chisel")
    parser.add_argument('--load', dest="run_loadtest", action="store_true",
                        help="run the load-generator")
    parser.add_argument('--filter', dest="test_case_filter", action="store",
                        help="Regex filter for test cases")
    # allow any ACME client to run custom command for integration
    # testing (without having to implement its own busy-wait loop)
    parser.add_argument('--custom', metavar="CMD", help="run custom command")
    parser.set_defaults(run_certbot=False, run_chisel=False, run_loadtest=False,
                        test_case_filter="", skip_setup=False)
    args = parser.parse_args()

    if not (args.run_certbot or args.run_chisel or args.run_loadtest or args.custom is not None):
        raise Exception("must run at least one of the letsencrypt or chisel tests with --certbot, --chisel, --load, or --custom")

    if not args.test_case_filter:
        # In CONFIG_NEXT mode, use the basic, non-next config for setup.
        # This lets us test the transition to authz2.
        config = default_config_dir
        if CONFIG_NEXT:
            config = "test/config"

        now = datetime.datetime.utcnow()

        six_months_ago = now+datetime.timedelta(days=-30*6)
        if not startservers.start(race_detection=True, fakeclock=fakeclock(six_months_ago), config_dir=config):
            raise Exception("startservers failed (mocking six months ago)")
        v1_integration.caa_client = caa_client = chisel.make_client()
        setup_six_months_ago()
        startservers.stop()

        twenty_days_ago = now+datetime.timedelta(days=-20)
        if not startservers.start(race_detection=True, fakeclock=fakeclock(twenty_days_ago), config_dir=config):
            raise Exception("startservers failed (mocking twenty days ago)")
        setup_twenty_days_ago()
        startservers.stop()

    if not startservers.start(race_detection=True):
        raise Exception("startservers failed")

    if args.run_chisel:
        run_chisel(args.test_case_filter)

    if args.run_certbot:
        run_client_tests()

    if args.custom:
        run(args.custom)

    # Skip the last-phase checks when the test case filter is set, because that
    # means we want to quickly iterate on a single test case.
    if not args.test_case_filter:
        run_cert_checker()
        check_balance()
        if not CONFIG_NEXT:
            run_expired_authz_purger()
        # Run the boulder-janitor. This should happen after all other tests
        # because it runs with the fake clock set to the future and deletes
        # rows that may otherwise be referenced by tests.
        run_janitor()
        # Run the load-generator last. run_loadtest will stop the
        # pebble-challtestsrv before running the load-generator and will not
        # restart it.
        run_loadtest()

    if not startservers.check():
        raise Exception("startservers.check failed")

    global exit_status
    exit_status = 0

def run_chisel(test_case_filter):
    for key, value in inspect.getmembers(v1_integration):
        if callable(value) and key.startswith('test_') and re.search(test_case_filter, key):
            value()
    for key, value in inspect.getmembers(v2_integration):
        if callable(value) and key.startswith('test_') and re.search(test_case_filter, key):
            value()
    for key, value in globals().items():
        if callable(value) and key.startswith('test_') and re.search(test_case_filter, key):
            value()
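
# Note: run_chisel matches test function names with re.search, so a run like
# `--chisel --filter test_single_ocsp` (illustrative) executes only the
# test_single_ocsp case defined in this file, plus any other test_* function
# whose name happens to match the same regex.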

def run_loadtest():
    """Run the ACME v2 load generator."""
    latency_data_file = "%s/integration-test-latency.json" % tempdir

    # Stop the global pebble-challtestsrv - it will conflict with the
    # load-generator's internal challtestsrv. We don't restart it because
    # run_loadtest() is called last and there are no remaining tests to run
    # that might benefit from the pebble-challtestsrv being restarted.
    startservers.stopChallSrv()

    run("./bin/load-generator \
            -config test/load-generator/config/integration-test-config.json\
            -results %s" % latency_data_file)

def check_balance():
    """Verify that gRPC load balancing across backends is working correctly.

    Fetch metrics from each backend and ensure the grpc_server_handled_total
    metric is present, which means that backend handled at least one request.
    """
    addresses = [
        "sa1.boulder:8003",
        "sa2.boulder:8103",
        "publisher1.boulder:8009",
        "publisher2.boulder:8109",
        "va1.boulder:8004",
        "va2.boulder:8104",
        "ca1.boulder:8001",
        "ca2.boulder:8104",
        "ra1.boulder:8002",
        "ra2.boulder:8102",
    ]
    for address in addresses:
        metrics = requests.get("http://%s/metrics" % address)
        if "grpc_server_handled_total" not in metrics.text:
            raise Exception("no gRPC traffic processed by %s; load balancing problem?"
                            % address)

def run_cert_checker():
    run("./bin/cert-checker -config %s/cert-checker.json" % default_config_dir)


if __name__ == "__main__":
    try:
        main()
    except subprocess.CalledProcessError as e:
        raise Exception("%s. Output:\n%s" % (e, e.output))


@atexit.register
def stop():
    if exit_status == 0:
        print("\n\nSUCCESS")
    else:
        print("\n\nFAILURE")