forked from reingart/pyfpdf
-
Notifications
You must be signed in to change notification settings - Fork 227
/
verapdf.py
executable file
·59 lines (48 loc) · 1.88 KB
/
verapdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python3
# Invoke veraPDF CLI & parse its output
# Purpose of this script:
# * abort the validation pipeline with a non-zero error code if any check fails on a PDF sample
# * aggregate all checks performed in a concise summary
# * allow to ignore some errors considered harmless, listed in verapdf-ignore.json
# USAGE: ./verapdf.py [$pdf_filepath]
import sys
from subprocess import PIPE, run
from scripts.checker_commons import aggregate, print_aggregated_report
AGGREGATED_REPORT_FILEPATH = "verapdf-aggregated.json"
IGNORE_WHITELIST_FILEPATH = "scripts/verapdf-ignore.json"
CHECKS_DETAILS_URL = "https://docs.verapdf.org/validation/pdfa-part1/ & https://docs.verapdf.org/validation/pdfa-parts-2-and-3/"
BAT_EXT = ".bat" if sys.platform in ("cygwin", "win32") else ""
def analyze_pdf_file(pdf_filepath):
output = run(
[
"verapdf/verapdf" + BAT_EXT,
"--format",
"text",
"-v",
pdf_filepath,
],
stdout=PIPE,
).stdout.decode()
report = parse_output(output)
aggregate(pdf_filepath, report, AGGREGATED_REPORT_FILEPATH)
def parse_output(output):
"Parse VeraPDF CLI output into a dict."
lines = output.splitlines()
try:
grave_line = next(line for line in lines if line.startswith("GRAVE:"))
return {"failure": grave_line}
except StopIteration:
# Skipping the first line
errors = [line[len(" FAIL ") :] for line in lines[1:]]
return {"errors": errors}
if __name__ == "__main__":
if len(sys.argv) < 2:
print_aggregated_report(
AGGREGATED_REPORT_FILEPATH, CHECKS_DETAILS_URL, IGNORE_WHITELIST_FILEPATH
)
elif len(sys.argv) > 2:
print(sys.argv, file=sys.stderr)
print("Exactly one argument must be passed to verapdf.py", file=sys.stderr)
sys.exit(2)
else:
analyze_pdf_file(sys.argv[1])