Skip to content

Commit ed29231

Browse files
committed
8337111: Bad HTML checker for generated documentation
8337113: Bad character checker for generated documentation
8337116: Internal links checker for generated documentation
8337114: DocType checker for generated documentation

Reviewed-by: hannesw
1 parent cd15ebb commit ed29231

17 files changed

+3012
-18
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
/*
2+
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
import doccheckutils.FileChecker;
25+
import doccheckutils.FileProcessor;
26+
import doccheckutils.HtmlFileChecker;
27+
import doccheckutils.checkers.BadCharacterChecker;
28+
import doccheckutils.checkers.DocTypeChecker;
29+
import doccheckutils.checkers.LinkChecker;
30+
import doccheckutils.checkers.TidyChecker;
31+
import doccheckutils.checkers.ExtLinkChecker;
32+
import toolbox.TestRunner;
33+
34+
import java.nio.file.Path;
35+
import java.util.*;
36+
37+
/**
38+
* DocCheck
39+
* <p>
40+
* For the sake of brevity, to run all of these checkers use
41+
* <p>
42+
* `make test-docs_all TEST_DEPS=docs-jdk`
43+
* <p>
44+
* This collection of tests provide a variety of checks for JDK documentation bundle.
45+
* <p>
46+
* It is meant to provide a convenient way to alert users of any errors in their documentation
47+
* before a push and verify the quality of the documentation.
48+
* It is not meant to replace more authoritative checkers; instead,
49+
* it is more focused on providing a convenient, easy overview of any possible issues.
50+
* <p>
51+
* It supports the following checks:
52+
* <p>
53+
* *HTML* -- We use the standard `tidy` utility to check for HTML compliance,
54+
* according to the declared version of HTML.
55+
* The output from `tidy` is analysed to generate a report summarizing any issues that were found.
56+
* <p>
57+
* Version `5.9.20` of `tidy` is expected, or the output from the `--version` option should contain the string `version 5`.
58+
* The test warns the user if he is using an earlier version.
59+
* <p>
60+
* *Bad Characters* -- We assumee that HTML files are encoded in UTF-8,
61+
* and reports any character encoding issues that it finds.
62+
* <p>
63+
* *DocType* -- We assume that HTML files should use HTML5, and reports
64+
* any files for which that is not the case.
65+
* <p>
66+
* *Links* -- We check links within a set of files, and reports on links
67+
* to external resources, without otherwise checking them.
68+
* <p>
69+
* *External Links* -- We scan the files for URLs that refer to
70+
* external resources, and validates those references using a "golden file" that includes a list of vetted links.
71+
* <p>
72+
* Each external reference is only checked once; but if an issue is found, all the files containing the
73+
* reference will be reported.
74+
*/
75+
public class DocCheck extends TestRunner {
76+
77+
private static final String DOCCHECK_DIR = System.getProperty("doccheck.dir");
78+
private static final Path DIR = Path.of(DOCCHECK_DIR != null ? DOCCHECK_DIR : "");
79+
private static final Set<String> CHECKS_LIST = new HashSet<>();
80+
private static Path DOCS_DIR;
81+
82+
private static boolean html;
83+
private static boolean links;
84+
private static boolean badchars;
85+
private static boolean doctype;
86+
private static boolean extlinks;
87+
88+
private List<Path> files;
89+
90+
public DocCheck() {
91+
super(System.err);
92+
init();
93+
}
94+
95+
public static void main(String... args) throws Exception {
96+
chooseCheckers();
97+
DocCheck docCheck = new DocCheck();
98+
docCheck.runTests();
99+
}
100+
101+
private static void chooseCheckers() {
102+
final String checks = System.getProperty("doccheck.checks");
103+
104+
if (!checks.isEmpty()) {
105+
if (checks.contains(",")) {
106+
CHECKS_LIST.addAll(Arrays.asList(checks.split(",")));
107+
} else {
108+
CHECKS_LIST.add(checks);
109+
}
110+
}
111+
112+
if (CHECKS_LIST.contains("all")) {
113+
html = true;
114+
links = true;
115+
badchars = true;
116+
doctype = true;
117+
extlinks = true;
118+
} else {
119+
if (CHECKS_LIST.contains("html")) {
120+
html = true;
121+
}
122+
if (CHECKS_LIST.contains("links")) {
123+
links = true;
124+
}
125+
if (CHECKS_LIST.contains("badchars")) {
126+
badchars = true;
127+
}
128+
if (CHECKS_LIST.contains("doctype")) {
129+
doctype = true;
130+
}
131+
if (CHECKS_LIST.contains("extlinks")) {
132+
extlinks = true;
133+
}
134+
}
135+
}
136+
137+
public void init() {
138+
var fileTester = new FileProcessor();
139+
DOCS_DIR = DocTester.resolveDocs();
140+
var baseDir = DOCS_DIR.resolve(DIR);
141+
fileTester.processFiles(baseDir);
142+
files = fileTester.getFiles();
143+
}
144+
145+
public List<FileChecker> getCheckers() {
146+
147+
List<FileChecker> checkers = new ArrayList<>();
148+
if (html) {
149+
checkers.add(new TidyChecker());
150+
}
151+
if (links) {
152+
var linkChecker = new LinkChecker();
153+
linkChecker.setBaseDir(DOCS_DIR);
154+
checkers.add(new HtmlFileChecker(linkChecker, DOCS_DIR));
155+
}
156+
157+
if (extlinks) {
158+
checkers.add(new HtmlFileChecker(new ExtLinkChecker(), DOCS_DIR));
159+
}
160+
161+
// there should be almost nothing reported from these two checkers
162+
// most reports should be broken anchors/links, missing files and errors in html
163+
if (badchars) {
164+
checkers.add(new BadCharacterChecker());
165+
}
166+
if (doctype) {
167+
checkers.add(new HtmlFileChecker(new DocTypeChecker(), DOCS_DIR));
168+
}
169+
170+
return checkers;
171+
}
172+
173+
@Test
174+
public void test() throws Exception {
175+
List<FileChecker> checkers = getCheckers();
176+
runCheckersSequentially(checkers);
177+
}
178+
179+
private void runCheckersSequentially(List<FileChecker> checkers) throws Exception {
180+
List<Throwable> exceptions = new ArrayList<>();
181+
182+
for (FileChecker checker : checkers) {
183+
try (checker) {
184+
checker.checkFiles(files);
185+
} catch (Exception e) {
186+
exceptions.add(e);
187+
}
188+
}
189+
190+
if (!exceptions.isEmpty()) {
191+
throw new Exception("One or more HTML checkers failed: " + exceptions);
192+
}
193+
}
194+
}

0 commit comments

Comments
 (0)