Skip to content

Commit

Permalink
Add an HTML parser option to avoid a default doctype
Browse files: browse the repository at this point in the history
- include/libxml/HTMLparser.h: defines the new HTML parser option
  HTML_PARSE_NODEFDTD
- HTMLparser.c: if the option is set, don't add a default DTD
- xmllint.c: add the corresponding --nodefdtd option in xmllint
  • Loading branch information
veillard committed Jul 26, 2010
1 parent 2ee91eb commit f1121c4
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 1 deletion.
6 changes: 5 additions & 1 deletion HTMLparser.c
Original file line number Diff line number Diff line change
Expand Up @@ -4670,7 +4670,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
ctxt->sax->endDocument(ctxt->userData);

if (ctxt->myDoc != NULL) {
if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) {
dtd = xmlGetIntSubset(ctxt->myDoc);
if (dtd == NULL)
ctxt->myDoc->intSubset =
Expand Down Expand Up @@ -6530,6 +6530,10 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
ctxt->options |= XML_PARSE_HUGE;
options -= XML_PARSE_HUGE;
}
if (options & HTML_PARSE_NODEFDTD) {
ctxt->options |= HTML_PARSE_NODEFDTD;
options -= HTML_PARSE_NODEFDTD;
}
ctxt->dictNames = 0;
return (options);
}
Expand Down
1 change: 1 addition & 0 deletions include/libxml/HTMLparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ XMLPUBFUN void XMLCALL
*/
typedef enum {
HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */
HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */
HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */
HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
Expand Down
8 changes: 8 additions & 0 deletions xmllint.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ static int html = 0;
static int xmlout = 0;
#endif
static int htmlout = 0;
#if defined(LIBXML_HTML_ENABLED)
static int nodefdtd = 0;
#endif
#ifdef LIBXML_PUSH_ENABLED
static int push = 0;
#endif /* LIBXML_PUSH_ENABLED */
Expand Down Expand Up @@ -2995,6 +2998,7 @@ static void usage(const char *name) {
#ifdef LIBXML_HTML_ENABLED
printf("\t--html : use the HTML parser\n");
printf("\t--xmlout : force to use the XML serializer when using --html\n");
printf("\t--nodefdtd : do not default HTML doctype\n");
#endif
#ifdef LIBXML_PUSH_ENABLED
printf("\t--push : use the push mode of the parser\n");
Expand Down Expand Up @@ -3157,6 +3161,10 @@ main(int argc, char **argv) {
else if ((!strcmp(argv[i], "-xmlout")) ||
(!strcmp(argv[i], "--xmlout"))) {
xmlout++;
} else if ((!strcmp(argv[i], "-nodefdtd")) ||
(!strcmp(argv[i], "--nodefdtd"))) {
nodefdtd++;
options |= HTML_PARSE_NODEFDTD;
}
#endif /* LIBXML_HTML_ENABLED */
else if ((!strcmp(argv[i], "-loaddtd")) ||
Expand Down

0 comments on commit f1121c4

Please sign in to comment.