Skip to content

Commit

Permalink
Allow unquoted attr values
Browse files Browse the repository at this point in the history
  • Loading branch information
ohler55 committed Jul 14, 2023
1 parent 3c71c40 commit 5c36cf6
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 8 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

All changes to the Ox gem are documented here. Releases follow semantic versioning.

## [2.14.17] - 2023-07-14

### Fixed

- The sax parser in html mode now allows unquoted attribute values with complaints.

## [2.14.16] - 2023-04-11

### Fixed
Expand Down
24 changes: 17 additions & 7 deletions ext/ox/sax.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ static char read_text(SaxDrive dr);
static char read_jump(SaxDrive dr, const char *pat);
static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h);
static char read_name_token(SaxDrive dr);
static char read_quoted_value(SaxDrive dr);
static char read_quoted_value(SaxDrive dr, bool inst);

static void hint_clear_empty(SaxDrive dr);
static Nv hint_try_close(SaxDrive dr, const char *name);
Expand Down Expand Up @@ -1219,6 +1219,7 @@ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml,
c = buf_next_non_white(&dr->buf);
}
if ('=' != c) {
// TBD allow in smart mode
if (eq_req) {
dr->err = 1;
return c;
Expand All @@ -1230,7 +1231,7 @@ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml,
pos = dr->buf.pos + 1;
line = dr->buf.line;
col = dr->buf.col + 1;
c = read_quoted_value(dr);
c = read_quoted_value(dr, '?' == termc);
attr_value = dr->buf.str;

if (is_encoding) {
Expand Down Expand Up @@ -1297,10 +1298,11 @@ static char read_name_token(SaxDrive dr) {
return '\0';
}

/* The character after the quote or if there is no quote, the character after the word is returned. dr->buf.tail is one
* past that. dr->buf.str will point to the token which will be '\0' terminated.
/* Returns the character after the quote or, if the value is not quoted, the
* character after the word. dr->buf.tail is one past that character.
* dr->buf.str will point to the token, which will be '\0' terminated.
*/
static char read_quoted_value(SaxDrive dr) {
static char read_quoted_value(SaxDrive dr, bool inst) {
char c;

c = buf_get(&dr->buf);
Expand All @@ -1324,19 +1326,27 @@ static char read_quoted_value(SaxDrive dr) {
}
// not quoted, look for something that terminates the string
dr->buf.str = dr->buf.tail - 1;
ox_sax_drive_error(dr, WRONG_CHAR "attribute value not in quotes");
// Report the unquoted value as an error unless smart mode is on and the hints differ from ox_hints_html().
if (!(dr->options.smart && ox_hints_html() != dr->options.hints)) {
ox_sax_drive_error(dr, WRONG_CHAR "attribute value not in quotes");
}
while ('\0' != (c = buf_get(&dr->buf))) {
switch (c) {
case ' ':
// case '/':
case '>':
case '?': // for instructions
case '\t':
case '\n':
case '\r':
*(dr->buf.tail - 1) = '\0'; /* terminate value */
// dr->buf.tail is in the correct position, one after the word terminator
return c;
case '?': // for instructions
if (inst) {
*(dr->buf.tail - 1) = '\0'; /* terminate value */
return c;
}
break;
default: break;
}
}
Expand Down
2 changes: 1 addition & 1 deletion lib/ox/version.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Ox
# Current version of the module.
VERSION = '2.14.16'
VERSION = '2.14.17'
end
24 changes: 24 additions & 0 deletions test/sax/sax_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1419,4 +1419,28 @@ def test_sax_html_abort
[:abort, :table]
], handler.calls)
end

def test_sax_html_attr
Ox.default_options = $ox_sax_options
handler = AllSax.new
overlay = Ox.sax_html_overlay
html = %{<!doctype HTML>
<html lang=en>
<head url=http://ohler.com?x=2>
</head>
</html>
}
Ox.sax_html(handler, html, overlay: overlay, skip: :skip_white)
assert_equal([
[:doctype, ' HTML'],
[:start_element, :html],
[:attr, :lang, 'en'],
[:start_element, :head],
[:attr, :url, "http://ohler.com?x=2"],

Check failure on line 1439 in test/sax/sax_test.rb

View workflow job for this annotation

GitHub Actions / Formatting Check

[Correctable] Style/StringLiterals: Prefer single-quoted strings when you don't need string interpolation or special symbols.

Check failure on line 1439 in test/sax/sax_test.rb

View workflow job for this annotation

GitHub Actions / Formatting Check

[Correctable] Style/StringLiterals: Prefer single-quoted strings when you don't need string interpolation or special symbols.
[:text, " "],

Check failure on line 1440 in test/sax/sax_test.rb

View workflow job for this annotation

GitHub Actions / Formatting Check

[Correctable] Style/StringLiterals: Prefer single-quoted strings when you don't need string interpolation or special symbols.

Check failure on line 1440 in test/sax/sax_test.rb

View workflow job for this annotation

GitHub Actions / Formatting Check

[Correctable] Style/StringLiterals: Prefer single-quoted strings when you don't need string interpolation or special symbols.
[:end_element, :head],
[:end_element, :html],

Check failure on line 1442 in test/sax/sax_test.rb

View workflow job for this annotation

GitHub Actions / Formatting Check

[Correctable] Style/TrailingCommaInArrayLiteral: Avoid comma after the last item of an array.

Check failure on line 1442 in test/sax/sax_test.rb

View workflow job for this annotation

GitHub Actions / Formatting Check

[Correctable] Style/TrailingCommaInArrayLiteral: Avoid comma after the last item of an array.
], handler.calls)
end

Check failure on line 1445 in test/sax/sax_test.rb

View workflow job for this annotation

GitHub Actions / Formatting Check

[Correctable] Layout/EmptyLinesAroundClassBody: Extra empty line detected at class body end.

Check failure on line 1445 in test/sax/sax_test.rb

View workflow job for this annotation

GitHub Actions / Formatting Check

[Correctable] Layout/EmptyLinesAroundClassBody: Extra empty line detected at class body end.
end

0 comments on commit 5c36cf6

Please sign in to comment.