Skip to content

Commit

Permalink
csv-scanner: decode \xXX sequences in addition to escaped special characters
Browse files Browse the repository at this point in the history

Fixes #4274, which shows this access.log entry:

main:80 152.89.196.211 "-" - [23/Dec/2022:19:04:19 +0100] "\x16\x03\x01" 400 226 "-" "-"

Signed-off-by: Balazs Scheidler <bazsi77@gmail.com>
  • Loading branch information
bazsi committed Feb 7, 2023
1 parent 4b41664 commit ae0291a
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 0 deletions.
34 changes: 34 additions & 0 deletions lib/scanner/csv-scanner/csv-scanner.c
Expand Up @@ -176,6 +176,30 @@ _parse_left_whitespace(CSVScanner *self)
_skip_whitespace(&self->src);
}

/*
 * Map a single ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its
 * numeric value in the range 0..15.
 *
 * Returns -1 when the character is not a hexadecimal digit.
 */
static gint
_decode_xdigit(gchar xdigit)
{
  gint value = -1;

  if ('0' <= xdigit && xdigit <= '9')
    value = xdigit - '0';
  else if ('a' <= xdigit && xdigit <= 'f')
    value = 10 + (xdigit - 'a');
  else if ('A' <= xdigit && xdigit <= 'F')
    value = 10 + (xdigit - 'A');

  return value;
}

/*
 * Combine two hexadecimal digit characters into one byte value.
 *
 * xdigit1 supplies the high nibble, xdigit2 the low nibble.  Returns the
 * value 0..255 on success, or -1 if either character is not a hex digit.
 */
static gint
_decode_xbyte(gchar xdigit1, gchar xdigit2)
{
  const gint high = _decode_xdigit(xdigit1);
  const gint low = _decode_xdigit(xdigit2);

  if (high >= 0 && low >= 0)
    {
      /* both nibbles are within 0..15, so their bits never overlap */
      return (high << 4) | low;
    }

  return -1;
}

static void
_parse_character_with_quotation(CSVScanner *self)
{
Expand Down Expand Up @@ -213,6 +237,16 @@ _parse_character_with_quotation(CSVScanner *self)
case 'v':
ch = '\v';
break;
case 'x':
if (*(self->src+1) && *(self->src+2))
{
ch = _decode_xbyte(*(self->src+1), *(self->src+2));
if (ch >= 0)
self->src += 2;
else
ch = 'x';
}
break;
default:
break;
}
Expand Down
56 changes: 56 additions & 0 deletions lib/scanner/csv-scanner/tests/test_csv_scanner.c
Expand Up @@ -359,6 +359,62 @@ Test(csv_scanner, escape_backslash_sequences)
csv_scanner_deinit(&scanner);
}

/*
 * Checks that, with the CSV_SCANNER_ESCAPE_BACKSLASH_WITH_SEQUENCES dialect,
 * \xXX sequences inside a quoted column are decoded.
 *
 * The scanner input is: foo,"\x41\x00\x40" and the second column ("bar") is
 * compared against "A\x00@".
 *
 * NOTE(review): the expected literal contains an embedded NUL byte.  If
 * _column_nv_equals() compares values as NUL-terminated C strings, only the
 * leading "A" is effectively verified and the \x40 decoding goes unchecked --
 * confirm against the helper's implementation.
 */
Test(csv_scanner, escape_backslash_x_sequences)
{
const gchar *columns[] = { "foo", "bar", NULL };

_default_options_with_flags(columns, CSV_SCANNER_STRIP_WHITESPACE);

csv_scanner_options_set_dialect(&options, CSV_SCANNER_ESCAPE_BACKSLASH_WITH_SEQUENCES);
csv_scanner_init(&scanner, &options, "foo,\"\\x41\\x00\\x40\"");

/* before the first scan the current column is expected to be "foo" already */
cr_expect(_column_name_equals("foo"));
cr_expect(!_scan_complete());

/* first column: plain, unquoted value */
cr_expect(_scan_next());
cr_expect(_column_name_equals("foo"));
cr_expect(!_scan_complete());

/* second column: quoted value made up of \xXX escapes */
cr_expect(_scan_next());
cr_expect(_column_name_equals("bar"));
cr_expect(_column_nv_equals("bar", "A\x00@"));
cr_expect(!_scan_complete());

/* go past the last column */
cr_expect(!_scan_next());
cr_expect(_scan_complete());
cr_expect(_column_name_unset());
csv_scanner_deinit(&scanner);
}

/*
 * Checks how a malformed \x escape is handled with the
 * CSV_SCANNER_ESCAPE_BACKSLASH_WITH_SEQUENCES dialect.
 *
 * The scanner input is: foo,"\x4Q" where 'Q' is not a hexadecimal digit.
 * The second column ("bar") is expected to hold "x4Q": the backslash is
 * dropped and the remaining characters are kept literally.
 */
Test(csv_scanner, escape_backslash_invalid_x_sequence)
{
const gchar *columns[] = { "foo", "bar", NULL };

_default_options_with_flags(columns, CSV_SCANNER_STRIP_WHITESPACE);

csv_scanner_options_set_dialect(&options, CSV_SCANNER_ESCAPE_BACKSLASH_WITH_SEQUENCES);
csv_scanner_init(&scanner, &options, "foo,\"\\x4Q\"");

/* before the first scan the current column is expected to be "foo" already */
cr_expect(_column_name_equals("foo"));
cr_expect(!_scan_complete());

/* first column: plain, unquoted value */
cr_expect(_scan_next());
cr_expect(_column_name_equals("foo"));
cr_expect(!_scan_complete());

/* second column: quoted value holding the invalid \x4Q sequence */
cr_expect(_scan_next());
cr_expect(_column_name_equals("bar"));
cr_expect(_column_nv_equals("bar", "x4Q"));
cr_expect(!_scan_complete());

/* go past the last column */
cr_expect(!_scan_next());
cr_expect(_scan_complete());
cr_expect(_column_name_unset());
csv_scanner_deinit(&scanner);
}

static void
setup(void)
{
Expand Down

0 comments on commit ae0291a

Please sign in to comment.