From 112ea766ec92e4db6fd6227f27e37b6f644117b6 Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Sun, 24 Jul 2022 17:09:20 +0200 Subject: [PATCH 1/2] Fix #69181: READ_CSV|DROP_NEW_LINE drops newlines within fields One may argue that `DROP_NEW_LINE` does not make sense in combination with `READ_CSV`, but without `DROP_NEW_LINE`, `SKIP_EMPTY` does not skip empty lines at all. We could fix that, but do not for BC reasons. Instead we no longer drop newlines in `spl_filesystem_file_read_ex()` when reading CSV, but handle that in `spl_filesystem_file_read_csv()` by treating lines with only (CR)LF as being empty as well. --- ext/spl/spl_directory.c | 28 +++++++++++----- ext/spl/tests/bug69181.phpt | 65 +++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 9 deletions(-) create mode 100644 ext/spl/tests/bug69181.phpt diff --git a/ext/spl/spl_directory.c b/ext/spl/spl_directory.c index 076be2f870b56..0cd6ae9de50b1 100644 --- a/ext/spl/spl_directory.c +++ b/ext/spl/spl_directory.c @@ -1887,7 +1887,7 @@ static zend_result spl_filesystem_object_cast(zend_object *readobj, zval *writeo } /* }}} */ -static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bool silent, zend_long line_add) /* {{{ */ +static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bool silent, zend_long line_add, bool csv) /* {{{ */ { char *buf; size_t line_len = 0; @@ -1917,7 +1917,7 @@ static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bo intern->u.file.current_line = estrdup(""); intern->u.file.current_line_len = 0; } else { - if (SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_DROP_NEW_LINE)) { + if (!csv && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_DROP_NEW_LINE)) { if (line_len > 0 && buf[line_len - 1] == '\n') { line_len--; if (line_len > 0 && buf[line_len - 1] == '\r') { @@ -1935,20 +1935,30 @@ static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bo return SUCCESS; } /* }}} */ -static inline zend_result spl_filesystem_file_read(spl_filesystem_object *intern, bool silent) +static inline zend_result spl_filesystem_file_read(spl_filesystem_object *intern, bool silent, bool csv) { zend_long line_add = (intern->u.file.current_line) ? 1 : 0; - return spl_filesystem_file_read_ex(intern, silent, line_add); + return spl_filesystem_file_read_ex(intern, silent, line_add, csv); +} + +static bool is_line_empty(spl_filesystem_object *intern) +{ + char *current_line = intern->u.file.current_line; + size_t current_line_len = intern->u.file.current_line_len; + return current_line_len == 0 + || ((SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_READ_CSV) && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_DROP_NEW_LINE) + && ((current_line_len == 1 && current_line[0] == '\n') + || (current_line_len == 2 && current_line[0] == '\r' && current_line[1] == '\n')))); } static zend_result spl_filesystem_file_read_csv(spl_filesystem_object *intern, char delimiter, char enclosure, int escape, zval *return_value) /* {{{ */ { do { - zend_result ret = spl_filesystem_file_read(intern, /* silent */ true); + zend_result ret = spl_filesystem_file_read(intern, /* silent */ true, /* csv */ true); if (ret != SUCCESS) { return ret; } - } while (!intern->u.file.current_line_len && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_SKIP_EMPTY)); + } while (is_line_empty(intern) && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_SKIP_EMPTY)); size_t buf_len = intern->u.file.current_line_len; char *buf = estrndup(intern->u.file.current_line, buf_len); @@ -2006,7 +2016,7 @@ static zend_result spl_filesystem_file_read_line_ex(zval * this_ptr, spl_filesys zval_ptr_dtor(&retval); return SUCCESS; } else { - return spl_filesystem_file_read(intern, /* silent */ true); + return spl_filesystem_file_read(intern, /* silent */ true, /* csv */ false); } } /* }}} */ @@ -2214,7 +2224,7 @@ PHP_METHOD(SplFileObject, fgets) CHECK_SPL_FILE_OBJECT_IS_INITIALIZED(intern); - if (spl_filesystem_file_read_ex(intern, /* silent */ false, /* line_add */ 1) == FAILURE) { + if (spl_filesystem_file_read_ex(intern, /* silent */ false, /* line_add */ 1, /* csv */ false) == FAILURE) { RETURN_THROWS(); } RETURN_STRINGL(intern->u.file.current_line, intern->u.file.current_line_len); @@ -2644,7 +2654,7 @@ PHP_METHOD(SplFileObject, fscanf) CHECK_SPL_FILE_OBJECT_IS_INITIALIZED(intern); /* Get next line */ - if (spl_filesystem_file_read(intern, /* silent */ false) == FAILURE) { + if (spl_filesystem_file_read(intern, /* silent */ false, /* csv */ false) == FAILURE) { RETURN_THROWS(); } diff --git a/ext/spl/tests/bug69181.phpt b/ext/spl/tests/bug69181.phpt new file mode 100644 index 0000000000000..e34683008e889 --- /dev/null +++ b/ext/spl/tests/bug69181.phpt @@ -0,0 +1,65 @@ +--TEST-- +Bug #69181 (READ_CSV|DROP_NEW_LINE drops newlines within fields) +--FILE-- +setFlags(SplFileObject::SKIP_EMPTY | SplFileObject::DROP_NEW_LINE | SplFileObject::READ_CSV); +var_dump(iterator_to_array($file)); + +$file->rewind(); +while (($record = $file->fgetcsv())) { + var_dump($record); +} +?> +--EXPECT-- +array(2) { + [0]=> + array(2) { + [0]=> + string(12) "foo + +bar +baz" + [1]=> + string(3) "qux" + } + [2]=> + array(2) { + [0]=> + string(11) "foo +bar +baz" + [1]=> + string(3) "qux" + } +} +array(2) { + [0]=> + string(12) "foo + +bar +baz" + [1]=> + string(3) "qux" +} +array(2) { + [0]=> + string(11) "foo +bar +baz" + [1]=> + string(3) "qux" +} +--CLEAN-- + From fa7decdddf8eadddf46336ba14e5bd6c8d7c499d Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Tue, 26 Jul 2022 12:45:32 +0200 Subject: [PATCH 2/2] Make test expectation more readable by inserting a separator --- ext/spl/tests/bug69181.phpt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ext/spl/tests/bug69181.phpt b/ext/spl/tests/bug69181.phpt index e34683008e889..729a5bd600b7a 100644 --- a/ext/spl/tests/bug69181.phpt +++ b/ext/spl/tests/bug69181.phpt @@ -15,6 +15,8 @@ $file = new SplFileObject($filename); $file->setFlags(SplFileObject::SKIP_EMPTY | SplFileObject::DROP_NEW_LINE | SplFileObject::READ_CSV); var_dump(iterator_to_array($file)); +echo "\n====\n\n"; + $file->rewind(); while (($record = $file->fgetcsv())) { var_dump($record); @@ -42,6 +44,9 @@ baz" string(3) "qux" } } + +==== + array(2) { [0]=> string(12) "foo