Skip to content

Commit

Permalink
Fix #69181: READ_CSV|DROP_NEW_LINE drops newlines within fields
Browse files Browse the repository at this point in the history
One may argue that `DROP_NEW_LINE` does not make sense in combination
with `READ_CSV`, but without `DROP_NEW_LINE`, `SKIP_EMPTY` does not
skip empty lines at all.  We could fix that, but do not for BC reasons.
Instead we no longer drop newlines in `spl_filesystem_file_read_ex()`
when reading CSV, but handle that in `spl_filesystem_file_read_csv()`
by treating lines with only (CR)LF as being empty as well.

Closes GH-7618.
  • Loading branch information
cmb69 committed Jul 26, 2022
1 parent 2fd5e82 commit 5d52d47
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 9 deletions.
4 changes: 4 additions & 0 deletions NEWS
Expand Up @@ -30,6 +30,10 @@ PHP NEWS
- Sockets:
. Added SOL_FILTER socket option for Solaris. (David Carlier)

- SPL:
. Fixed bug #69181 (READ_CSV|DROP_NEW_LINE drops newlines within fields).
(cmb)

21 Jul 2022, PHP 8.2.0beta1

- CLI:
Expand Down
28 changes: 19 additions & 9 deletions ext/spl/spl_directory.c
Expand Up @@ -1887,7 +1887,7 @@ static zend_result spl_filesystem_object_cast(zend_object *readobj, zval *writeo
}
/* }}} */

static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bool silent, zend_long line_add) /* {{{ */
static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bool silent, zend_long line_add, bool csv) /* {{{ */
{
char *buf;
size_t line_len = 0;
Expand Down Expand Up @@ -1917,7 +1917,7 @@ static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bo
intern->u.file.current_line = estrdup("");
intern->u.file.current_line_len = 0;
} else {
if (SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_DROP_NEW_LINE)) {
if (!csv && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_DROP_NEW_LINE)) {
if (line_len > 0 && buf[line_len - 1] == '\n') {
line_len--;
if (line_len > 0 && buf[line_len - 1] == '\r') {
Expand All @@ -1935,20 +1935,30 @@ static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bo
return SUCCESS;
} /* }}} */

static inline zend_result spl_filesystem_file_read(spl_filesystem_object *intern, bool silent)
static inline zend_result spl_filesystem_file_read(spl_filesystem_object *intern, bool silent, bool csv)
{
zend_long line_add = (intern->u.file.current_line) ? 1 : 0;
return spl_filesystem_file_read_ex(intern, silent, line_add);
return spl_filesystem_file_read_ex(intern, silent, line_add, csv);
}

static bool is_line_empty(spl_filesystem_object *intern)
{
char *current_line = intern->u.file.current_line;
size_t current_line_len = intern->u.file.current_line_len;
return current_line_len == 0
|| ((SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_READ_CSV) && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_DROP_NEW_LINE)
&& ((current_line_len == 1 && current_line[0] == '\n')
|| (current_line_len == 2 && current_line[0] == '\r' && current_line[1] == '\n'))));
}

static zend_result spl_filesystem_file_read_csv(spl_filesystem_object *intern, char delimiter, char enclosure, int escape, zval *return_value) /* {{{ */
{
do {
zend_result ret = spl_filesystem_file_read(intern, /* silent */ true);
zend_result ret = spl_filesystem_file_read(intern, /* silent */ true, /* csv */ true);
if (ret != SUCCESS) {
return ret;
}
} while (!intern->u.file.current_line_len && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_SKIP_EMPTY));
} while (is_line_empty(intern) && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_SKIP_EMPTY));

size_t buf_len = intern->u.file.current_line_len;
char *buf = estrndup(intern->u.file.current_line, buf_len);
Expand Down Expand Up @@ -2006,7 +2016,7 @@ static zend_result spl_filesystem_file_read_line_ex(zval * this_ptr, spl_filesys
zval_ptr_dtor(&retval);
return SUCCESS;
} else {
return spl_filesystem_file_read(intern, /* silent */ true);
return spl_filesystem_file_read(intern, /* silent */ true, /* csv */ false);
}
} /* }}} */

Expand Down Expand Up @@ -2214,7 +2224,7 @@ PHP_METHOD(SplFileObject, fgets)

CHECK_SPL_FILE_OBJECT_IS_INITIALIZED(intern);

if (spl_filesystem_file_read_ex(intern, /* silent */ false, /* line_add */ 1) == FAILURE) {
if (spl_filesystem_file_read_ex(intern, /* silent */ false, /* line_add */ 1, /* csv */ false) == FAILURE) {
RETURN_THROWS();
}
RETURN_STRINGL(intern->u.file.current_line, intern->u.file.current_line_len);
Expand Down Expand Up @@ -2644,7 +2654,7 @@ PHP_METHOD(SplFileObject, fscanf)
CHECK_SPL_FILE_OBJECT_IS_INITIALIZED(intern);

/* Get next line */
if (spl_filesystem_file_read(intern, /* silent */ false) == FAILURE) {
if (spl_filesystem_file_read(intern, /* silent */ false, /* csv */ false) == FAILURE) {
RETURN_THROWS();
}

Expand Down
70 changes: 70 additions & 0 deletions ext/spl/tests/bug69181.phpt
@@ -0,0 +1,70 @@
--TEST--
Bug #69181 (READ_CSV|DROP_NEW_LINE drops newlines within fields)
--FILE--
<?php
$filename = __DIR__ . "/bug69181.csv";
$csv = <<<CSV
"foo\n\nbar\nbaz",qux
"foo\nbar\nbaz",qux
CSV;

file_put_contents($filename, $csv);

$file = new SplFileObject($filename);
$file->setFlags(SplFileObject::SKIP_EMPTY | SplFileObject::DROP_NEW_LINE | SplFileObject::READ_CSV);
var_dump(iterator_to_array($file));

echo "\n====\n\n";

$file->rewind();
while (($record = $file->fgetcsv())) {
var_dump($record);
}
?>
--EXPECT--
array(2) {
[0]=>
array(2) {
[0]=>
string(12) "foo

bar
baz"
[1]=>
string(3) "qux"
}
[2]=>
array(2) {
[0]=>
string(11) "foo
bar
baz"
[1]=>
string(3) "qux"
}
}

====

array(2) {
[0]=>
string(12) "foo

bar
baz"
[1]=>
string(3) "qux"
}
array(2) {
[0]=>
string(11) "foo
bar
baz"
[1]=>
string(3) "qux"
}
--CLEAN--
<?php
@unlink(__DIR__ . "/bug69181.csv");
?>

0 comments on commit 5d52d47

Please sign in to comment.