From aaf9414a6f50f23cfbae984a4951297de02af686 Mon Sep 17 00:00:00 2001 From: Ignace Nyamagana Butera Date: Wed, 28 Aug 2019 09:35:02 +0200 Subject: [PATCH] Adding empty records process controls --- src/Polyfill/EmptyEscapeParser.php | 2 +- src/Reader.php | 60 ++++++++++++++++-- tests/Polyfill/EmptyEscapeParserTest.php | 4 +- tests/ReaderTest.php | 79 ++++++++++++++++++++++++ 4 files changed, 139 insertions(+), 6 deletions(-) diff --git a/src/Polyfill/EmptyEscapeParser.php b/src/Polyfill/EmptyEscapeParser.php index 6d445aa8..a4425ea1 100644 --- a/src/Polyfill/EmptyEscapeParser.php +++ b/src/Polyfill/EmptyEscapeParser.php @@ -117,7 +117,7 @@ public static function parse($document): Generator self::$document->rewind(); while (self::$document->valid()) { $record = self::extractRecord(); - if (!in_array(null, $record, true)) { + if ([null] === $record || !in_array(null, $record, true)) { yield $record; } } diff --git a/src/Reader.php b/src/Reader.php index fc04ef61..2bb80415 100644 --- a/src/Reader.php +++ b/src/Reader.php @@ -28,6 +28,7 @@ use function array_pad; use function array_slice; use function array_unique; +use function count; use function gettype; use function is_array; use function iterator_count; @@ -75,6 +76,11 @@ class Reader extends AbstractCsv implements Countable, IteratorAggregate, JsonSe */ protected $stream_filter_mode = STREAM_FILTER_READ; + /** + * @var bool + */ + protected $is_empty_records_skipped = true; + /** * {@inheritdoc} */ @@ -137,7 +143,7 @@ public function getHeader(): array protected function setHeader(int $offset): array { $header = $this->seekRow($offset); - if (false === $header || [] === $header) { + if (false === $header || [] === $header || [null] === $header) { throw new Exception(sprintf('The header record does not exist or is empty at offset: `%s`', $offset)); } @@ -175,7 +181,7 @@ protected function getDocument(): Iterator return EmptyEscapeParser::parse($this->document); } - $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY); + $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD); $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape); $this->document->rewind(); @@ -263,8 +269,8 @@ public function jsonSerialize(): array public function getRecords(array $header = []): Iterator { $header = $this->computeHeader($header); - $normalized = static function ($record): bool { - return is_array($record) && $record != [null]; + $normalized = function ($record): bool { + return is_array($record) && (!$this->is_empty_records_skipped || $record != [null]); }; $bom = $this->getInputBOM(); $document = $this->getDocument(); @@ -276,6 +282,18 @@ public function getRecords(array $header = []): Iterator }); } + if (!$this->is_empty_records_skipped) { + $normalized_empty_records = static function (array $record): array { + if ([null] === $record) { + return []; + } + + return $record; + }; + + return $this->combineHeader(new MapIterator($records, $normalized_empty_records), $header); + } + return $this->combineHeader($records, $header); } @@ -376,4 +394,38 @@ public function setHeaderOffset($offset): self return $this; } + + /** + * Enable skipping empty records. + */ + public function enableEmptyRecordsSkipping(): self + { + if (!$this->is_empty_records_skipped) { + $this->is_empty_records_skipped = true; + $this->nb_records = -1; + } + + return $this; + } + + /** + * Disable skipping empty records. + */ + public function disableEmptyRecordsSkipping(): self + { + if ($this->is_empty_records_skipped) { + $this->is_empty_records_skipped = false; + $this->nb_records = -1; + } + + return $this; + } + + /** + * Tells whether empty records are skipped by the instance. + */ + public function isEmptyRecordsSkipped(): bool + { + return $this->is_empty_records_skipped; + } } diff --git a/tests/Polyfill/EmptyEscapeParserTest.php b/tests/Polyfill/EmptyEscapeParserTest.php index 44bcfd3c..feaf6596 100644 --- a/tests/Polyfill/EmptyEscapeParserTest.php +++ b/tests/Polyfill/EmptyEscapeParserTest.php @@ -100,7 +100,7 @@ public function testWorksWithMultiLinesWithDifferentDelimiter() * @covers ::extractFieldContent * @covers ::extractEnclosedFieldContent */ - public function testRemoveEmptyLines() + public function testPreserveEmptyLines() { $source = <<isEmptyRecordsSkipped()); + foreach ($reader as $offset => $record) { + self::assertSame($expected_with_skipping[$offset], $record); + } + + $reader->disableEmptyRecordsSkipping(); + self::assertFalse($reader->isEmptyRecordsSkipped()); + self::assertCount(4, $reader); + foreach ($reader as $offset => $record) { + self::assertSame($expected_with_no_skipping[$offset], $record); + } + + $reader->enableEmptyRecordsSkipping(); + self::assertCount(2, $reader); + self::assertTrue($reader->isEmptyRecordsSkipped()); + foreach ($reader as $offset => $record) { + self::assertSame($expected_with_skipping[$offset], $record); + } + } + + public function sourceProvider(): array + { + $source = <<fwrite($source); + + return [ + 'FileObject' => [ + Reader::createFromFileObject($rsrc), + $expected_with_skipping, + $expected_with_no_skipping, + ], + 'Stream' => [ + Reader::createFromString($source), + $expected_with_skipping, + $expected_with_no_skipping, + ], + 'FileObject with empty escape char' => [ + Reader::createFromFileObject($rsrc)->setEscape(''), + $expected_with_skipping, + $expected_with_no_skipping, + ], + 'Stream with empty escape char' => [ + Reader::createFromString($source)->setEscape(''), + $expected_with_skipping, + $expected_with_no_skipping, + ], + ]; + } }