Skip to content

Commit

Permalink
Merge 0233f9c into 6a3d0e6
Browse files Browse the repository at this point in the history
  • Loading branch information
nyamsprod committed Aug 26, 2019
2 parents 6a3d0e6 + 0233f9c commit cd2bd94
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 5 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ All Notable changes to `Csv` will be documented in this file

### Added

- Nothing
- Support for disabling/enabling BOM stripping via adding:
- `AbstractCsv::enableBOMSkipping`
- `AbstractCsv::disableBOMSkipping`
- `AbstractCsv::isBOMSkippingEnabled`

### Deprecated

Expand Down
8 changes: 8 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@
"@phpunit"
]
},
"scripts-descriptions": {
"phpcs": "Runs coding style test suite",
"phpstan": "Runs complete codebase static analysis",
"phpstan-src": "Runs source code static analysis",
"phpstan-test": "Runs test suite static analysis",
"phpunit": "Runs unit and functional testing",
"test": "Runs full test suite"
},
"suggest": {
"ext-iconv" : "Needed to ease transcoding CSV using iconv stream filters"
},
Expand Down
32 changes: 32 additions & 0 deletions docs/9.0/connections/bom.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,35 @@ $bom = $csv->getOutputBOM(); //returns "\xEF\xBB\xBF"
<p class="message-info">The default output <code>BOM</code> character is set to an empty string.</p>
<p class="message-warning">The output BOM sequence is <strong>never</strong> saved to the CSV document.</p>

### Controlling BOM sequence skipping

<p class="message-info">Since version <code>9.4.0</code>.</p>

~~~php
AbstractCsv::enableBOMSkipping(): self;
AbstractCsv::disableBOMSkipping(): self;
AbstractCsv::isBOMSkippingEnabled(): bool;
~~~

- `enableBOMSkipping`: enables skipping the input BOM from your CSV document.
- `disableBOMSkipping`: disables skipping the input BOM from your CSV document.
- `isBOMSkippingEnabled`: tells whether skipping the input BOM will be done.

<p class="message-notice">By default, and to avoid a BC break, BOM skipping is enabled.</p>

If your document does not contain any BOM sequence, you can speed up the CSV iterator by removing the step that ensures the BOM sequence, if present, is skipped.

~~~php
$raw_csv = Reader::BOM_UTF8."john,doe,john.doe@example.com\njane,doe,jane.doe@example.com\n";
$csv = Reader::createFromString($raw_csv);
$csv->setOutputBOM(Reader::BOM_UTF16_BE);
$csv->disableBOMSkipping();
ob_start();
$csv->output();
$document = ob_get_clean();
~~~

The returned `$document` will contain **2** BOM markers instead of one.

<p class="message-warning">If you are using a <code>stream</code> that is not seekable, you should disable BOM skipping; otherwise an <code>Exception</code> will be triggered.</p>
<p class="message-warning">The BOM sequence is never removed from the CSV document, it is only skipped from the result set.</p>
46 changes: 44 additions & 2 deletions src/AbstractCsv.php
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ abstract class AbstractCsv implements ByteSequence
*/
protected $document;

/**
* Tells whether the BOM must be stripped.
*
* @var bool
*/
protected $is_BOM_skipping_enabled = true;

/**
* New instance.
*
Expand Down Expand Up @@ -247,6 +254,14 @@ public function hasStreamFilter(string $filtername): bool
return $this->stream_filters[$filtername] ?? false;
}

/**
* Tells whether the input BOM sequence is skipped when present.
*
* BOM skipping is enabled by default; the flag is toggled with
* enableBOMSkipping() and disableBOMSkipping().
*/
public function isBOMSkippingEnabled(): bool
{
return $this->is_BOM_skipping_enabled;
}

/**
* Returns the CSV document as a Generator of string chunks.
*
Expand Down Expand Up @@ -310,9 +325,12 @@ public function output(string $filename = null): int
if (null !== $filename) {
$this->sendHeaders($filename);
}
$input_bom = $this->getInputBOM();

$this->document->rewind();
$this->document->fseek(strlen($input_bom));
if ($this->is_BOM_skipping_enabled) {
$this->document->fseek(strlen($this->getInputBOM()));
}

echo $this->output_bom;

return strlen($this->output_bom) + $this->document->fpassthru();
Expand Down Expand Up @@ -420,6 +438,30 @@ public function setEscape(string $escape): self
throw new Exception(sprintf('%s() expects escape to be a single character or the empty string %s given', __METHOD__, $escape));
}

/**
* Enables BOM skipping.
*
* Turns the BOM skipping flag on so that the input BOM sequence, if present,
* is skipped when the document is read or sent to the output.
*
* @return static the current instance, to allow method chaining
*/
public function enableBOMSkipping(): self
{
$this->is_BOM_skipping_enabled = true;

return $this;
}

/**
* Disables BOM skipping.
*
* Turns the BOM skipping flag off so that the input BOM sequence, if present,
* is kept when the document is read or sent to the output.
*
* @return static the current instance, to allow method chaining
*/
public function disableBOMSkipping(): self
{
$this->is_BOM_skipping_enabled = false;

return $this;
}

/**
* Sets the BOM sequence to prepend the CSV on output.
*
Expand Down
8 changes: 6 additions & 2 deletions src/Reader.php
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,13 @@ public function getRecords(array $header = []): Iterator
$normalized = static function ($record): bool {
return is_array($record) && $record != [null];
};
$bom = $this->getInputBOM();
$document = $this->getDocument();

$bom = '';
if ($this->is_BOM_skipping_enabled) {
$bom = $this->getInputBOM();
}

$document = $this->getDocument();
$records = $this->stripBOM(new CallbackFilterIterator($document, $normalized), $bom);
if (null !== $this->header_offset) {
$records = new CallbackFilterIterator($records, function (array $record, int $offset): bool {
Expand Down
41 changes: 41 additions & 0 deletions tests/CsvTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
use function function_exists;
use function iterator_to_array;
use function League\Csv\is_iterable as CSVIsiterable;
use function ob_get_clean;
use function ob_start;
use function strtolower;
use function tmpfile;
use function unlink;
Expand Down Expand Up @@ -457,4 +459,43 @@ public function testGetPathnameWithTempFile()
self::assertSame('php://temp', Writer::createFromString(new SplTempFileObject())->getPathname());
self::assertSame('php://temp', Writer::createFromFileObject(new SplTempFileObject())->getPathname());
}

/**
 * Checks the BOM skipping flag: on by default, then switched off and back on.
 *
 * @covers ::isBOMSkippingEnabled
 * @covers ::disableBOMSkipping
 * @covers ::enableBOMSkipping
 */
public function testBOMStripping()
{
    $csv = Reader::createFromString();
    self::assertTrue($csv->isBOMSkippingEnabled());

    // Both setters return the instance, so the getter can be chained directly.
    self::assertFalse($csv->disableBOMSkipping()->isBOMSkippingEnabled());
    self::assertTrue($csv->enableBOMSkipping()->isBOMSkippingEnabled());
}

/**
 * Ensures output() skips the input BOM by default and, once BOM skipping is
 * disabled, emits it right after the output BOM.
 *
 * @runInSeparateProcess
 * @covers ::output
 */
public function testOutputDoesNotStripBOM()
{
    $raw_csv = Reader::BOM_UTF8."john,doe,john.doe@example.com\njane,doe,jane.doe@example.com\n";
    $csv = Reader::createFromString($raw_csv);
    $csv->setOutputBOM(Reader::BOM_UTF16_BE);

    // Default behaviour: the input BOM is skipped, only the output BOM is sent.
    ob_start();
    $csv->output();
    $result = ob_get_clean();
    self::assertNotContains(Reader::BOM_UTF8, $result);
    self::assertContains(Reader::BOM_UTF16_BE, $result);

    // With skipping disabled, both BOM sequences are sent.
    $csv->disableBOMSkipping();
    ob_start();
    $csv->output();
    $result = ob_get_clean();
    self::assertContains(Reader::BOM_UTF16_BE, $result);
    self::assertContains(Reader::BOM_UTF8, $result);
    // A dedicated assertion reports both the expected prefix and the actual
    // string on failure, unlike assertTrue() on a strpos() comparison.
    self::assertStringStartsWith(Reader::BOM_UTF16_BE.Reader::BOM_UTF8, $result);
}
}
16 changes: 16 additions & 0 deletions tests/ReaderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,22 @@ public function testStripNoBOM()
}
}

/**
 * Ensures the input BOM is kept in the first field of the record when BOM
 * skipping is disabled.
 */
public function testDisablingBOMStripping()
{
    $expected_record = [Reader::BOM_UTF16_LE.'john', 'doe', 'john.doe@example.com'];
    $fp = fopen('php://temp', 'r+');
    fputcsv($fp, $expected_record);
    $csv = Reader::createFromStream($fp);
    $csv->disableBOMSkipping();
    self::assertSame(Reader::BOM_UTF16_LE, $csv->getInputBOM());

    $seen = 0;
    foreach ($csv as $record) {
        self::assertSame($expected_record, $record);
        ++$seen;
    }
    // Without this guard the test would pass vacuously if the reader
    // yielded no record at all.
    self::assertGreaterThan(0, $seen);

    // Drop the reader reference before closing the underlying stream.
    $csv = null;
    fclose($fp);
    $fp = null;
}

/**
* @covers ::getIterator
* @dataProvider appliedFlagsProvider
Expand Down

0 comments on commit cd2bd94

Please sign in to comment.