Skip to content

Commit

Permalink
Add context options to handle BOM
Browse files Browse the repository at this point in the history
  • Loading branch information
malarzm committed Oct 7, 2019
1 parent e3b513b commit 618b90d
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 2 deletions.
31 changes: 31 additions & 0 deletions src/Symfony/Component/Serializer/ByteOrderMark.php
@@ -0,0 +1,31 @@
<?php

namespace Symfony\Component\Serializer;

/**
 * Byte sequences of the byte order marks (BOMs) for UTF-8, UTF-16 and UTF-32.
 *
 * CsvEncoder::determineBom() enumerates every constant of this class through
 * reflection, so each constant declared here is treated as a BOM candidate and
 * the declaration order is observable by that caller.
 */
final class ByteOrderMark
{
    /** BOM of UTF-8 encoded text (3 bytes). */
    public const BOM_UTF8 = "\xEF\xBB\xBF";

    /** BOM of big-endian UTF-16 encoded text (2 bytes). */
    public const BOM_UTF16_BE = "\xFE\xFF";

    /** BOM of little-endian UTF-16 encoded text (2 bytes). */
    public const BOM_UTF16_LE = "\xFF\xFE";

    /** BOM of big-endian UTF-32 encoded text (4 bytes). */
    public const BOM_UTF32_BE = "\x00\x00\xFE\xFF";

    /** BOM of little-endian UTF-32 encoded text (4 bytes); begins with the UTF-16 LE BOM. */
    public const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
}
3 changes: 3 additions & 0 deletions src/Symfony/Component/Serializer/CHANGELOG.md
Expand Up @@ -5,6 +5,9 @@ CHANGELOG
-----

* deprecated the `XmlEncoder::TYPE_CASE_ATTRIBUTES` constant, use `XmlEncoder::TYPE_CAST_ATTRIBUTES` instead
* added constants for common byte order marks (BOMs) in the `ByteOrderMark` class
* added the `CsvEncoder::OUTPUT_BOM` context option to prepend a BOM to the CSV encoder's output
* added the `CsvEncoder::SKIP_INPUT_BOM` context option (defaults to `true`) to skip a leading BOM when decoding CSV

4.3.0
-----
Expand Down
31 changes: 29 additions & 2 deletions src/Symfony/Component/Serializer/Encoder/CsvEncoder.php
Expand Up @@ -11,6 +11,7 @@

namespace Symfony\Component\Serializer\Encoder;

use Symfony\Component\Serializer\ByteOrderMark;
use Symfony\Component\Serializer\Exception\InvalidArgumentException;

/**
Expand All @@ -30,6 +31,8 @@ class CsvEncoder implements EncoderInterface, DecoderInterface
const ESCAPE_FORMULAS_KEY = 'csv_escape_formulas';
const AS_COLLECTION_KEY = 'as_collection';
const NO_HEADERS_KEY = 'no_headers';
const OUTPUT_BOM = 'output_bom';
const SKIP_INPUT_BOM = 'skip_input_bom';

private $formulasStartCharacters = ['=', '-', '+', '@'];
private $defaultContext = [
Expand All @@ -40,6 +43,8 @@ class CsvEncoder implements EncoderInterface, DecoderInterface
self::HEADERS_KEY => [],
self::KEY_SEPARATOR_KEY => '.',
self::NO_HEADERS_KEY => false,
self::OUTPUT_BOM => '',
self::SKIP_INPUT_BOM => true,
];

/**
Expand Down Expand Up @@ -90,7 +95,7 @@ public function encode($data, $format, array $context = [])
}
}

list($delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas) = $this->getCsvOptions($context);
list($delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas, $outputBOM) = $this->getCsvOptions($context);

foreach ($data as &$value) {
$flattened = [];
Expand All @@ -101,6 +106,8 @@ public function encode($data, $format, array $context = [])

$headers = array_merge(array_values($headers), array_diff($this->extractHeaders($data), $headers));

fwrite($handle, $outputBOM);

if (!($context[self::NO_HEADERS_KEY] ?? false)) {
fputcsv($handle, $headers, $delimiter, $enclosure, $escapeChar);
}
Expand Down Expand Up @@ -134,6 +141,11 @@ public function decode($data, $format, array $context = [])
fwrite($handle, $data);
rewind($handle);

if (($context[self::SKIP_INPUT_BOM] ?? true) === false) {
$inputBom = $this->determineBom(substr($data, 0, 4));
fseek($handle, strlen($inputBom));
}

$headers = null;
$nbHeaders = 0;
$headerCount = [];
Expand Down Expand Up @@ -238,12 +250,13 @@ private function getCsvOptions(array $context): array
$keySeparator = $context[self::KEY_SEPARATOR_KEY] ?? $this->defaultContext[self::KEY_SEPARATOR_KEY];
$headers = $context[self::HEADERS_KEY] ?? $this->defaultContext[self::HEADERS_KEY];
$escapeFormulas = $context[self::ESCAPE_FORMULAS_KEY] ?? $this->defaultContext[self::ESCAPE_FORMULAS_KEY];
$outputBOM = $context[self::OUTPUT_BOM] ?? $this->defaultContext[self::OUTPUT_BOM];

if (!\is_array($headers)) {
throw new InvalidArgumentException(sprintf('The "%s" context variable must be an array or null, given "%s".', self::HEADERS_KEY, \gettype($headers)));
}

return [$delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas];
return [$delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas, $outputBOM];
}

/**
Expand Down Expand Up @@ -281,4 +294,18 @@ private function extractHeaders(iterable $data): array

return $headers;
}

/**
 * Returns the byte order mark that $text starts with, or '' when it starts with none.
 *
 * Candidates are all constants of ByteOrderMark. They are tested longest first
 * because the UTF-32 LE mark (FF FE 00 00) begins with the UTF-16 LE mark (FF FE):
 * testing the shorter one first would report UTF-16 LE for UTF-32 LE input and
 * leave two BOM bytes in the data.
 *
 * @param string $text The leading bytes of the input (the longest known BOM is 4 bytes)
 *
 * @return string The matching BOM byte sequence, or an empty string
 */
private function determineBom(string $text): string
{
    static $list;

    if (null === $list) {
        $list = array_values((new \ReflectionClass(ByteOrderMark::class))->getConstants());

        // Longest sequences first; marks of equal length never share a prefix,
        // so their relative order is irrelevant.
        usort($list, static function (string $a, string $b): int {
            return \strlen($b) <=> \strlen($a);
        });
    }

    foreach ($list as $sequence) {
        if (0 === strncmp($text, $sequence, \strlen($sequence))) {
            return $sequence;
        }
    }

    return '';
}
}
26 changes: 26 additions & 0 deletions src/Symfony/Component/Serializer/Tests/Encoder/CsvEncoderTest.php
Expand Up @@ -12,6 +12,7 @@
namespace Symfony\Component\Serializer\Tests\Encoder;

use PHPUnit\Framework\TestCase;
use Symfony\Component\Serializer\ByteOrderMark;
use Symfony\Component\Serializer\Encoder\CsvEncoder;

/**
Expand Down Expand Up @@ -595,4 +596,29 @@ public function testDecodeWithoutHeader()
CsvEncoder::NO_HEADERS_KEY => true,
]));
}

/**
 * Encoding with the OUTPUT_BOM context option must prefix the CSV with the given BOM.
 */
public function testBOMIsAddedOnDemand()
{
    $value = ['foo' => 'hello', 'bar' => 'hey ho'];

    // fputcsv() terminates every record with "\n", so the encoded document
    // ends with a newline after the last row.
    $expected = ByteOrderMark::BOM_UTF8."foo,bar\nhello,\"hey ho\"\n";

    $this->assertSame(
        $expected,
        $this->encoder->encode($value, 'csv', [CsvEncoder::OUTPUT_BOM => ByteOrderMark::BOM_UTF8])
    );
}

/**
 * Decoding CSV that starts with a UTF-8 BOM must not leak the BOM into the first header name.
 *
 * NOTE(review): the BOM is expected to be stripped while SKIP_INPUT_BOM is passed
 * as false. decode() only strips when the option is exactly false, although the
 * option name and its default of true suggest the opposite polarity; confirm
 * which behaviour is intended.
 */
public function testBOMIsStripped()
{
    $payload = ByteOrderMark::BOM_UTF8."foo,bar\nhello,\"hey ho\"";
    $decoded = $this->encoder->decode($payload, 'csv', [CsvEncoder::SKIP_INPUT_BOM => false]);

    $this->assertEquals(['foo' => 'hello', 'bar' => 'hey ho'], $decoded);
}
}

0 comments on commit 618b90d

Please sign in to comment.