Skip to content

Commit

Permalink
Add context options to handle BOM
Browse files Browse the repository at this point in the history
  • Loading branch information
malarzm committed Oct 9, 2019
1 parent e3b513b commit 93602d0
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 2 deletions.
1 change: 1 addition & 0 deletions src/Symfony/Component/Serializer/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CHANGELOG
-----

* deprecated the `XmlEncoder::TYPE_CASE_ATTRIBUTES` constant, use `XmlEncoder::TYPE_CAST_ATTRIBUTES` instead
* added option to output a UTF-8 BOM in CSV encoder via `CsvEncoder::OUTPUT_UTF8_BOM_KEY` context option

4.3.0
-----
Expand Down
22 changes: 20 additions & 2 deletions src/Symfony/Component/Serializer/Encoder/CsvEncoder.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
namespace Symfony\Component\Serializer\Encoder;

use Symfony\Component\Serializer\Exception\InvalidArgumentException;
use Symfony\Component\Serializer\Exception\UnexpectedValueException;

/**
* Encodes CSV data.
Expand All @@ -30,6 +31,9 @@ class CsvEncoder implements EncoderInterface, DecoderInterface
const ESCAPE_FORMULAS_KEY = 'csv_escape_formulas';
const AS_COLLECTION_KEY = 'as_collection';
const NO_HEADERS_KEY = 'no_headers';
const OUTPUT_UTF8_BOM_KEY = 'output_utf8_bom';

private const UTF8_BOM = "\xEF\xBB\xBF";

private $formulasStartCharacters = ['=', '-', '+', '@'];
private $defaultContext = [
Expand All @@ -40,6 +44,7 @@ class CsvEncoder implements EncoderInterface, DecoderInterface
self::HEADERS_KEY => [],
self::KEY_SEPARATOR_KEY => '.',
self::NO_HEADERS_KEY => false,
self::OUTPUT_UTF8_BOM_KEY => false,
];

/**
Expand Down Expand Up @@ -90,7 +95,7 @@ public function encode($data, $format, array $context = [])
}
}

list($delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas) = $this->getCsvOptions($context);
list($delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas, $outputBom) = $this->getCsvOptions($context);

foreach ($data as &$value) {
$flattened = [];
Expand All @@ -114,6 +119,14 @@ public function encode($data, $format, array $context = [])
$value = stream_get_contents($handle);
fclose($handle);

if ($outputBom) {
if (!preg_match('//u', $value)) {
throw new UnexpectedValueException('You are trying to add an UTF-8 BOM to a non UTF-8 text.');
}

$value = self::UTF8_BOM . $value;
}

return $value;
}

Expand All @@ -134,6 +147,10 @@ public function decode($data, $format, array $context = [])
fwrite($handle, $data);
rewind($handle);

if (0 === strpos($data, self::UTF8_BOM)) {
fseek($handle, \strlen(self::UTF8_BOM));
}

$headers = null;
$nbHeaders = 0;
$headerCount = [];
Expand Down Expand Up @@ -238,12 +255,13 @@ private function getCsvOptions(array $context): array
$keySeparator = $context[self::KEY_SEPARATOR_KEY] ?? $this->defaultContext[self::KEY_SEPARATOR_KEY];
$headers = $context[self::HEADERS_KEY] ?? $this->defaultContext[self::HEADERS_KEY];
$escapeFormulas = $context[self::ESCAPE_FORMULAS_KEY] ?? $this->defaultContext[self::ESCAPE_FORMULAS_KEY];
$outputBom = $context[self::OUTPUT_UTF8_BOM_KEY] ?? $this->defaultContext[self::OUTPUT_UTF8_BOM_KEY];

if (!\is_array($headers)) {
throw new InvalidArgumentException(sprintf('The "%s" context variable must be an array or null, given "%s".', self::HEADERS_KEY, \gettype($headers)));
}

return [$delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas];
return [$delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas, $outputBom];
}

/**
Expand Down
35 changes: 35 additions & 0 deletions src/Symfony/Component/Serializer/Tests/Encoder/CsvEncoderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

use PHPUnit\Framework\TestCase;
use Symfony\Component\Serializer\Encoder\CsvEncoder;
use Symfony\Component\Serializer\Exception\UnexpectedValueException;

/**
* @author Kévin Dunglas <dunglas@gmail.com>
Expand Down Expand Up @@ -595,4 +596,38 @@ public function testDecodeWithoutHeader()
CsvEncoder::NO_HEADERS_KEY => true,
]));
}

/**
 * Encoding with the OUTPUT_UTF8_BOM_KEY context option enabled must prefix
 * the generated CSV with the three UTF-8 BOM bytes.
 */
public function testBOMIsAddedOnDemand()
{
    $row = ['foo' => 'hello', 'bar' => 'hey ho'];
    $context = [CsvEncoder::OUTPUT_UTF8_BOM_KEY => true];

    // The nowdoc body stays unindented on purpose: it is compared verbatim
    // against the encoder output, so leading whitespace would change the bytes.
    $expectedCsv = "\xEF\xBB\xBF".<<<'CSV'
foo,bar
hello,"hey ho"

CSV;

    $this->assertEquals($expectedCsv, $this->encoder->encode($row, 'csv', $context));
}

/**
 * Requesting a BOM for data that is not valid UTF-8 must be rejected with
 * an UnexpectedValueException carrying the documented message.
 */
public function testBOMCanNotBeAddedToNonUtf8Csv()
{
    // Re-encode the umlauts as ISO-8859-1 so the payload is no longer valid UTF-8.
    $latin1Row = [mb_convert_encoding('ÄÖÜ', 'ISO-8859-1', 'UTF-8')];
    $context = [CsvEncoder::OUTPUT_UTF8_BOM_KEY => true];

    $this->expectException(UnexpectedValueException::class);
    $this->expectExceptionMessage('You are trying to add an UTF-8 BOM to a non UTF-8 text.');

    $this->encoder->encode($latin1Row, 'csv', $context);
}

/**
 * Decoding input that begins with a UTF-8 BOM must not leak the BOM bytes
 * into the first header name of the decoded row.
 */
public function testBOMIsStripped()
{
    // The nowdoc body stays unindented on purpose: it is the literal CSV payload.
    $input = "\xEF\xBB\xBF".<<<'CSV'
foo,bar
hello,"hey ho"

CSV;
    $context = [CsvEncoder::AS_COLLECTION_KEY => false];
    $expectedRow = ['foo' => 'hello', 'bar' => 'hey ho'];

    $this->assertEquals($expectedRow, $this->encoder->decode($input, 'csv', $context));
}
}

0 comments on commit 93602d0

Please sign in to comment.