Skip to content

Commit

Permalink
Improve Polyfill parser
Browse files Browse the repository at this point in the history
  • Loading branch information
nyamsprod committed Sep 27, 2018
1 parent c23027f commit 27a0dd8
Showing 1 changed file with 44 additions and 21 deletions.
65 changes: 44 additions & 21 deletions src/Polyfill/EmptyEscapeParser.php
Expand Up @@ -36,6 +36,21 @@
* with the SplFileObject::READ_CSV and SplFileObject::SKIP_EMPTY flags on
* and the empty string as the escape parameter.
*
* <code>
* $file = new SplFileObject('/path/to/file.csv', 'r');
* $file->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
* $file->setCsvControl($delimiter, $enclosure, ''); //this does not currently work in any PHP stable release
* </code>
*
* Instead you can do this:
*
* <code>
* $file = new SplFileObject('/path/to/file.csv', 'r');
* $file->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
* $file->setCsvControl($delimiter, $enclosure, $escape);
* EmptyEscapeParser::parse($file); //parsing will be done while ignoring the escape character value.
* </code>
*
* @see https://php.net/manual/en/function.fgetcsv.php
* @see https://php.net/manual/en/function.fgets.php
* @see https://tools.ietf.org/html/rfc4180
Expand Down Expand Up @@ -78,12 +93,11 @@ final class EmptyEscapeParser
/**
* Converts the document into a CSV record iterator.
*
* The returned record array is similar to the returned value of fgetcsv
*
* - If the line is empty the record is skipped
* - Otherwise the array contains strings.
* Each record array contains string elements.
*
* @param SplFileObject|Stream $document
*
* @return Generator|array[]
*/
public static function parse($document): Generator
{
Expand All @@ -93,27 +107,15 @@ public static function parse($document): Generator
self::$document->setFlags(0);
self::$document->rewind();
while (self::$document->valid()) {
$record = [];
self::$line = self::$document->fgets();
do {
$method = 'extractFieldContent';
$buffer = ltrim(self::$line, self::$trim_mask);
if (($buffer[0] ?? '') === self::$enclosure) {
$method = 'extractEnclosedFieldContent';
self::$line = $buffer;
}

$record[] = self::$method();
} while (false !== self::$line);

$record = self::extractRecord();
if ([null] !== $record) {
yield $record;
}
}
}

/**
* Filter the submitted document.
* Filters the submitted document.
*
* @param SplFileObject|Stream $document
*
Expand All @@ -133,7 +135,28 @@ private static function filterDocument($document)
}

/**
* Extract field without enclosure as per RFC4180.
* Extracts a record from the CSV document.
*/
private static function extractRecord(): array
{
    // Load the next raw line of the document into the shared line buffer.
    self::$line = self::$document->fgets();

    $fields = [];
    do {
        // Strip leading characters listed in the trim mask so that the
        // first significant character can be compared to the enclosure.
        $trimmed = ltrim(self::$line, self::$trim_mask);

        if (self::$enclosure === ($trimmed[0] ?? '')) {
            // Enclosed field: the extractor works on the trimmed buffer.
            self::$line = $trimmed;
            $fields[] = self::extractEnclosedFieldContent();
        } else {
            // Plain field: the extractor works on the untouched buffer.
            $fields[] = self::extractFieldContent();
        }

        // NOTE(review): presumably the extract* helpers consume the field
        // from self::$line and set it to false once the record is
        // exhausted - confirm against their implementation.
    } while (false !== self::$line);

    return $fields;
}

/**
* Extracts the content from a field without enclosure.
*
* - Leading and trailing whitespaces must be removed.
* - trailing line-breaks must be removed.
Expand All @@ -157,13 +180,13 @@ private static function extractFieldContent()
}

/**
* Extract field with enclosure as per RFC4180.
* Extracts the content from a field with enclosure.
*
* - Field content can spread on multiple document lines.
* - Content inside enclosure must be preserved.
* - Double enclosure sequence must be replaced by single enclosure character.
* - Trailing line break must be removed if they are not part of the field content.
* - Invalid fields content are treated as per fgetcsv behavior.
* - Invalid field content is treated as per fgetcsv behavior.
*/
private static function extractEnclosedFieldContent(): string
{
Expand Down

0 comments on commit 27a0dd8

Please sign in to comment.