6
6
use Ems \Contracts \Core \ContentIterator ;
7
7
use Ems \Contracts \Core \Configurable ;
8
8
use Ems \Contracts \Core \StringConverter ;
9
+ use Ems \Core \Exceptions \DetectionFailedException ;
9
10
use Ems \Core \StringConverter \MBStringConverter ;
11
+ use Ems \Core \StringConverter \CharsetGuard ;
10
12
use Ems \Core \ConfigurableTrait ;
11
13
use Ems \Core \LocalFilesystem ;
12
14
use Ems \Core \Helper ;
@@ -94,6 +96,11 @@ class CsvReadIterator implements ContentIterator
94
96
**/
95
97
protected $ firstLines ;
96
98
99
+ /**
100
+ * @var string
101
+ **/
102
+ protected $ firstLinesRaw ;
103
+
97
104
/**
98
105
* @var self
99
106
**/
@@ -117,8 +124,20 @@ class CsvReadIterator implements ContentIterator
117
124
protected $ shouldConvert = false ;
118
125
119
126
/**
120
- * @param string $filePath (optional)
121
- * @param Filesystem $filesystem (optional)
127
+ * @var string
128
+ **/
129
+ protected $ convertFrom = '' ;
130
+
131
+ /**
132
+ * @var CharsetGuard
133
+ **/
134
+ protected $ charsetGuard ;
135
+
136
+ /**
137
+ * @param string $filePath (optional)
138
+ * @param Filesystem $filesystem (optional)
139
+ * @param CsvDetector $detector (optional)
140
+ * @param LineReadIterator $lineReader (optional)
122
141
**/
123
142
public function __construct ($ filePath = '' , Filesystem $ filesystem = null , CsvDetector $ detector = null , LineReadIterator $ lineReader = null )
124
143
{
@@ -229,11 +248,38 @@ public function setLineReader(LineReadIterator $lineReader)
229
248
return $ this ;
230
249
}
231
250
251
/**
 * Return the string converter assigned to this iterator. It is the
 * object convertEncoding() delegates to when a conversion is needed.
 *
 * @return StringConverter
 **/
public function getStringConverter()
{
    return $this->stringConverter;
}
236
258
259
/**
 * Return the charset guard. If none has been assigned yet, a default
 * CharsetGuard is created and stored through setCharsetGuard() first.
 *
 * @return CharsetGuard
 **/
public function getCharsetGuard()
{
    // Already assigned (or created by an earlier call): hand it out as is.
    if ($this->charsetGuard) {
        return $this->charsetGuard;
    }

    // Go through the setter so an overridden setter still sees the default.
    $this->setCharsetGuard(new CharsetGuard);

    return $this->charsetGuard;
}
269
+
270
/**
 * Assign the charset guard. getHeader() asks it to check the raw first
 * lines for encoding errors when no header could be detected.
 *
 * @param CharsetGuard $guard
 *
 * @return self
 **/
public function setCharsetGuard(CharsetGuard $guard)
{
    $this->charsetGuard = $guard;

    // Fluent interface: allow chaining further setter calls.
    return $this;
}
282
+
237
283
/**
238
284
* Return the csv header. A header is an array of column names ['id', 'name'...].
239
285
*
@@ -245,19 +291,42 @@ public function getHeader()
245
291
return $ this ->header ;
246
292
}
247
293
294
+ $ this ->updateConversion ();
295
+
248
296
if (!$ this ->getFilePath ()) {
249
297
return $ this ->header ;
250
298
}
251
299
252
- $ header = $ this ->detector ->header (
253
- $ this ->firstLines (),
254
- $ this ->getSeparator (),
255
- $ this ->getDelimiter ()
256
- );
300
+ $ detectorException = null ;
257
301
258
- $ this ->headerWasDetected = !$ this ->isNumericHeader ($ header );
302
+ try {
303
+ $ header = $ this ->detector ->header (
304
+ $ this ->firstLines (),
305
+ $ this ->getSeparator (),
306
+ $ this ->getDelimiter ()
307
+ );
308
+ } catch (DetectionFailedException $ detectorException ) {
309
+ $ header = [];
310
+ }
259
311
260
- $ this ->setHeader ($ this ->headerWasDetected ? $ header : []);
312
+ $ this ->headerWasDetected = $ header && !$ this ->isNumericHeader ($ header );
313
+
314
+
315
+ if ($ this ->headerWasDetected ) {
316
+ $ this ->setHeader ($ header );
317
+ return $ this ->header ;
318
+ }
319
+
320
+ // If the header could not be detected, check for encoding errors
321
+ // These will be thrown also if the header is not forced
322
+ $ this ->getCharsetGuard ()->forceCharset ($ this ->firstLinesRaw (), $ this ->convertFrom );
323
+
324
+ // If we can get here the charsetGuard did not throw an exception
325
+ if ($ detectorException ) {
326
+ throw $ detectorException ;
327
+ }
328
+
329
+ $ this ->setHeader ([]);
261
330
262
331
return $ this ->header ;
263
332
}
@@ -334,7 +403,7 @@ protected function readRow($handle, $chunkSize)
334
403
}
335
404
336
405
if (!$ this ->hasHeader ) {
337
- return $ this ->convertEncoding ($ row );
406
+ return array_map ( function ( $ value ) { return $ this ->convertEncoding ($ value ); }, $ row );
338
407
}
339
408
340
409
$ namedRow = [];
@@ -377,6 +446,7 @@ protected function isSkippableRow(array $row)
377
446
**/
378
447
protected function onFileChanged ()
379
448
{
449
+ $ this ->firstLinesRaw = null ;
380
450
$ this ->firstLines = null ;
381
451
if ($ this ->separatorWasDetected ) {
382
452
$ this ->separator = '' ;
@@ -394,8 +464,6 @@ protected function onFileChanged()
394
464
protected function onRewind()
{
    // A rewind starts a fresh pass, so the header row has not been skipped yet.
    $this->headerRowSkipped = false;

    // Refresh the conversion flags on every rewind. getHeader() has a
    // return path that is taken before its own updateConversion() call,
    // so relying on getHeader() alone can leave shouldConvert/convertFrom
    // out of sync with the current ENCODING option.
    $this->updateConversion();

    // Trigger detection once
    $this->getHeader();
}
@@ -413,6 +481,25 @@ protected function firstLines($lineCount=20)
413
481
return $ this ->firstLines ;
414
482
}
415
483
484
+ $ this ->firstLines = $ this ->convertEncoding ($ this ->firstLinesRaw ($ lineCount ));
485
+
486
+ return $ this ->firstLines ;
487
+
488
+ }
489
+
490
+ /**
491
+ * Return the first lines of the input file
492
+ *
493
+ * @param int $lineCount
494
+ *
495
+ * @return string
496
+ **/
497
+ protected function firstLinesRaw ($ lineCount =20 )
498
+ {
499
+ if ($ this ->firstLinesRaw !== null ) {
500
+ return $ this ->firstLinesRaw ;
501
+ }
502
+
416
503
$ this ->lineReader ->setFilePath ($ this ->filePath );
417
504
418
505
$ lines = [];
@@ -427,10 +514,9 @@ protected function firstLines($lineCount=20)
427
514
428
515
$ this ->lineReader ->releaseHandle ();
429
516
430
- $ this ->firstLines = $ this ->convertEncoding (implode ("\n" , $ lines ));
431
-
432
- return $ this ->firstLines ;
517
+ $ this ->firstLinesRaw = implode ("\n" , $ lines );
433
518
519
+ return $ this ->firstLinesRaw ;
434
520
}
435
521
436
522
/**
@@ -467,27 +553,25 @@ protected function newCountInstance()
467
553
/**
 * Convert the passed string to utf-8 when conversion is enabled.
 *
 * With shouldConvert switched off the value is handed back untouched.
 * Otherwise it is cast to string and passed to the string converter,
 * using the ENCODING option as the source encoding.
 *
 * @param string $data
 *
 * @return string
 **/
protected function convertEncoding($data)
{
    return $this->shouldConvert
        ? $this->stringConverter->convert((string) $data, 'utf-8', $this->getOption(self::ENCODING))
        : $data;
}
480
567
481
- if (!is_array ($ data )) {
482
- return $ this ->stringConverter ->convert ("$ data " , 'utf-8 ' , $ this ->getOption (self ::ENCODING ));
483
- }
484
- dd ('Issoch array ' );
485
- $ converted = [];
486
-
487
- foreach ($ data as $ key =>$ value ) {
488
- $ converted [$ key ] = $ this ->convert ($ data );
489
- }
490
-
491
- return $ converted ;
568
/**
 * Refresh the cached conversion settings from the ENCODING option.
 *
 * The upper-cased encoding is stored in convertFrom, and shouldConvert is
 * switched on whenever that encoding is not UTF-8. The values are cached
 * for performance reasons.
 **/
protected function updateConversion()
{
    $encoding = strtoupper($this->getOption(self::ENCODING));

    $this->convertFrom = $encoding;
    $this->shouldConvert = ($encoding != 'UTF-8');
}
493
577
}
0 commit comments