@@ -166,6 +166,7 @@ def eos?
166
166
end
167
167
168
168
# Opens a new keep region at the scanner's current position.
#
# Any bytes already consumed under the enclosing keep are flushed into
# that keep's buffer first (via adjust_last_keep), then a fresh
# [start_position, buffer] entry with an empty (nil) buffer is pushed.
def keep_start
  adjust_last_keep
  @keeps.push([@scanner.pos, nil])
end
171
172
@@ -196,32 +197,48 @@ def keep_back
196
197
end
197
198
198
199
# Discards the innermost keep region.
#
# If the dropped keep had accumulated buffered data and an enclosing keep
# remains, that data is handed to the enclosing keep: appended to its
# buffer when it already has one, otherwise adopted as its buffer (the
# same String object, not a copy).
def keep_drop
  _, buffer = @keeps.pop
  return unless buffer

  last_keep = @keeps.last
  return unless last_keep

  if last_keep[1]
    last_keep[1] << buffer
  else
    last_keep[1] = buffer
  end
end
201
212
202
213
# Returns whatever the current scanner reports as its remaining
# (not yet consumed) input.
def rest
  @scanner.rest
end
205
216
206
217
private
218
# Flushes the bytes consumed under the innermost keep into its buffer.
#
# Does nothing when there is no keep, or when the scanner has not moved
# past the keep's start position. Otherwise the byte range from the keep's
# start up to the scanner's current position is copied out of the
# scanner's string and appended to the keep's buffer (a new buffer is
# created from a copy if the keep has none), and the keep's start
# position is reset to 0.
#
# NOTE(review): the start position is reset to 0 on every flush. This
# method is called from both keep_start and read_chunk; when it runs
# from keep_start the scanner string is not replaced here, so a later
# flush would slice from byte 0 again -- confirm against keep_end /
# keep_back that already-buffered bytes cannot be copied twice.
def adjust_last_keep
  keep = @keeps.last
  return if keep.nil?

  start_pos = keep[0]
  return if @scanner.pos == start_pos

  keep_data = @scanner.string.byteslice(start_pos, @scanner.pos - start_pos)
  # byteslice returns nil when the range lies outside the string.
  if keep_data
    keep_buffer = keep[1]
    if keep_buffer
      keep_buffer << keep_data
    else
      keep[1] = keep_data.dup
    end
  end
  keep[0] = 0
end
237
+
207
238
def read_chunk
208
239
return false if @last_scanner
209
240
210
- unless @keeps . empty?
211
- keep = @keeps . last
212
- keep_start = keep [ 0 ]
213
- string = @scanner . string
214
- keep_data = string . byteslice ( keep_start , @scanner . pos - keep_start )
215
- if keep_data
216
- keep_buffer = keep [ 1 ]
217
- if keep_buffer
218
- keep_buffer << keep_data
219
- else
220
- keep [ 1 ] = keep_data . dup
221
- end
222
- end
223
- keep [ 0 ] = 0
224
- end
241
+ adjust_last_keep
225
242
226
243
input = @inputs . first
227
244
case input
@@ -728,28 +745,26 @@ def may_quoted?
728
745
sample [ 0 , 128 ] . index ( @quote_character )
729
746
end
730
747
731
- SCANNER_TEST = ( ENV [ "CSV_PARSER_SCANNER_TEST" ] == "yes" )
732
- if SCANNER_TEST
733
# Thin wrapper around StringIO that exposes only gets, each_line and eof?.
# build_scanner wraps sample strings in it when SCANNER_TEST is enabled.
class UnoptimizedStringIO # :nodoc:
  # Opens +string+ read-only in binary mode, tagged with the string's
  # own encoding.
  def initialize(string)
    @io = StringIO.new(string, "rb:#{string.encoding}")
  end

  # Forwards to StringIO#gets.
  def gets(*args)
    @io.gets(*args)
  end

  # Forwards to StringIO#each_line, passing the block through.
  def each_line(*args, &block)
    @io.each_line(*args, &block)
  end

  # Forwards to StringIO#eof?.
  def eof?
    @io.eof?
  end
end
750
765
751
- SCANNER_TEST_CHUNK_SIZE =
752
- Integer ( ( ENV [ "CSV_PARSER_SCANNER_TEST_CHUNK_SIZE" ] || "1" ) , 10 )
766
+ SCANNER_TEST = ( ENV [ "CSV_PARSER_SCANNER_TEST" ] == "yes" )
767
+ if SCANNER_TEST
753
768
def build_scanner
754
769
inputs = @samples . collect do |sample |
755
770
UnoptimizedStringIO . new ( sample )
@@ -759,9 +774,11 @@ def build_scanner
759
774
else
760
775
inputs << @input
761
776
end
777
+ chunk_size =
778
+ Integer ( ( ENV [ "CSV_PARSER_SCANNER_TEST_CHUNK_SIZE" ] || "1" ) , 10 )
762
779
InputsScanner . new ( inputs ,
763
780
@encoding ,
764
- chunk_size : SCANNER_TEST_CHUNK_SIZE )
781
+ chunk_size : chunk_size )
765
782
end
766
783
else
767
784
def build_scanner
0 commit comments