10
10
import org .perl6 .nqp .runtime .ExceptionHandling ;
11
11
import org .perl6 .nqp .runtime .ThreadContext ;
12
12
import org .perl6 .nqp .sixmodel .SixModelObject ;
13
+ import org .perl6 .nqp .sixmodel .StorageSpec ;
13
14
14
15
public class DecoderInstance extends SixModelObject {
15
16
private CharsetDecoder decoder ;
16
17
private List <ByteBuffer > toDecode ;
17
18
private List <CharBuffer > decoded ;
19
+ private List <String > lineSeps ;
18
20
19
21
public void configure (ThreadContext tc , String encoding , SixModelObject config ) {
20
- if (decoder == null )
22
+ if (decoder == null ) {
21
23
decoder = Charset .forName (encoding ).newDecoder ();
22
- else
24
+ lineSeps = new ArrayList <String >();
25
+ lineSeps .add ("\n " );
26
+ lineSeps .add ("\r \n " );
27
+ }
28
+ else {
23
29
throw ExceptionHandling .dieInternal (tc , "Decoder already configured" );
30
+ }
31
+ }
32
+
33
+ public void setLineSeps (ThreadContext tc , SixModelObject seps ) {
34
+ final int prim = seps .st .REPR .get_value_storage_spec (tc , seps .st ).boxed_primitive ;
35
+ if (prim != StorageSpec .BP_STR )
36
+ ExceptionHandling .dieInternal (tc ,
37
+ "Line separators must be provided as an array of native strings" );
38
+ lineSeps .clear ();
39
+ long numSeps = seps .elems (tc );
40
+ for (long i = 0 ; i < numSeps ; i ++) {
41
+ seps .at_pos_native (tc , i );
42
+ lineSeps .add (tc .native_s );
43
+ }
24
44
}
25
45
26
46
public void addBytes (ThreadContext tc , ByteBuffer bytes ) {
@@ -37,7 +57,7 @@ public String takeChars(ThreadContext tc, long chars) {
37
57
return "" ;
38
58
39
59
CharBuffer target = CharBuffer .allocate ((int )chars + 1 );
40
- eatDecodedChars (target );
60
+ eatAllDecodedChars (target );
41
61
if (target .position () != chars )
42
62
eatUndecodedBytes (target , false );
43
63
@@ -68,7 +88,7 @@ public String takeAvailableChars(ThreadContext tc) {
68
88
69
89
int maxChars = availableDecodedChars () + availableUndecodedBytes ();
70
90
CharBuffer target = CharBuffer .allocate (maxChars );
71
- eatDecodedChars (target );
91
+ eatAllDecodedChars (target );
72
92
eatUndecodedBytes (target , true );
73
93
74
94
String normalized = Normalizer .normalize (
@@ -86,7 +106,7 @@ public String takeAllChars(ThreadContext tc) {
86
106
ensureConfigured (tc );
87
107
int maxChars = availableDecodedChars () + availableUndecodedBytes ();
88
108
CharBuffer target = CharBuffer .allocate (maxChars );
89
- eatDecodedChars (target );
109
+ eatAllDecodedChars (target );
90
110
if (toDecode != null ) {
91
111
if (toDecode .size () == 0 )
92
112
toDecode .add (ByteBuffer .allocate (0 ));
@@ -97,6 +117,72 @@ public String takeAllChars(ThreadContext tc) {
97
117
return Normalizer .normalize (decodedBuffer (target ), Normalizer .Form .NFC );
98
118
}
99
119
120
+ public String takeLine (ThreadContext tc , boolean chomp , boolean eof ) {
121
+ ensureConfigured (tc );
122
+ while (true ) {
123
+ /* See if we can find the separator in any of the decoded chars. */
124
+ int charsToTake = 0 ;
125
+ for (int i = 0 ; i < (decoded == null ? 0 : decoded .size ()); i ++) {
126
+ CharBuffer search = decoded .get (i );
127
+ for (int j = 0 ; j < search .remaining (); j ++) {
128
+ char c = search .charAt (j );
129
+ for (int k = 0 ; k < lineSeps .size (); k ++) {
130
+ String sep = lineSeps .get (k );
131
+ if (sep .charAt (0 ) == c ) {
132
+ if (sep .length () == 1 || sepMatchAt (i , j , sep )) {
133
+ return takeCharsSkipChars (
134
+ chomp ? charsToTake : charsToTake + sep .length (),
135
+ chomp ? sep .length () : 0 );
136
+ }
137
+ }
138
+ }
139
+ charsToTake ++;
140
+ }
141
+ }
142
+
143
+ /* If there are no more buffers to decode then we're done. */
144
+ if (toDecode == null || toDecode .size () == 0 )
145
+ break ;
146
+
147
+ /* Otherwise decode one of them. */
148
+ ByteBuffer decodee = toDecode .get (0 );
149
+ CharBuffer target = CharBuffer .allocate (decodee .limit ());
150
+ decoder .decode (decodee , target , eof && toDecode .size () == 1 );
151
+ target .rewind ();
152
+ if (decoded == null )
153
+ decoded = new ArrayList <CharBuffer >();
154
+ decoded .add (target );
155
+ toDecode .remove (0 );
156
+ }
157
+
158
+ return eof ? takeAllChars (tc ) : null ;
159
+ }
160
+
161
+ private boolean sepMatchAt (int decStart , int charStart , String sep ) {
162
+ int sepIndex = 0 ;
163
+ boolean firstBuffer = true ;
164
+ for (int i = decStart ; i < decoded .size (); i ++) {
165
+ CharBuffer search = decoded .get (i );
166
+ for (int j = firstBuffer ? charStart : 0 ; j < search .remaining (); j ++) {
167
+ if (search .charAt (j ) != sep .charAt (sepIndex ++))
168
+ return false ;
169
+ if (sepIndex == sep .length ())
170
+ return true ;
171
+ }
172
+ firstBuffer = false ;
173
+ }
174
+ return false ;
175
+ }
176
+
177
+ private String takeCharsSkipChars (int take , int skip ) {
178
+ CharBuffer target = CharBuffer .allocate (take );
179
+ eatDecodedChars (target , take );
180
+ if (skip > 0 )
181
+ eatDecodedChars (CharBuffer .allocate (skip ), skip );
182
+ target .rewind ();
183
+ return Normalizer .normalize (target , Normalizer .Form .NFC );
184
+ }
185
+
100
186
private int availableDecodedChars () {
101
187
int available = 0 ;
102
188
if (decoded != null )
@@ -113,14 +199,32 @@ private int availableUndecodedBytes() {
113
199
return available ;
114
200
}
115
201
116
- private void eatDecodedChars (CharBuffer target ) {
202
+ private void eatAllDecodedChars (CharBuffer target ) {
117
203
if (decoded != null ) {
118
- for (int i = 0 ; i < decoded .size (); i ++)
204
+ for (int i = 0 ; i < decoded .size (); i ++) {
119
205
target .append (decoded .get (i ));
206
+ }
120
207
decoded .clear ();
121
208
}
122
209
}
123
210
211
+ private void eatDecodedChars (CharBuffer target , int n ) {
212
+ int remaining = n ;
213
+ while (remaining > 0 && decoded .size () > 0 ) {
214
+ CharBuffer source = decoded .get (0 );
215
+ if (source .remaining () <= remaining ) {
216
+ target .append (source );
217
+ remaining -= source .remaining ();
218
+ decoded .remove (0 );
219
+ }
220
+ else {
221
+ target .append (source .subSequence (0 , remaining ));
222
+ decoded .set (0 , source .subSequence (remaining , source .remaining ()));
223
+ remaining = 0 ;
224
+ }
225
+ }
226
+ }
227
+
124
228
private void eatUndecodedBytes (CharBuffer target , boolean eof ) {
125
229
if (toDecode != null ) {
126
230
while (toDecode .size () > 0 ) {
@@ -156,6 +260,6 @@ public long isEmpty(ThreadContext tc) {
156
260
157
261
private void ensureConfigured (ThreadContext tc ) {
158
262
if (decoder == null )
159
- throw ExceptionHandling .dieInternal (tc , "Docder not yet configured" );
263
+ throw ExceptionHandling .dieInternal (tc , "Decoder not yet configured" );
160
264
}
161
265
}
0 commit comments