7
7
import java .text .Normalizer ;
8
8
import java .util .ArrayList ;
9
9
import java .util .List ;
10
+ import org .perl6 .nqp .runtime .Buffers ;
10
11
import org .perl6 .nqp .runtime .ExceptionHandling ;
11
12
import org .perl6 .nqp .runtime .ThreadContext ;
12
13
import org .perl6 .nqp .sixmodel .SixModelObject ;
14
+ import org .perl6 .nqp .sixmodel .StorageSpec ;
13
15
14
16
public class DecoderInstance extends SixModelObject {
17
+ private Charset charset ;
15
18
private CharsetDecoder decoder ;
16
19
private List <ByteBuffer > toDecode ;
17
20
private List <CharBuffer > decoded ;
21
+ private List <String > lineSeps ;
18
22
19
23
public void configure (ThreadContext tc , String encoding , SixModelObject config ) {
20
- if (decoder == null )
21
- decoder = Charset .forName (encoding ).newDecoder ();
22
- else
24
+ if (decoder == null ) {
25
+ charset = Charset .forName (encoding );
26
+ decoder = charset .newDecoder ();
27
+ decoded = new ArrayList <CharBuffer >();
28
+ lineSeps = new ArrayList <String >();
29
+ lineSeps .add ("\n " );
30
+ lineSeps .add ("\r \n " );
31
+ }
32
+ else {
23
33
throw ExceptionHandling .dieInternal (tc , "Decoder already configured" );
34
+ }
35
+ }
36
+
37
+ public void setLineSeps (ThreadContext tc , SixModelObject seps ) {
38
+ final int prim = seps .st .REPR .get_value_storage_spec (tc , seps .st ).boxed_primitive ;
39
+ if (prim != StorageSpec .BP_STR )
40
+ ExceptionHandling .dieInternal (tc ,
41
+ "Line separators must be provided as an array of native strings" );
42
+ lineSeps .clear ();
43
+ long numSeps = seps .elems (tc );
44
+ for (long i = 0 ; i < numSeps ; i ++) {
45
+ seps .at_pos_native (tc , i );
46
+ lineSeps .add (tc .native_s );
47
+ }
24
48
}
25
49
26
50
public void addBytes (ThreadContext tc , ByteBuffer bytes ) {
@@ -37,8 +61,8 @@ public String takeChars(ThreadContext tc, long chars) {
37
61
return "" ;
38
62
39
63
CharBuffer target = CharBuffer .allocate ((int )chars + 1 );
40
- eatDecodedChars (target );
41
- if (target .position () != chars )
64
+ eatDecodedChars (target , ( int )( chars + 1 ) );
65
+ if (target .position () != chars + 1 )
42
66
eatUndecodedBytes (target , false );
43
67
44
68
String normalized = Normalizer .normalize (
@@ -48,8 +72,6 @@ public String takeChars(ThreadContext tc, long chars) {
48
72
String result = normalized .substring (0 , (int )chars );
49
73
String remaining = normalized .substring ((int )chars , normalized .length ());
50
74
if (remaining .length () > 0 ) {
51
- if (decoded == null )
52
- decoded = new ArrayList <CharBuffer >();
53
75
decoded .add (CharBuffer .wrap (remaining ));
54
76
}
55
77
return result ;
@@ -68,7 +90,7 @@ public String takeAvailableChars(ThreadContext tc) {
68
90
69
91
int maxChars = availableDecodedChars () + availableUndecodedBytes ();
70
92
CharBuffer target = CharBuffer .allocate (maxChars );
71
- eatDecodedChars (target );
93
+ eatAllDecodedChars (target );
72
94
eatUndecodedBytes (target , true );
73
95
74
96
String normalized = Normalizer .normalize (
@@ -86,7 +108,7 @@ public String takeAllChars(ThreadContext tc) {
86
108
ensureConfigured (tc );
87
109
int maxChars = availableDecodedChars () + availableUndecodedBytes ();
88
110
CharBuffer target = CharBuffer .allocate (maxChars );
89
- eatDecodedChars (target );
111
+ eatAllDecodedChars (target );
90
112
if (toDecode != null ) {
91
113
if (toDecode .size () == 0 )
92
114
toDecode .add (ByteBuffer .allocate (0 ));
@@ -97,6 +119,101 @@ public String takeAllChars(ThreadContext tc) {
97
119
return Normalizer .normalize (decodedBuffer (target ), Normalizer .Form .NFC );
98
120
}
99
121
122
+ public String takeLine (ThreadContext tc , boolean chomp , boolean eof ) {
123
+ ensureConfigured (tc );
124
+ while (true ) {
125
+ /* See if we can find the separator in any of the decoded chars. */
126
+ int charsToTake = 0 ;
127
+ for (int i = 0 ; i < (decoded == null ? 0 : decoded .size ()); i ++) {
128
+ CharBuffer search = decoded .get (i );
129
+ for (int j = 0 ; j < search .remaining (); j ++) {
130
+ char c = search .charAt (j );
131
+ for (int k = 0 ; k < lineSeps .size (); k ++) {
132
+ String sep = lineSeps .get (k );
133
+ if (sep .charAt (0 ) == c ) {
134
+ if (sep .length () == 1 || sepMatchAt (i , j , sep )) {
135
+ return takeCharsSkipChars (
136
+ chomp ? charsToTake : charsToTake + sep .length (),
137
+ chomp ? sep .length () : 0 );
138
+ }
139
+ }
140
+ }
141
+ charsToTake ++;
142
+ }
143
+ }
144
+
145
+ /* If there are no more buffers to decode then we're done. */
146
+ if (toDecode == null || toDecode .size () == 0 )
147
+ break ;
148
+
149
+ /* Otherwise decode one of them. */
150
+ ByteBuffer decodee = toDecode .get (0 );
151
+ CharBuffer target = CharBuffer .allocate (decodee .limit ());
152
+ decoder .decode (decodee , target , eof && toDecode .size () == 1 );
153
+ target .rewind ();
154
+ decoded .add (target );
155
+ toDecode .remove (0 );
156
+ }
157
+
158
+ return eof ? takeAllChars (tc ) : null ;
159
+ }
160
+
161
+ public long bytesAvailable (ThreadContext tc ) {
162
+ ensureConfigured (tc );
163
+ forceDecodedBackToBytes ();
164
+ return availableUndecodedBytes ();
165
+ }
166
+
167
+ public SixModelObject takeBytes (ThreadContext tc , SixModelObject bufType , long lBytes ) {
168
+ int available = (int )bytesAvailable (tc ); // Implicitly forces decoded back to bytes
169
+ SixModelObject res = bufType .st .REPR .allocate (tc , bufType .st );
170
+ byte [] resBytes = new byte [available ];
171
+ int bytes = (int )lBytes ;
172
+ if (bytes > available )
173
+ bytes = available ;
174
+ int need = bytes ;
175
+ while (need > 0 ) {
176
+ ByteBuffer takeFrom = toDecode .get (0 );
177
+ int fromAvailable = takeFrom .remaining ();
178
+ if (need >= fromAvailable ) {
179
+ takeFrom .get (resBytes , bytes - need , fromAvailable );
180
+ need -= fromAvailable ;
181
+ toDecode .remove (0 );
182
+ }
183
+ else {
184
+ takeFrom .get (resBytes , bytes - need , need );
185
+ need = 0 ;
186
+ }
187
+ }
188
+ Buffers .stashBytes (tc , res , resBytes );
189
+ return res ;
190
+ }
191
+
192
+ private boolean sepMatchAt (int decStart , int charStart , String sep ) {
193
+ int sepIndex = 0 ;
194
+ boolean firstBuffer = true ;
195
+ for (int i = decStart ; i < decoded .size (); i ++) {
196
+ CharBuffer search = decoded .get (i );
197
+ for (int j = firstBuffer ? charStart : 0 ; j < search .remaining (); j ++) {
198
+ if (search .charAt (j ) != sep .charAt (sepIndex ++))
199
+ return false ;
200
+ if (sepIndex == sep .length ())
201
+ return true ;
202
+ }
203
+ firstBuffer = false ;
204
+ }
205
+ return false ;
206
+ }
207
+
208
+ private String takeCharsSkipChars (int take , int skip ) {
209
+ CharBuffer target = CharBuffer .allocate (take );
210
+ eatDecodedChars (target , take );
211
+ if (skip > 0 )
212
+ eatDecodedChars (CharBuffer .allocate (skip ), skip );
213
+ target .rewind ();
214
+ return Normalizer .normalize (target , Normalizer .Form .NFC );
215
+ }
216
+
100
217
private int availableDecodedChars () {
101
218
int available = 0 ;
102
219
if (decoded != null )
@@ -109,18 +226,36 @@ private int availableUndecodedBytes() {
109
226
int available = 0 ;
110
227
if (toDecode != null )
111
228
for (int i = 0 ; i < toDecode .size (); i ++)
112
- available += toDecode .get (i ).capacity ();
229
+ available += toDecode .get (i ).remaining ();
113
230
return available ;
114
231
}
115
232
116
- private void eatDecodedChars (CharBuffer target ) {
233
+ private void eatAllDecodedChars (CharBuffer target ) {
117
234
if (decoded != null ) {
118
- for (int i = 0 ; i < decoded .size (); i ++)
235
+ for (int i = 0 ; i < decoded .size (); i ++) {
119
236
target .append (decoded .get (i ));
237
+ }
120
238
decoded .clear ();
121
239
}
122
240
}
123
241
242
+ private void eatDecodedChars (CharBuffer target , int n ) {
243
+ int remaining = n ;
244
+ while (remaining > 0 && decoded .size () > 0 ) {
245
+ CharBuffer source = decoded .get (0 );
246
+ if (source .remaining () <= remaining ) {
247
+ target .append (source );
248
+ remaining -= source .remaining ();
249
+ decoded .remove (0 );
250
+ }
251
+ else {
252
+ target .append (source .subSequence (0 , remaining ));
253
+ decoded .set (0 , source .subSequence (remaining , source .remaining ()));
254
+ remaining = 0 ;
255
+ }
256
+ }
257
+ }
258
+
124
259
private void eatUndecodedBytes (CharBuffer target , boolean eof ) {
125
260
if (toDecode != null ) {
126
261
while (toDecode .size () > 0 ) {
@@ -145,6 +280,13 @@ private CharBuffer decodedBuffer(CharBuffer buf) {
145
280
return buf .subSequence (0 , pos );
146
281
}
147
282
283
+ private void forceDecodedBackToBytes () {
284
+ for (int i = decoded .size () - 1 ; i >= 0 ; i --) {
285
+ toDecode .add (0 , charset .encode (decoded .get (i )));
286
+ decoded .remove (i );
287
+ }
288
+ }
289
+
148
290
public long isEmpty (ThreadContext tc ) {
149
291
ensureConfigured (tc );
150
292
if (toDecode != null && toDecode .size () > 0 )
@@ -156,6 +298,6 @@ public long isEmpty(ThreadContext tc) {
156
298
157
299
private void ensureConfigured (ThreadContext tc ) {
158
300
if (decoder == null )
159
- throw ExceptionHandling .dieInternal (tc , "Docder not yet configured" );
301
+ throw ExceptionHandling .dieInternal (tc , "Decoder not yet configured" );
160
302
}
161
303
}
0 commit comments