Skip to content

Commit 41b67f4

Browse files
committed
[jvm] fix decoding of bufs whose element width does not match the encoding's bit width
When we want to decode a buf8 as utf16, we have to take two elems at a time, assemble them into what Java treats as a code point, and append that to our StringBuilder. For buf32->utf16 we do the opposite (each elem is split into two 16-bit halves that are appended separately), and for buf16->utf16 we get away with simply appending the elems to the StringBuilder. Note: since StringBuilder does not distinguish between utf16 and utf32, we treat the two encodings identically until we know better.
1 parent bdb3259 commit 41b67f4

File tree

1 file changed

+33
-12
lines changed
  • src/vm/jvm/runtime/org/perl6/nqp/runtime

1 file changed

+33
-12
lines changed

src/vm/jvm/runtime/org/perl6/nqp/runtime/Ops.java

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,9 @@
7979
import org.perl6.nqp.sixmodel.reprs.VMArray;
8080
import org.perl6.nqp.sixmodel.reprs.VMArrayInstance;
8181
import org.perl6.nqp.sixmodel.reprs.VMArrayInstance_i16;
82+
import org.perl6.nqp.sixmodel.reprs.VMArrayInstance_u16;
8283
import org.perl6.nqp.sixmodel.reprs.VMArrayInstance_i32;
84+
import org.perl6.nqp.sixmodel.reprs.VMArrayInstance_u32;
8385
import org.perl6.nqp.sixmodel.reprs.VMArrayInstance_i8;
8486
import org.perl6.nqp.sixmodel.reprs.VMArrayInstance_u8;
8587
import org.perl6.nqp.sixmodel.reprs.VMExceptionInstance;
@@ -3356,21 +3358,40 @@ else if (encoding.equals("ascii")) {
33563358
else if (encoding.equals("iso-8859-1")) {
33573359
return decode8(buf, "ISO-8859-1", tc);
33583360
}
3359-
else if (encoding.equals("utf16")) {
3361+
else if (encoding.equals("utf16") || encoding.equals("utf32")) {
33603362
int n = (int)buf.elems(tc);
33613363
StringBuilder sb = new StringBuilder(n);
3362-
for (int i = 0; i < n; i++) {
3363-
buf.at_pos_native(tc, i);
3364-
sb.append((char)tc.native_i);
3364+
if (buf instanceof VMArrayInstance_u8 || buf instanceof VMArrayInstance_i8) {
3365+
if (encoding.equals("utf16") && n % 2 == 1) {
3366+
throw ExceptionHandling.dieInternal(tc, "Malformed UTF-16; odd number of bytes");
3367+
}
3368+
if (encoding.equals("utf32") && n % 4 > 0) {
3369+
throw ExceptionHandling.dieInternal(tc, "Malformed UTF-32; number of bytes must be factor of four");
3370+
}
3371+
for (int i = 0; i < n;) {
3372+
buf.at_pos_native(tc, i++);
3373+
int a = (int)tc.native_i;
3374+
buf.at_pos_native(tc, i++);
3375+
int b = (int)tc.native_i;
3376+
sb.appendCodePoint(a + (b << 8));
3377+
}
33653378
}
3366-
return sb.toString();
3367-
}
3368-
else if (encoding.equals("utf32")) {
3369-
int n = (int)buf.elems(tc);
3370-
StringBuilder sb = new StringBuilder(n);
3371-
for (int i = 0; i < n; i++) {
3372-
buf.at_pos_native(tc, i);
3373-
sb.appendCodePoint((int)tc.native_i);
3379+
else if (buf instanceof VMArrayInstance_i16 || buf instanceof VMArrayInstance_u16) {
3380+
for (int i = 0; i < n; i++) {
3381+
buf.at_pos_native(tc, i);
3382+
sb.appendCodePoint((int)tc.native_i);
3383+
}
3384+
}
3385+
else if (buf instanceof VMArrayInstance_i32 || buf instanceof VMArrayInstance_u32) {
3386+
for (int i = 0; i < n; i++) {
3387+
buf.at_pos_native(tc, i);
3388+
int a = (int)tc.native_i;
3389+
sb.appendCodePoint(a & 0xFFFF);
3390+
sb.appendCodePoint(a >> 16);
3391+
}
3392+
}
3393+
else {
3394+
throw ExceptionHandling.dieInternal(tc, "Unknown buf type: " + buf.getClass() + "/" + Ops.typeName(buf, tc));
33743395
}
33753396
return sb.toString();
33763397
}

0 commit comments

Comments
 (0)