Permalink
Browse files

fix bs_utf{8,16}_size and bs_put_utf{8,16}

This fixes these instructions so they
correctly handle the full range of UTF-8/UTF-16
code points, including various error/edge cases.
  • Loading branch information...
1 parent de9fe76 commit 6c7528fb42196320ba1eb81f0e8d97213eda2026 @krestenkrab krestenkrab committed Oct 7, 2013
@@ -269,6 +269,30 @@ protected void put_int32_big(int val) {
put_byte(b4);
}
+ /**
+ * Writes a 16-bit value as two bytes, choosing the byte order from
+ * {@code flags}: little-endian when BSF_LITTLE is set, big-endian otherwise.
+ */
+ protected void put_int16(int val, int flags) {
+ boolean littleEndian = (flags & EBinMatchState.BSF_LITTLE) > 0;
+ if (!littleEndian) {
+ put_int16_big(val);
+ return;
+ }
+ put_int16_little(val);
+ }
+
+ protected void put_int16_little(int val) {
+ byte b1, b2;
+ b1 = (byte)val; val >>= 8;
+ b2 = (byte)val;
+ put_byte(b1);
+ put_byte(b2);
+ }
+
+ protected void put_int16_big(int val) {
+ byte b1, b2;
+ b2 = (byte)val; val >>= 8;
+ b1 = (byte)val;
+ put_byte(b1);
+ put_byte(b2);
+ }
+
private void put_byte(byte val) {
if (extra_bits == 0) {
@@ -363,66 +387,56 @@ public void put_bitstring(EObject str, int size, int flags) {
public void put_utf8(EObject value, int flags) {
ESmall sm;
- if ((sm=value.testSmall()) != null && sm.value >= 0 && sm.value <= 0x10FFFF) {
+ if ((sm=value.testSmall()) != null && sm.value >= 0 && sm.value < 0x110000) {
- byte[] raw = new String(new char[]{ (char) sm.value }).getBytes(IO.UTF8);
-
- for (int i = 0; i < raw.length; i++) {
- put_byte(raw[i]);
- }
-
- return;
-
- /*
if (sm.value < 0x80) {
put_byte((byte) sm.value);
return;
}
if (sm.value < 0x0800) {
put_byte((byte) (0xc0 | ((sm.value >> 6) & 0x1f)));
- put_byte((byte) (0x80 | (sm.value & 0x3f)));
+ put_byte((byte) (0x80 | (sm.value & 0x3f)));
return;
}
if (sm.value < 0x10000) {
put_byte((byte) (0xe0 | ((sm.value >> 12) & 0x0f)));
- put_byte((byte) (0x80 | ((sm.value >> 6) & 0x3f)));
- put_byte((byte) (0x80 | (sm.value & 0x3f)));
+ put_byte((byte) (0x80 | ((sm.value >> 6) & 0x3f)));
+ put_byte((byte) (0x80 | (sm.value & 0x3f)));
return;
}
- if (sm.value < 0x20000) {
+ if (sm.value < 0x110000) {
put_byte((byte) (0xf0 | ((sm.value >> 18) & 0x7)));
put_byte((byte) (0x80 | ((sm.value >> 12) & 0x3f)));
- put_byte((byte) (0x80 | ((sm.value >> 6) & 0x3f)));
- put_byte((byte) (0x80 | (sm.value & 0x3f)));
+ put_byte((byte) (0x80 | ((sm.value >> 6) & 0x3f)));
+ put_byte((byte) (0x80 | (sm.value & 0x3f)));
return;
}
-
- if (sm.value < 0x4000000) {
- put_byte((byte) (0xf8 | ((sm.value >> 24) & 0x3)));
- put_byte((byte) (0x80 | ((sm.value >> 18) & 0x3f)));
- put_byte((byte) (0x80 | ((sm.value >> 12) & 0x3f)));
- put_byte((byte) (0x80 | ((sm.value >> 6) & 0x3f)));
- put_byte((byte) (0x80 | (sm.value & 0x3f)));
- return;
- }
- */
}
- throw new NotImplemented("val="+value);
+ throw ERT.badarg(value);
}
public void put_utf16(EObject value, int flags) { // appends one code point as UTF-16; byte order is chosen by put_int16 from flags
ESmall num = value.testSmall();
- if (num == null || !Character.isDefined(num.value))
- throw ERT.badarg(value); // TODO: throw what?
- String val = new String(new char[] { (char) num.value });
- byte[] bytes = val.getBytes(IO.UTF16);
- for (int i = 0; i < bytes.length; i++) {
- put_byte(bytes[i]);
+ if (num == null
+ || num.value < 0
+ || num.value > 0x10FFFF
+ || (0xD800 <= num.value && num.value <= 0xDFFF)) { // surrogate values are not valid code points on their own
+ throw ERT.badarg(value);
+ }
+
+ if (num.value < 0x10000) { // fits in a single 16-bit unit
+ put_int16(num.value, flags);
+ } else {
+ int low = num.value - 0x10000; // 20-bit offset above 0xFFFF, split into a surrogate pair below
+ int num1 = 0xD800 | ((low >> 10) & 0x3ff); // high surrogate: upper 10 bits
+ int num2 = 0xDC00 | (low & 0x3ff); // low surrogate: lower 10 bits
+ put_int16(num1, flags);
+ put_int16(num2, flags);
}
}
@@ -442,29 +456,24 @@ static public ESmall bs_utf8_size(EObject value) {
ESmall sm;
if ((sm=value.testSmall()) != null) {
- byte[] raw = new String(new char[]{ (char) sm.value }).getBytes(IO.UTF8);
- return ERT.box(raw.length);
-
- /*
+ if (sm.value < 0) return null;
if (sm.value < 0x80) return ERT.box(1);
- if (sm.value < 0x0800) return ERT.box(2);
+ if (sm.value < 0x800) return ERT.box(2);
if (sm.value < 0x10000) return ERT.box(3);
if (sm.value < 0x200000) return ERT.box(4);
if (sm.value < 0x4000000) return ERT.box(5);
return ERT.box(6);
- */
}
- throw new NotImplemented("val="+value);
+ return null;
}
// compute size of utf16 char
static public ESmall bs_utf16_size(EObject value) { // returns the UTF-16 byte count (2 or 4) for a code point, or null when value is unusable
ESmall num = value.testSmall();
- if (num == null || !Character.isDefined(num.value))
- throw ERT.badarg(value); // TODO: throw what?
- String val = new String(new char[] { (char) num.value });
- byte[] bytes = val.getBytes(IO.UTF16);
- return ERT.box(bytes.length);
+ if (num == null || num.value < 0)
+ return null; // not a small integer, or negative; the generated caller code tests the result for null
+ if (num.value < 0x10000) return ERT.box(2); // one 16-bit unit
+ return ERT.box(4); // two 16-bit units; NOTE(review): values above 0x10FFFF and surrogates are not rejected here, only in put_utf16 - confirm intended
}
}
@@ -102,6 +102,10 @@ public static ErlangError badarg(EObject o1, EObject o2) throws ErlangError {
throw new ErlangError(am_badarg, NIL.cons(o2).cons(o1));
}
+ public static ErlangError badarg(EObject o1) throws ErlangError { // single-argument variant; always throws, the return type only lets callers write "throw ERT.badarg(x)"
+ throw new ErlangError(am_badarg, NIL.cons(o1)); // argument list is the one-element list [o1]
+ }
+
public static ErlangError badarith(EObject... args) { // always throws; never returns normally despite the declared return type
throw new ErlangError(AM_BADARITH, args); // error is tagged AM_BADARITH and carries the supplied args
}
@@ -1124,26 +1124,27 @@ public void visitBS(BeamOpcode opcode, Arg arg, Arg imm, int failLabel) {
pop(arg, EOBJECT_TYPE);
return;
- case bs_utf8_size:
- push(arg, EOBJECT_TYPE);
- mv.visitMethodInsn(INVOKESTATIC,
- EBINSTRINGBUILDER_TYPE.getInternalName(),
- "bs_utf8_size", "(" + EOBJECT_DESC + ")"
- + ESMALL_TYPE.getDescriptor());
- if (failLabel != 0) mv.visitInsn(DUP);
- pop(imm, ESMALL_TYPE);
- if (failLabel != 0) mv.visitJumpInsn(IFNULL, getLabel(failLabel));
- return;
-
case bs_utf16_size:
+ case bs_utf8_size:
push(arg, EOBJECT_TYPE);
mv.visitMethodInsn(INVOKESTATIC,
EBINSTRINGBUILDER_TYPE.getInternalName(),
- "bs_utf16_size", "(" + EOBJECT_DESC + ")"
+ opcode.name(), "(" + EOBJECT_DESC + ")"
+ ESMALL_TYPE.getDescriptor());
- if (failLabel != 0) mv.visitInsn(DUP);
- pop(imm, ESMALL_TYPE);
- if (failLabel != 0) mv.visitJumpInsn(IFNULL, getLabel(failLabel));
+ if (failLabel == 0) {
+ mv.visitInsn(DUP);
+ Label okLabel = new Label();
+ mv.visitJumpInsn(IFNONNULL, okLabel);
+ push(arg, EOBJECT_TYPE);
+ mv.visitMethodInsn(INVOKESTATIC, ERT_NAME, "badarg", "(Lerjang/EObject;)Lerjang/ErlangError;");
+ mv.visitInsn(ATHROW);
+ mv.visitLabel(okLabel);
+ pop(imm, ESMALL_TYPE);
+ } else {
+ mv.visitInsn(DUP);
+ pop(imm, ESMALL_TYPE);
+ mv.visitJumpInsn(IFNULL, getLabel(failLabel));
+ }
return;
}

0 comments on commit 6c7528f

Please sign in to comment.