Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Invalid code generated with uglify-js 3.4.9 #3260

Closed
alippai opened this issue Sep 17, 2018 · 4 comments · Fixed by #3329
Closed

Invalid code generated with uglify-js 3.4.9 #3260

alippai opened this issue Sep 17, 2018 · 4 comments · Fixed by #3329

Comments

@alippai
Copy link

alippai commented Sep 17, 2018

This:

/**
 * Encode a JavaScript string as a UTF-8 byte array.
 *
 * Works in two passes over the string: the first pass counts the bytes
 * needed, the second writes them into an exactly-sized buffer.
 *
 * Relies on `codePointAt` and `encodeError`, which are defined outside this
 * snippet.
 *
 * @param {string} str - The string to encode.
 * @returns {Uint8Array} The encoded bytes; an empty array for an empty string.
 * @throws {Error} If `str` is not a string.
 */
function encode(str) {
  if (typeof str !== 'string') throw new Error('Only strings are supported');
  if (str === '') return new Uint8Array([]);

  // the resulting byte count is at least the length of the string
  const length = str.length;
  let i;
  let cp;
  let cursor = 0;
  // First pass: count the needed number of bytes by iterating through once.
  // The index advances by 2 when cp > 0xffff -- presumably because such a code
  // point occupies two UTF-16 code units (assumes `codePointAt` returns the
  // full code point starting at index i; verify against its definition).
  for (i = 0; i < length; i += cp > 0xffff ? 2 : 1) {
    cp = codePointAt(str, i);
    // Expecting most common codepoints below 0x80, so checking for that first
    if (cp < 0x80) {
      cursor += 1;
    } else if (cp < 0x800) {
      cursor += 2;
    } else if (cp < 0x10000) {
      cursor += 3;
    } else if (cp < 0x200000) {
      cursor += 4;
    } else {
      // Value too large for a 4-byte sequence; delegated to the external
      // `encodeError` helper (presumably throws -- TODO confirm).
      encodeError(cp);
    }
  }
  // Allocate the buffer of suitable size
  const bytes = new Uint8Array(cursor);
  // Second pass: fill the right-sized buffer with utf8 data. `cursor` is reset
  // to 0 and reused as the write position. Within each branch the lead byte is
  // written first, followed by the continuation bytes (each tagged with 0x80).
  for (i = 0, cursor = 0; i < length; i += cp > 0xffff ? 2 : 1) {
    cp = codePointAt(str, i);
    if (cp < 0x80) {
      // 1 byte: the code point itself
      bytes[cursor++] = cp;
    } else if (cp < 0x800) {
      // 2 bytes: lead byte tagged with 0xc0, then the low 6 bits
      bytes[cursor++] = (cp >>> 6) | 0xc0;
      bytes[cursor++] = (cp & 0x3f) | 0x80;
    } else if (cp < 0x10000) {
      // 3 bytes: lead byte tagged with 0xe0, then two 6-bit groups
      bytes[cursor++] = (cp >>> 12) | 0xe0;
      bytes[cursor++] = ((cp >>> 6) & 0x3f) | 0x80;
      bytes[cursor++] = (cp & 0x3f) | 0x80;
    } else {
      // 4 bytes: lead byte tagged with 0xf0, then three 6-bit groups.
      // No upper-bound check here; the first pass already routed values
      // >= 0x200000 to `encodeError`.
      bytes[cursor++] = (cp >>> 18) | 0xf0;
      bytes[cursor++] = ((cp >>> 12) & 0x3f) | 0x80;
      bytes[cursor++] = ((cp >>> 6) & 0x3f) | 0x80;
      bytes[cursor++] = (cp & 0x3f) | 0x80;
    }
  }
  return bytes;
}

Is compiled to (v3.4.9):

// NOTE(review): reader annotation, not part of the uglify-js output.
// In the second loop the per-branch writes have been merged into nested
// assignments of the form `a[i++] = cond ? x : ((a[i++] = ...), y)`.
// JavaScript evaluates the `a[i++]` target of an assignment before its
// right-hand side, so the outermost assignment claims the lowest index but
// receives the LAST byte of the sequence (e.g. a two-byte sequence is stored
// with its continuation byte before its lead byte).
function e(e) {
    if ('string' != typeof e) throw new Error('Only strings are supported');
    if ('' === e) return new Uint8Array([]);
    var t,
      r,
      n = e.length,
      i = 0;
    for (t = 0; t < n; t += R(r))
      (r = I(e, t)), r < 128 ? (i += 1) : r < 2048 ? (i += 2) : r < 65536 ? (i += 3) : r < 2097152 ? (i += 4) : C(r);
    var a = new Uint8Array(i);
    for (t = 0, i = 0; t < n; t += 65535 < r ? 2 : 1)
      (r = I(e, t)),
        // Outer target index is taken here, before any inner `a[i++]` runs.
        (a[i++] =
          r < 128
            ? r
            : ((a[i++] =
                r < 2048
                  ? (r >>> 6) | 192
                  : ((a[i++] = r < 65536 ? (r >>> 12) | 224 : ((a[i++] = (r >>> 18) | 240), ((r >>> 12) & 63) | 128)),
                    ((r >>> 6) & 63) | 128)),
              (63 & r) | 128));
    return a;
  }

Instead of this (v3.4.8):

// NOTE(review): reader annotation, not part of the uglify-js output.
// Here every byte is written by its own `a[i++] = ...` assignment, sequenced
// with the comma operator. Each index is taken immediately before its own
// value is computed, so the lead byte is stored first and the continuation
// bytes follow in order.
function e(e) {
    if ('string' != typeof e) throw new Error('Only strings are supported');
    if ('' === e) return new Uint8Array([]);
    var t,
      r,
      n = e.length,
      i = 0;
    for (t = 0; t < n; t += R(r))
      (r = I(e, t)), r < 128 ? (i += 1) : r < 2048 ? (i += 2) : r < 65536 ? (i += 3) : r < 2097152 ? (i += 4) : C(r);
    var a = new Uint8Array(i);
    for (t = 0, i = 0; t < n; t += 65535 < r ? 2 : 1)
      (r = I(e, t)),
        r < 128
          ? (a[i++] = r)
          : (r < 2048
              ? (a[i++] = (r >>> 6) | 192)
              : (r < 65536
                  ? (a[i++] = (r >>> 12) | 224)
                  : ((a[i++] = (r >>> 18) | 240), (a[i++] = ((r >>> 12) & 63) | 128)),
                (a[i++] = ((r >>> 6) & 63) | 128)),
            (a[i++] = (63 & r) | 128));
    return a;
  }
@alippai
Copy link
Author

alippai commented Sep 17, 2018

The v3.4.9 output encodes the byte sequence c2 a0 as a0 c2 (the lead and continuation bytes are swapped).

@alippai
Copy link
Author

alippai commented Sep 17, 2018

I can try to narrow down the test case later next week, but until then maybe this bug report helps you find the bug in the latest release.

@kzc
Copy link
Contributor

kzc commented Sep 17, 2018

Duplicate of #3245.

Curious that it's a different utf8 encoder.

@alippai
Copy link
Author

alippai commented Sep 17, 2018

It's our internal lib, definitely not protobufjs, but it seems to be similar :)) Nice coincidence (maybe they both originate from the same C++/wiki/other pseudocode).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants