Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ const generateData = function(version) {
'.gitignore',
'.npmignore',
'.npmrc',
'decode-property-map.js',
'decode-ranges.js',
].forEach(function(file) {
cp.sync(
path.resolve(staticPath, file),
Expand Down
112 changes: 71 additions & 41 deletions scripts/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@ const zlib = require('zlib');
const jsesc = require('jsesc');
const mkdirp = require('mkdirp');
const regenerate = require('regenerate');
const decodeRanges = require('../static/decode-ranges.js');

const gzipInline = function(data) {
if (data instanceof Map) {
return `new Map(${ gzipInline([...data]) })`;
}
const json = jsesc(data, { 'json': true });
const gzipBuffer = zlib.gzipSync(json);
return `JSON.parse(require('zlib').gunzipSync(${ jsesc(gzipBuffer) }))`;
const str = gzipBuffer.toString('base64');
return `JSON.parse(require('zlib').gunzipSync(Buffer.from('${ str }','base64')))`;
};

const range = function(start, stop) {
Expand All @@ -37,6 +39,24 @@ const append = function(object, key, value) {
}
};

/**
 * Run-length encode an array of per-code-point property values.
 *
 * The input is indexed by code point. Consecutive indices holding the same
 * value (compared with `===`) are collapsed into one run; stretches of
 * `undefined` are treated as gaps and are not emitted.
 *
 * @param {Array} codePointProperties Property values indexed by code point.
 * @returns {Array} Flat list of `gapLength, runLength, value` triples, where
 *   `gapLength` is the distance from the end of the previously emitted run
 *   (or from index 0) to the start of this run.
 */
const samePropertyRuns = function(codePointProperties) {
  const total = codePointProperties.length;
  const runs = [];
  let previousEnd = 0;
  let index = 0;
  while (index < total) {
    const runStart = index;
    const runValue = codePointProperties[runStart];
    // Advance past every following slot that holds the same value.
    do {
      index += 1;
    } while (index < total && codePointProperties[index] === runValue);
    if (runValue === undefined) {
      // A stretch of holes: it only contributes to the next run's gap.
      continue;
    }
    runs.push(runStart - previousEnd, index - runStart, runValue);
    previousEnd = index;
  }
  return runs;
};

const writeFiles = function(options) {
const version = options.version;
const map = options.map;
Expand Down Expand Up @@ -98,39 +118,45 @@ const writeFiles = function(options) {
}

// Save the data to a file
fs.writeFileSync(
path.resolve(dir, 'code-points.js'),
'module.exports=' + (
codePoints.length > 999 ? gzipInline : jsesc
)(codePoints)
);
let codePointsExports = `require('./ranges').flatMap(r=>Array.from(r.keys()))`;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let codePointsExports = `require('./ranges').flatMap(r=>Array.from(r.keys()))`;
let codePointsExports = `require('./ranges.js').flatMap(r=>Array.from(r.keys()))`;

let symbolsExports = `require('./ranges.js').flatMap(r=>Array.from(r.values()))`;
if (!isCaseFolding) {
const sortedCodePoints = [...codePoints].sort((a, b) => a - b);
fs.writeFileSync(
path.resolve(dir, 'ranges.js'),
`module.exports=require('../../decode-ranges.js')('${
decodeRanges.encode(sortedCodePoints)
}')`
);
fs.writeFileSync(
path.resolve(dir, 'regex.js'),
'module.exports=/' + regenerate(codePoints).toString() + '/'
);
}

const symbols = isCaseFolding ?
(() => {
const result = new Map();
for (let [from, to] of codePoints) {
from = String.fromCodePoint(from);
if (Array.isArray(to)) {
to = String.fromCodePoint.apply(null, to);
} else {
to = String.fromCodePoint(to);
}
result.set(from, to);
if (codePoints.length < 10) {
codePointsExports = jsesc(codePoints);
symbolsExports = jsesc(codePoints.map(cp => String.fromCodePoint(cp)));
}
} else {
const symbols = new Map();
for (let [from, to] of codePoints) {
from = String.fromCodePoint(from);
if (Array.isArray(to)) {
to = String.fromCodePoint.apply(null, to);
} else {
to = String.fromCodePoint(to);
}
return result;
})() :
codePoints.map((codePoint) => String.fromCodePoint(codePoint));
symbols.set(from, to);
}
codePointsExports = jsesc(codePoints);
symbolsExports = jsesc(symbols);
}
fs.writeFileSync(
path.resolve(dir, 'code-points.js'),
`module.exports=${ codePointsExports }`
);
fs.writeFileSync(
path.resolve(dir, 'symbols.js'),
'module.exports=' + (
!isCaseFolding && symbols.length > 999 ? gzipInline : jsesc
)(symbols)
`module.exports=${ symbolsExports }`
);
});
Object.keys(auxMap).forEach(function(type) {
Expand All @@ -143,22 +169,26 @@ const writeFiles = function(options) {
}
mkdirp.sync(dir);
let output = '';
if (/^(?:Bidi_Class|Bidi_Mirroring_Glyph|bidi-brackets|Names)$/.test(type)) {
const map = new Map();
Object.keys(auxMap[type]).forEach(function(key) {
const codePoint = Number(key);
const value = auxMap[type][key];
map.set(codePoint, value);
});
if ('Bidi_Mirroring_Glyph' == type) { // `Bidi_Mirroring_Glyph/index.js`
// Note: `Bidi_Mirroring_Glyph` doesn’t have repeated strings; don’t gzip.
output = `module.exports=${ jsesc(map) }`;
} else { // `Bidi_Class/index.js` or `bidi-brackets/index.js` or `Names/index.js`
output = `module.exports=${ gzipInline(map) }`;
}
if ('Bidi_Mirroring_Glyph' == type) { // `Bidi_Mirroring_Glyph/index.js`
// Note: `Bidi_Mirroring_Glyph` doesn’t have repeated strings; don’t gzip.
const flatPairs = auxMap[type]
.map(ch => ch.codePointAt(0))
.flatMap((a, b) => a < b ? [a, b - a] : []);
output = [
`const chr=String.fromCodePoint`,
`const pair=(t,u,v)=>[t?u+v:v,chr(t?u:u+v)]`,
`module.exports=new Map(${
jsesc(flatPairs)
}.map((v,i,a)=>pair(i&1,a[i^1],v)))`
].join(';');
} else { // `categories/index.js`
const array = auxMap[type];
output = `var x=${ gzipInline(array) };module.exports=new Map(x.entries())`;
// or `Bidi_Class/index.js`
// or `bidi-brackets/index.js`
// or `Names/index.js`
const flatRuns = samePropertyRuns(auxMap[type]);
output = `module.exports=require('../decode-property-map.js')(${
gzipInline(flatRuns)
})`;
}
fs.writeFileSync(
path.resolve(dir, 'index.js'),
Expand Down
20 changes: 20 additions & 0 deletions static/decode-property-map.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
 * Expand a flat run-length list into `[codePoint, value]` pairs.
 *
 * `runs` is read as consecutive `gap, count, value` triples: skip `gap`
 * code points, then yield `count` consecutive code points paired with `value`.
 * A trailing group with fewer than three items is ignored.
 *
 * @param {Array} runs Flat list of `gap, count, value` triples.
 * @yields {Array} A `[codePoint, value]` pair.
 */
function* generateEntries(runs) {
  const limit = runs.length - 2;
  let codePoint = 0;
  for (let offset = 0; offset < limit; offset += 3) {
    const gap = runs[offset];
    const count = runs[offset + 1];
    const value = runs[offset + 2];
    codePoint += gap;
    const stop = codePoint + count;
    while (codePoint < stop) {
      yield [codePoint, value];
      codePoint += 1;
    }
  }
}

/**
 * Build a `Map` from code point to property value out of a flat run-length
 * list, using the pairs produced by `generateEntries`.
 *
 * @param {Array} runs Flat run-length list passed straight to `generateEntries`.
 * @returns {Map} Map whose entries are the generated `[codePoint, value]` pairs.
 */
function decodePropertyMap(runs) {
  const entries = generateEntries(runs);
  return new Map(entries);
}

// The module's sole export is the decoder function itself.
module.exports = decodePropertyMap;
114 changes: 114 additions & 0 deletions static/decode-ranges.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// 64-symbol alphabet used by the encoder: the index of a symbol in this string
// is the 6-bit value it stands for.
const base64enc =
  'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_';

// Frozen reverse lookup table: symbol -> 6-bit value.
const base64dec = (() => {
  const table = {};
  for (let value = 0; value < base64enc.length; value += 1) {
    table[base64enc[value]] = value;
  }
  return Object.freeze(table);
})();

/**
 * A half-open range of code points: `begin` is included, `end` is not.
 */
class UnicodeRange {
  /**
   * @param {number} begin First code point in the range.
   * @param {number} end Code point just past the last one in the range.
   */
  constructor(begin, end) {
    this.begin = begin;
    this.end = end;
    this.length = end - begin;
  }

  /**
   * Iterate over the code points of the range as numbers, in ascending order.
   */
  *keys() {
    const stop = this.end;
    let codePoint = this.begin;
    while (codePoint < stop) {
      yield codePoint;
      codePoint += 1;
    }
  }

  /**
   * Iterate over the code points of the range as strings, in ascending order.
   */
  *values() {
    const stop = this.end;
    let codePoint = this.begin;
    while (codePoint < stop) {
      yield String.fromCodePoint(codePoint);
      codePoint += 1;
    }
  }
}

/**
 * Decode a string of base64 symbols into a list of variable-length deltas.
 *
 * The top bits of the first 6-bit symbol of each delta select its width:
 *   0xxxxx -> 1 symbol,  5-bit value
 *   10xxxx -> 2 symbols, 10-bit value
 *   110xxx -> 3 symbols, 15-bit value
 *   111xxx -> 4 symbols, 21-bit value
 *
 * @param {string} input Encoded deltas.
 * @returns {number[]} Decoded deltas, in input order.
 */
function decodeDeltas(input) {
  const deltas = [];
  let position = 0;
  // Read the next symbol and translate it to its 6-bit value.
  const nextSextet = () => base64dec[input[position++]];
  while (position < input.length) {
    const head = nextSextet();
    const tag = head & 56;
    let delta = head;
    if (tag === 32 || tag === 40) {
      delta = (head & 15) << 6;
      delta |= nextSextet();
    } else if (tag === 48) {
      delta = (head & 7) << 12;
      delta |= nextSextet() << 6;
      delta |= nextSextet();
    } else if (tag === 56) {
      delta = (head & 7) << 18;
      delta |= nextSextet() << 12;
      delta |= nextSextet() << 6;
      delta |= nextSextet();
    }
    deltas.push(delta);
  }
  return deltas;
}

/**
 * Encode a list of deltas as a string of base64 symbols, using 1, 2, 3 or 4
 * symbols per delta (5, 10, 15 or 21 payload bits respectively).
 *
 * The leading symbol carries a width prefix in its top bits:
 * `0` (1 symbol), `10` (2 symbols), `110` (3 symbols), `111` (4 symbols).
 *
 * @param {number[]} input Non-negative integer deltas, each below 2^21.
 * @returns {string} The encoded deltas.
 * @throws {RangeError} If a delta is not an integer in the range [0, 2^21).
 */
function encodeDeltas(input) {
  const output = [];
  for (let i = 0; i < input.length; ++i) {
    const x = input[i];
    // `console.assert` only logs in Node, and `>>` wraps at 32 bits, so an
    // invalid delta would otherwise be encoded silently as a different value.
    if (!Number.isInteger(x) || x < 0 || x > 0x1FFFFF) {
      throw new RangeError(`delta ${x} out of range`);
    }
    if ((x >> 5) === 0) {
      output.push(x);
    } else if ((x >> 10) === 0) {
      output.push(32 + (x >> 6), x);
    } else if ((x >> 15) === 0) {
      output.push(48 + (x >> 12), x >> 6, x);
    } else {
      output.push(56 + (x >> 18), x >> 12, x >> 6, x);
    }
  }
  // Only the low 6 bits of each pushed number select a symbol.
  return output.map(x => base64enc[x & 63]).join('');
}

/**
 * Decode an encoded string into a list of code point ranges.
 *
 * The decoded deltas are read in pairs: the first is the gap after the
 * previous range (stored minus one, except before the very first range) and
 * the second is the range length minus one. An unpaired trailing delta is
 * ignored.
 *
 * @param {string} input Encoded ranges.
 * @returns {UnicodeRange[]} The decoded half-open ranges, in order.
 */
function decodeRanges(input) {
  const deltas = decodeDeltas(input);
  const ranges = [];
  let previousEnd = -1;
  for (let k = 0; k + 1 < deltas.length; k += 2) {
    const start = previousEnd + 1 + deltas[k];
    const size = deltas[k + 1] + 1;
    previousEnd = start + size;
    ranges.push(new UnicodeRange(start, previousEnd));
  }
  return ranges;
}

/**
 * Encode a sorted list of distinct code points as run-length ranges.
 *
 * Each maximal run of consecutive code points contributes two deltas: the gap
 * since the previous run and the run length minus one. The deltas are then
 * handed to `encodeDeltas`.
 *
 * @param {number[]} values Code points in strictly ascending order.
 * @returns {string} The encoded ranges.
 * @throws {RangeError} If a code point is negative, duplicated or out of order.
 */
function encodeRanges(values) {
  const deltas = [];
  for (let end = -1, i = 0; i < values.length; ) {
    const begin = values[i];
    // `console.assert` only logs in Node; a duplicate or unsorted value would
    // otherwise yield a negative gap and a corrupt encoding.
    if (!(begin > end)) {
      throw new RangeError(`code point ${begin} out of order`);
    }
    deltas.push(begin - end - 1);
    // `end` is exclusive: one past the last code point of the current run.
    end = begin + 1;
    while (++i < values.length && values[i] === end) {
      ++end;
    }
    deltas.push(end - begin - 1);
  }
  return encodeDeltas(deltas);
}

// Export the decoder function as the module itself, with the encoder attached
// as its `encode` property. The descriptor gives only `value`, so the property
// takes the `Object.defineProperty` defaults: non-enumerable, non-writable and
// non-configurable.
module.exports = Object.defineProperty(
decodeRanges, 'encode', { value: encodeRanges }
);