Skip to content

Commit c03bf37

Browse files
committed
[js] Turn some uses of the graphemeBreaker modules into use of our homebrew regexp
1 parent 72ece8b commit c03bf37

File tree

9 files changed

+1519
-7
lines changed

9 files changed

+1519
-7
lines changed

src/vm/js/nqp-runtime/.bignum.js.swo

24 KB
Binary file not shown.

src/vm/js/nqp-runtime/.core.js.swn

16 KB
Binary file not shown.

src/vm/js/nqp-runtime/.core.js.swo

52 KB
Binary file not shown.

src/vm/js/nqp-runtime/.ctx.js.swo

24 KB
Binary file not shown.
12 KB
Binary file not shown.
20 KB
Binary file not shown.

src/vm/js/nqp-runtime/core.js

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ const stripMarks = require('./strip-marks.js');
4343

4444
const codecs = require('./codecs.js');
4545

46+
const graphemeRegexp = require('./graphemes').regexp;
47+
4648
const foldCase = require('fold-case');
4749

4850
const xregexp = require('xregexp');
@@ -1722,31 +1724,45 @@ op.flip = function(str) {
17221724
};
17231725

17241726
// Returns how many times `graphemeRegexp` matches back-to-back starting at
// index 0 of `str`. `graphemeRegexp` is the regexp exported by ./graphemes,
// which is constructed with the sticky (`y`) flag, so each successful
// `test()` resumes exactly where the previous match ended.
// (In this diff view the `graphemeBreaker.countBreaks` line is the removed
// implementation; the counting loop is its replacement.)
op.charsnfg = function(str) {
1725-
return graphemeBreaker.countBreaks(str);
1727+
let count = 0;
// The regexp object is shared, so rewind it before scanning.
1728+
graphemeRegexp.lastIndex = 0;
// A failed sticky `test()` returns false and resets `lastIndex` to 0, which
// ends the loop once no further match can start at the current position.
1729+
while (graphemeRegexp.test(str)) {
1730+
count++;
1731+
}
1732+
return count;
17261733
};
17271734

17281735
// Returns the part of `str` that begins after `start` matches of
// `graphemeRegexp` and spans up to `length` further matches. Returns '' when
// `str` has fewer than `start` matches.
// (In this diff view the lines using `graphemeBreaker.nextBreak` are the
// removed implementation; the `graphemeRegexp` lines replace them.)
op.substr3nfg = function(str, start, length) {
17291736
let startGraphemes = start;
1730-
let startChars = 0;
1737+
// The regexp object is shared and sticky, so rewind it before scanning.
1738+
graphemeRegexp.lastIndex = 0;
// Phase 1: skip `start` matches; running out of input means the requested
// start lies past the end of the string.
17311739
while (startGraphemes--) {
1732-
startChars = graphemeBreaker.nextBreak(str, startChars);
1740+
if (!graphemeRegexp.test(str)) return '';
17331741
}
17341742

1735-
let lengthGraphemes = length;
// `lastIndex` now holds the code-unit offset just past the skipped matches.
1743+
let startChars = graphemeRegexp.lastIndex;
1744+
17361745
let substringEnd = startChars;
1746+
let lengthGraphemes = length;
// Phase 2: extend the end offset one match at a time, stopping early when
// the string is exhausted so the result is simply shorter than requested.
17371747
while (lengthGraphemes--) {
1738-
substringEnd = graphemeBreaker.nextBreak(str, substringEnd);
1748+
if (graphemeRegexp.test(str)) {
1749+
substringEnd = graphemeRegexp.lastIndex;
1750+
} else {
1751+
break;
1752+
}
17391753
}
17401754

17411755
return str.substring(startChars, substringEnd);
17421756
};
17431757

17441758
// Returns the tail of `str` that begins after `start` matches of
// `graphemeRegexp`, or '' when `str` has fewer than `start` matches.
// (In this diff view the lines using `graphemeBreaker.nextBreak` are the
// removed implementation; the `graphemeRegexp` lines replace them.)
op.substr2nfg = function(str, start) {
17451759
let startGraphemes = start;
1746-
let startChars = 0;
1760+
// The regexp object is shared and sticky, so rewind it before scanning.
1761+
graphemeRegexp.lastIndex = 0;
// Skip `start` matches; running out of input means the requested start lies
// past the end of the string.
17471762
while (startGraphemes--) {
1748-
startChars = graphemeBreaker.nextBreak(str, startChars);
1763+
if (!graphemeRegexp.test(str)) return '';
17491764
}
// `lastIndex` now holds the code-unit offset just past the skipped matches.
1765+
let startChars = graphemeRegexp.lastIndex;
17501766

// NOTE(review): `String.prototype.substr` is a legacy method; since
// `startChars` is a non-negative index here, `str.slice(startChars)` would
// return the same value.
17511767
return str.substr(startChars);
17521768
};

src/vm/js/nqp-runtime/graphemes.js

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
'use strict';
2+
const data = require('nqp-unicode-data');
3+
4+
/**
 * Expands a pattern template into regular-expression source text.
 *
 * All space characters are removed first, so templates may be spaced out for
 * readability. Every `<Name>` placeholder is then replaced by a non-capturing
 * group: when `parts` has a truthy entry for `Name`, that entry is expanded
 * recursively; otherwise the text comes from `data.regex(Name)`.
 *
 * @param {string} source - Template containing `<Name>` placeholders.
 * @returns {string} Regular-expression source with all placeholders expanded.
 * @throws {Error} When a placeholder is not in `parts` and `data.regex`
 *   returns `undefined` for it.
 */
function build(source) {
  return source.replace(/ /g, '').replace(/<([^>]+)>/g, (match, part) => {
    const content = parts[part] ? build(parts[part]) : data.regex(part);
    if (content === undefined) {
      // Throw a real Error (not a bare string) so the failure carries a stack
      // trace and can be recognised with `instanceof Error`.
      throw new Error(`Can't do anything with: ${part}`);
    }
    return '(?:' + content + ')';
  });
}
11+
12+
// Named sub-pattern templates. `build` looks placeholders up here before
// falling back to `data.regex`, so a template may reference other entries of
// this table as well as names resolved by the Unicode data module.
const parts = {};

parts.CRLF = '<CR><LF>';
parts.RISequence = '<Regional_Indicator><Regional_Indicator>';
parts.HangulSyllable = '<L>*<V>+<T>*|<L>*<LV><V>*<T>*|<L>*<LVT><T>*|<L>+|<T>+';
// Spaces inside a template are only for readability; `build` strips them.
parts.Grapheme_Extend = '<Extend> | (?:<ZWJ><Glue_After_Zwj>) | (?:<ZWJ><E_Base_GAZ><Extend>*<E_Modifier>?) | <ZWJ>';
parts.Modified = '<Prepend>*(?:<RISequence>|<HangulSyllable>|(?:(?:<E_Base>|<E_Base_GAZ>)<Extend>*<E_Modifier>)|(?:(?!<Control>).))(<Grapheme_Extend>|<SpacingMark>)*';

parts.Degenerate = '<ZWJ>(?:<Glue_After_Zwj>|<E_Base_GAZ><Extend>*<E_Modifier>?)';

// Alternatives are tried in order; the trailing `[^]` matches any single
// character, so the pattern always consumes something while input remains.
const graphemePattern = build('<CRLF>|<Degenerate>|<Modified>|[^]');

// `y` (sticky) makes every match start exactly at `lastIndex`, letting callers
// step through a string one match at a time; `u` enables Unicode mode.
exports.regexp = new RegExp(graphemePattern, 'yu');

0 commit comments

Comments
 (0)