Permalink
Browse files

added Unicode category aliases

  • Loading branch information...
1 parent a94c925 commit 9e909c64e421de43adf2571a375bfb640f01a82f @slevithan committed Mar 3, 2012
View
274 README.md
@@ -1,156 +1,154 @@
-[XRegExp](http://xregexp.com/)
-==============================
+# [XRegExp](http://xregexp.com/) <sup>v1.6.0-alpha</sup>
XRegExp provides augmented, extensible JavaScript regular expressions. You get new syntax, flags, and methods beyond what browsers support natively. XRegExp is also a regular expression utility belt with tools to make your client-side grepping simpler and more powerful, while freeing you from worrying about pesky cross-browser inconsistencies and the unreliable `lastIndex` property.
-
-A few usage examples
---------------------
-
-```html
-<script src="xregexp.js"></script>
-<script>
- var date, dateStr, match, str, pos = 0, result = [];
-
- // Using named capture and the x flag (free-spacing and comments)
- date = new XRegExp('(?<year> [0-9]{4}) -? # year \n\
- (?<month> [0-9]{2}) -? # month \n\
- (?<day> [0-9]{2}) # day ', 'x');
-
- // XRegExp.exec gives you named backreferences on the match result
- dateStr = '2012-02-22';
- match = XRegExp.exec(dateStr, date);
- match.day; // -> '22'
-
- // It also supports optional pos and sticky arguments
- str = '<1><2><3>4<5>';
- while (match = XRegExp.exec(str, XRegExp.cache('<(\\d+)>'), pos, true)) {
- result.push(match[1]);
- pos = match.index + match[0].length;
- }
- // result -> ['1', '2', '3']
-
- // XRegExp.replace allows named backreferences in replacements
- XRegExp.replace(dateStr, date, '${month}/${day}/${year}'); // -> '02/22/2012'
-
- // In fact, all XRegExps are RegExps and work perfectly with native methods
- date.test(dateStr); // -> true
-
- // The only caveat is that named captures must be referred to using numbered backreferences
- dateStr.replace(date, '$2/$3/$1'); // -> '02/22/2012'
-
- // If you want, you can extend native methods so you don't have to worry about this
- XRegExp.install('natives');
- dateStr.replace(date, '${month}/${day}/${year}'); // -> '02/22/2012'
- dateStr.replace(date, function (match) {
- return match.month + '/' + match.day + '/' +match.year;
- }); // -> '02/22/2012'
- date.exec(dateStr).day; // -> 22
-
- // Get an array of backreference-only arrays using XRegExp.forEach
- str = '<a href="http://xregexp.com/api/">XRegExp</a>\
- <a href="http://www.google.com/">Google</a>';
- XRegExp.forEach(str, new XRegExp('<a href="([^"]+)">(.*?)</a>', 'is'), function (match) {
- this.push(match.slice(1));
- }, []);
- // -> [['http://xregexp.com/api/', 'XRegExp'], ['http://www.google.com/', 'Google']]
-
- // Get an array of numbers within <b> tags using XRegExp.matchChain
- XRegExp.matchChain('1 <b>2</b> 3 <b>4 a 56</b>', [/<b>.*?<\/b>/i, /\d+/]);
- // -> ['2', '4', '56']
-
- // XRegExp.matchChain can also pass forward and return specific backreferences
- XRegExp.matchChain(str, [
- {regex: /<a href="([^"]+)">/i, backref: 1},
- {regex: new XRegExp('(?i)^https?://(?<domain>[^/?#]+)'), backref: 'domain'}
- ]);
- // -> ['xregexp.com', 'www.google.com']
-
- // XRegExp regexes get call and apply methods
- // To demonstrate, let's first create the function we'll be calling...
- function filter (array, fn) {
- var res = [];
- array.forEach(function (el) {if (fn.call(null, el)) res.push(el);});
- return res;
- }
- // Now we can filter arrays using functions and regexes
- filter(['a', 'ba', 'ab', 'b'], new XRegExp('^a'));
- // -> ['a', 'ab']
-</script>
-```
-
-These examples should give you an idea of what's possible, but they don't show all of XRegExp's tricks. You can even augment XRegExp's regular expression syntax with addons (see below) or write your own. For the full scoop, see [API](http://xregexp.com/api/), [syntax](http://xregexp.com/syntax/), [flags](http://xregexp.com/flags/), and [browser fixes](http://xregexp.com/cross_browser/).
-
-
-Unicode Base addon usage examples
----------------------------------
-
-```html
+## A few usage examples
+
+~~~ js
+var date, dateStr, match, str, pos = 0, result = [];
+
+// Using named capture and the x flag (free-spacing and comments)
+date = new XRegExp('(?<year> [0-9]{4}) -? # year \n\
+ (?<month> [0-9]{2}) -? # month \n\
+ (?<day> [0-9]{2}) # day ', 'x');
+
+// XRegExp.exec gives you named backreferences on the match result
+dateStr = '2012-02-22';
+match = XRegExp.exec(dateStr, date);
+match.day; // -> '22'
+
+// It also supports optional pos and sticky arguments
+str = '<1><2><3>4<5>';
+while (match = XRegExp.exec(str, XRegExp.cache('<(\\d+)>'), pos, true)) {
+ result.push(match[1]);
+ pos = match.index + match[0].length;
+}
+// result -> ['1', '2', '3']
+
+// XRegExp.replace allows named backreferences in replacements
+XRegExp.replace(dateStr, date, '${month}/${day}/${year}'); // -> '02/22/2012'
+
+// In fact, all XRegExps are RegExps and work perfectly with native methods
+date.test(dateStr); // -> true
+
+// The only caveat is that named captures must be referred to using numbered backreferences
+dateStr.replace(date, '$2/$3/$1'); // -> '02/22/2012'
+
+// If you want, you can extend native methods so you don't have to worry about this
+XRegExp.install('natives');
+dateStr.replace(date, '${month}/${day}/${year}'); // -> '02/22/2012'
+dateStr.replace(date, function (match) {
+ return match.month + '/' + match.day + '/' +match.year;
+}); // -> '02/22/2012'
+date.exec(dateStr).day; // -> '22'
+
+// Get an array of backreference-only arrays using XRegExp.forEach
+str = '<a href="http://xregexp.com/api/">XRegExp</a>\
+ <a href="http://www.google.com/">Google</a>';
+XRegExp.forEach(str, new XRegExp('<a href="([^"]+)">(.*?)</a>', 'is'), function (match) {
+ this.push(match.slice(1));
+}, []);
+// -> [['http://xregexp.com/api/', 'XRegExp'], ['http://www.google.com/', 'Google']]
+
+// Get an array of numbers within <b> tags using XRegExp.matchChain
+XRegExp.matchChain('1 <b>2</b> 3 <b>4 a 56</b>', [/<b>.*?<\/b>/i, /\d+/]);
+// -> ['2', '4', '56']
+
+// XRegExp.matchChain can also pass forward and return specific backreferences
+XRegExp.matchChain(str, [
+ {regex: /<a href="([^"]+)">/i, backref: 1},
+ {regex: new XRegExp('(?i)^https?://(?<domain>[^/?#]+)'), backref: 'domain'}
+]);
+// -> ['xregexp.com', 'www.google.com']
+
+// XRegExp regexes get call and apply methods
+// To demonstrate, let's first create the function we'll be using...
+function filter (array, fn) {
+ var res = [];
+ array.forEach(function (el) {if (fn.call(null, el)) res.push(el);});
+ return res;
+}
+// Now we can filter arrays using functions and regexes
+filter(['a', 'ba', 'ab', 'b'], new XRegExp('^a'));
+// -> ['a', 'ab']
+~~~
+
+These examples should give you an idea of what's possible, but they don't show all of XRegExp's tricks. You can even augment XRegExp's regular expression syntax with addons (see below) or write your own. For the full scoop, see [API](http://xregexp.com/api/), [syntax](http://xregexp.com/syntax/), [flags](http://xregexp.com/flags/), [browser fixes](http://xregexp.com/cross_browser/), and [roadmap](https://github.com/slevithan/XRegExp/wiki/Roadmap).
+
+## Unicode addon usage examples
+
+First include the Unicode Base script:
+
+~~~ html
<script src="xregexp.js"></script>
<script src="addons/unicode/xregexp-unicode-base.js"></script>
-<script>
- var unicodeWord = new XRegExp('^\\p{L}+$');
- unicodeWord.test('Русский'); // -> true
- unicodeWord.test('日本語'); // -> true
- unicodeWord.test('العربية'); // -> true
-</script>
-
-<!-- \p{L} is included in the base script, but other categories, scripts,
-and blocks require addon packages -->
-<script src="addons/unicode/xregexp-unicode-scripts.js"></script>
-<script>
- new XRegExp('^\\p{Hiragana}+$').test('ひらがな'); // -> true
-</script>
-```
+~~~
+
+Then you can do this:
+
+~~~ js
+var unicodeWord = new XRegExp('^\\p{L}+$');
+unicodeWord.test('Русский'); // -> true
+unicodeWord.test('日本語'); // -> true
+unicodeWord.test('العربية'); // -> true
+~~~
+
+The base script adds `\p{L}` (or you can use its alias, `\p{Letter}`), but other Unicode categories, scripts, and blocks require addon packages. Try these after additionally including `xregexp-unicode-scripts.js`:
+
+~~~ js
+new XRegExp('^\\p{Hiragana}+$').test('ひらがな'); // -> true
+new XRegExp('^[\\p{Latin}\\p{Common}]+$').test('Ümlaut Café'); // -> true
+~~~
XRegExp uses the Unicode 6.1 character database (released 2012-01). More details [here](http://xregexp.com/plugins/#unicode).
+## Match Recursive addon usage examples
-Match Recursive addon usage examples
-------------------------------------
+First include the Match Recursive script:
-```html
+~~~ html
<script src="xregexp.js"></script>
<script src="addons/xregexp-matchrecursive.js"></script>
-<script>
- var str = '(t((e))s)t()(ing)';
- XRegExp.matchRecursive(str, '\\(', '\\)', 'g');
- // -> ['t((e))s', '', 'ing']
-
- // Extended information mode with valueNames
- str = 'Here is <div>a <div>nested</div> tag</div> example.';
- XRegExp.matchRecursive(str, '<div\s*>', '</div>', 'gi', {
- valueNames: ['between', 'left', 'match', 'right']
- });
- // -> [['between', 'Here is ', 0, 8],
- // ['left', '<div>', 8, 13],
- // ['match', 'a <div>nested</div> tag', 13, 37],
- // ['right', '</div>', 36, 42],
- // ['between', ' example.', 42, 51]]
-
- // Omitting unneeded parts with null valueNames, and using escapeChar
- str = '...{1}\\{{function(x,y){return y+x;}}';
- XRegExp.matchRecursive(str, '{', '}', 'g', {
- valueNames: ['literal', null, 'value', null],
- escapeChar: '\\'
- });
- // -> [['literal', '...', 0, 3],
- // ['value', '1', 4, 5],
- // ['literal', '\\{', 6, 8],
- // ['value', 'function(x,y){return y+x;}', 9, 35]]
-
- // Sticky mode via the y flag (works everywhere, not just where /y is natively supported)
- str = '<1><<<2>>><3>4<5>';
- XRegExp.matchRecursive(str, '<', '>', 'gy');
- // -> ['1', '<<2>>', '3']
-</script>
-```
+~~~
+
+Then get recursive:
+
+~~~ js
+var str = '(t((e))s)t()(ing)';
+XRegExp.matchRecursive(str, '\\(', '\\)', 'g');
+// -> ['t((e))s', '', 'ing']
+
+// Extended information mode with valueNames
+str = 'Here is <div>a <div>nested</div> tag</div> example.';
+XRegExp.matchRecursive(str, '<div\\s*>', '</div>', 'gi', {
+ valueNames: ['between', 'left', 'match', 'right']
+});
+// -> [['between', 'Here is ', 0, 8],
+// ['left', '<div>', 8, 13],
+// ['match', 'a <div>nested</div> tag', 13, 36],
+// ['right', '</div>', 36, 42],
+// ['between', ' example.', 42, 51]]
+
+// Omitting unneeded parts with null valueNames, and using escapeChar
+str = '...{1}\\{{function(x,y){return y+x;}}';
+XRegExp.matchRecursive(str, '{', '}', 'g', {
+ valueNames: ['literal', null, 'value', null],
+ escapeChar: '\\'
+});
+// -> [['literal', '...', 0, 3],
+// ['value', '1', 4, 5],
+// ['literal', '\\{', 6, 8],
+// ['value', 'function(x,y){return y+x;}', 9, 35]]
+
+// Sticky mode via the y flag (works everywhere, not just where /y is natively supported)
+str = '<1><<<2>>><3>4<5>';
+XRegExp.matchRecursive(str, '<', '>', 'gy');
+// -> ['1', '<<2>>', '3']
+~~~
More details [here](http://xregexp.com/plugins/#matchRecursive).
-
-Changelog
----------
+## Changelog
* Historical changes: [Version history](http://xregexp.com/history/).
* Planned changes: [Roadmap](https://github.com/slevithan/XRegExp/wiki/Roadmap).
@@ -1,20 +1,13 @@
-// XRegExp addon: Unicode Base 0.6
-// (c) 2008-2012 Steven Levithan
-// MIT License
-// <http://xregexp.com>
-// Uses Unicode 6.1 <http://unicode.org/Public/6.1.0/ucd/>
-
-/*
-The Unicode Base addon adds support for the \p{L} token only (Unicode category
-Letter). Addon packages are available that add support for the remaining
-Unicode categories, as well as Unicode scripts and blocks.
-
-All Unicode tokens can be inverted by using an uppercase P; e.g., \P{L} matches
-any character not in Unicode's Letter category. Negated Unicode tokens are not
-supported within character classes.
-
-Letter case, spaces, hyphens, and underscores are ignored when comparing
-Unicode token names.
+/*!
+ * XRegExp addon: Unicode Base v1.0.0-alpha
+ * (c) 2008-2012 Steven Levithan <http://xregexp.com>
+ * Available under the MIT License
+ * Uses Unicode 6.1 <http://unicode.org/Public/6.1.0/ucd/>
+ *
+ * Adds support for the \p{L} or \p{Letter} Unicode category. Addon packages
+ * for the remaining Unicode categories, scripts, and blocks are available.
+ * All Unicode tokens can be inverted using \P{..} or \p{^..}. Token names are
+ * case insensitive, and any spaces, hyphens, and underscores are ignored.
*/
;var XRegExp;
@@ -28,16 +21,25 @@ if (!XRegExp) {
var unicode = {}; // protected storage for package tokens
- XRegExp.addUnicodePackage = function (pack) {
- var codePoint = /\w{4}/g,
- clip = /[- _]+/g,
- name, p;
+ function rename (name) { // normalizes a token name so it can be used as a key in the `unicode` store
+ return name.replace(/[- _]+/g, "").toLowerCase(); // drop every run of hyphens, spaces, and underscores, then lowercase
+ }
+
+ XRegExp.addUnicodePackage = function (pack, aliases) {
+ var p, name, alias;
for (p in pack) {
if (pack.hasOwnProperty(p)) {
- name = p.replace(clip, "").toLowerCase();
- // disallow overriding properties that have already been added
- if (!unicode.hasOwnProperty(name)) {
- unicode[name] = pack[p].replace(codePoint, "\\u$&");
+ name = rename(p);
+ if (!unicode.hasOwnProperty(name)) // disallow replacing added tokens
+ unicode[name] = pack[p].replace(/\w{4}/g, "\\u$&");
+ }
+ }
+ if (aliases) {
+ for (p in aliases) {
+ if (aliases.hasOwnProperty(p)) {
+ alias = rename(aliases[p]);
+ if (!unicode.hasOwnProperty(alias)) // disallow replacing added tokens
+ unicode[alias] = unicode[rename(p)];
}
}
}
@@ -49,7 +51,7 @@ if (!XRegExp) {
var negated = (match[1] === "P" || match[2]),
item = match[3].replace(/[- _]+/g, "").toLowerCase();
- // \p{}, \P{}, and \p{^} are valid, but the double negative \P{^} isn't
+ // \p{..}, \P{..}, and \p{^..} are valid, but the double negative \P{^..} isn't
if (match[1] === "P" && match[2])
throw new SyntaxError("erroneous characters: " + match[0]);
if (negated && scope === XRegExp.INSIDE_CLASS)
@@ -66,6 +68,9 @@ if (!XRegExp) {
XRegExp.addUnicodePackage({
L: "0041-005A0061-007A00AA00B500BA00C0-00D600D8-00F600F8-02C102C6-02D102E0-02E402EC02EE0370-037403760377037A-037D03860388-038A038C038E-03A103A3-03F503F7-0481048A-05270531-055605590561-058705D0-05EA05F0-05F20620-064A066E066F0671-06D306D506E506E606EE06EF06FA-06FC06FF07100712-072F074D-07A507B107CA-07EA07F407F507FA0800-0815081A082408280840-085808A008A2-08AC0904-0939093D09500958-09610971-09770979-097F0985-098C098F09900993-09A809AA-09B009B209B6-09B909BD09CE09DC09DD09DF-09E109F009F10A05-0A0A0A0F0A100A13-0A280A2A-0A300A320A330A350A360A380A390A59-0A5C0A5E0A72-0A740A85-0A8D0A8F-0A910A93-0AA80AAA-0AB00AB20AB30AB5-0AB90ABD0AD00AE00AE10B05-0B0C0B0F0B100B13-0B280B2A-0B300B320B330B35-0B390B3D0B5C0B5D0B5F-0B610B710B830B85-0B8A0B8E-0B900B92-0B950B990B9A0B9C0B9E0B9F0BA30BA40BA8-0BAA0BAE-0BB90BD00C05-0C0C0C0E-0C100C12-0C280C2A-0C330C35-0C390C3D0C580C590C600C610C85-0C8C0C8E-0C900C92-0CA80CAA-0CB30CB5-0CB90CBD0CDE0CE00CE10CF10CF20D05-0D0C0D0E-0D100D12-0D3A0D3D0D4E0D600D610D7A-0D7F0D85-0D960D9A-0DB10DB3-0DBB0DBD0DC0-0DC60E01-0E300E320E330E40-0E460E810E820E840E870E880E8A0E8D0E94-0E970E99-0E9F0EA1-0EA30EA50EA70EAA0EAB0EAD-0EB00EB20EB30EBD0EC0-0EC40EC60EDC-0EDF0F000F40-0F470F49-0F6C0F88-0F8C1000-102A103F1050-1055105A-105D106110651066106E-10701075-1081108E10A0-10C510C710CD10D0-10FA10FC-1248124A-124D1250-12561258125A-125D1260-1288128A-128D1290-12B012B2-12B512B8-12BE12C012C2-12C512C8-12D612D8-13101312-13151318-135A1380-138F13A0-13F41401-166C166F-167F1681-169A16A0-16EA1700-170C170E-17111720-17311740-17511760-176C176E-17701780-17B317D717DC1820-18771880-18A818AA18B0-18F51900-191C1950-196D1970-19741980-19AB19C1-19C71A00-1A161A20-1A541AA71B05-1B331B45-1B4B1B83-1BA01BAE1BAF1BBA-1BE51C00-1C231C4D-1C4F1C5A-1C7D1CE9-1CEC1CEE-1CF11CF51CF61D00-1DBF1E00-1F151F18-1F1D1F20-1F451F48-1F4D1F50-1F571F591F5B1F5D1F5F-1F7D1F80-1FB41FB6-1FBC1FBE1FC2-1FC41FC6-1FCC1FD0-1FD31FD6-1FDB1FE0-1FEC1FF2-1FF41FF6-1FFC2071207F2090-209C21022107210A-211321152119-211D212421262128212A-212D212F-2139213C-213F2145-2149214E218321842C0
0-2C2E2C30-2C5E2C60-2CE42CEB-2CEE2CF22CF32D00-2D252D272D2D2D30-2D672D6F2D80-2D962DA0-2DA62DA8-2DAE2DB0-2DB62DB8-2DBE2DC0-2DC62DC8-2DCE2DD0-2DD62DD8-2DDE2E2F300530063031-3035303B303C3041-3096309D-309F30A1-30FA30FC-30FF3105-312D3131-318E31A0-31BA31F0-31FF3400-4DB54E00-9FCCA000-A48CA4D0-A4FDA500-A60CA610-A61FA62AA62BA640-A66EA67F-A697A6A0-A6E5A717-A71FA722-A788A78B-A78EA790-A793A7A0-A7AAA7F8-A801A803-A805A807-A80AA80C-A822A840-A873A882-A8B3A8F2-A8F7A8FBA90A-A925A930-A946A960-A97CA984-A9B2A9CFAA00-AA28AA40-AA42AA44-AA4BAA60-AA76AA7AAA80-AAAFAAB1AAB5AAB6AAB9-AABDAAC0AAC2AADB-AADDAAE0-AAEAAAF2-AAF4AB01-AB06AB09-AB0EAB11-AB16AB20-AB26AB28-AB2EABC0-ABE2AC00-D7A3D7B0-D7C6D7CB-D7FBF900-FA6DFA70-FAD9FB00-FB06FB13-FB17FB1DFB1F-FB28FB2A-FB36FB38-FB3CFB3EFB40FB41FB43FB44FB46-FBB1FBD3-FD3DFD50-FD8FFD92-FDC7FDF0-FDFBFE70-FE74FE76-FEFCFF21-FF3AFF41-FF5AFF66-FFBEFFC2-FFC7FFCA-FFCFFFD2-FFD7FFDA-FFDC"
+ },
+ {
+ L: "Letter"
});
})();
Oops, something went wrong.

0 comments on commit 9e909c6

Please sign in to comment.