Skip to content

Commit

Permalink
Added support in the expression parser for mathematical alphanumeric …
Browse files Browse the repository at this point in the history
…symbols (see #265)
  • Loading branch information
josdejong committed Sep 25, 2015
1 parent 33f515e commit d9f237d
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 10 deletions.
7 changes: 7 additions & 0 deletions HISTORY.md
@@ -1,5 +1,12 @@
# History

## not yet released, version 2.4.0

- Added support in the expression parser for mathematical alphanumeric
symbols: unicode range \u{1D400} to \u{1D7FF}, excluding invalid code
points.


## 2015-09-19, version 2.3.0

- Implemented function `distance`. Thanks @devanp92.
Expand Down
38 changes: 28 additions & 10 deletions lib/expression/parse.js
Expand Up @@ -314,8 +314,8 @@ function factory (type, config, load, typed) {
}

// check for variables, functions, named operators
if (isAlpha(c)) {
while (isAlpha(c) || isDigit(c)) {
if (currentIsAlpha()) {
while (currentIsAlpha() || isDigit(c)) {
token += c;
next();
}
Expand Down Expand Up @@ -366,22 +366,40 @@ function factory (type, config, load, typed) {
}

/**
* checks if the given char c is:
* - a latin letter (upper or lower case)
* - a latin letter with accents
* - a greek letter
* - an underscore
* @param {string} c a string with one character
* Checks whether the current character `c` is a valid alpha character:
*
* - A latin letter (upper or lower case) Ascii: a-z, A-Z
* - An underscore Ascii: _
* - A latin letter with accents Unicode: \u00C0 - \u02AF
* - A greek letter Unicode: \u0370 - \u03FF
* - A mathematical alphanumeric symbol Unicode: \u{1D400} - \u{1D7FF} excluding invalid code points
*
* @return {boolean}
* @private
*/
function isAlpha (c) {
function currentIsAlpha () {
// http://unicode-table.com/en/
// http://www.wikiwand.com/en/Mathematical_operators_and_symbols_in_Unicode
//
// Note: In ES6, regular expressions will be unicode aware:
// http://stackoverflow.com/questions/280712/javascript-unicode-regexes
// https://mathiasbynens.be/notes/es6-unicode-regex
return /^[a-zA-Z_\u00C0-\u02AF\u0370-\u03FF]$/.test(c);
var cPrev = expression.charAt(index - 1);
var cNext = expression.charAt(index + 1);

/**
 * Test whether `p` is exactly one character out of: ASCII letters (a-z, A-Z),
 * underscore, \u00C0-\u02AF (accented latin) or \u0370-\u03FF (greek).
 *
 * @param {string} p  the character to test
 * @return {boolean}  true when `p` is a single character in one of the ranges
 */
var isValidLatinOrGreek = function (p) {
  // anchored on both sides, so empty and multi-character strings are rejected
  var latinOrGreek = /^[a-zA-Z_\u00C0-\u02AF\u0370-\u03FF]$/;

  return latinOrGreek.test(p);
};

/**
 * Test whether the UTF-16 code units `high` and `low` together form a
 * surrogate pair for an accepted mathematical alphanumeric symbol.
 *
 * @param {string} high  candidate high surrogate, must be \uD835
 * @param {string} low   candidate low surrogate, must be in \uDC00-\uDFFF
 *                       and not one of the excluded code units listed below
 * @return {boolean}     true when the pair is accepted
 */
var isValidMathSymbol = function (high, low) {
  // the only accepted high surrogate is \uD835
  if (!/^[\uD835]$/.test(high)) {
    return false;
  }

  // the second unit must be a low surrogate
  if (!/^[\uDC00-\uDFFF]$/.test(low)) {
    return false;
  }

  // reject the excluded low surrogates (the "holes" in the range)
  return /^[^\uDC55\uDC9D\uDCA0\uDCA1\uDCA3\uDCA4\uDCA7\uDCA8\uDCAD\uDCBA\uDCBC\uDCC4\uDD06\uDD0B\uDD0C\uDD15\uDD1D\uDD3A\uDD3F\uDD45\uDD47-\uDD49\uDD51\uDEA6\uDEA7\uDFCC\uDFCD]$/.test(low);
};

return isValidLatinOrGreek(c)
|| isValidMathSymbol(c, cNext)
|| isValidMathSymbol(cPrev, c);
}

/**
Expand Down
10 changes: 10 additions & 0 deletions test/expression/parse.test.js
Expand Up @@ -74,6 +74,16 @@ describe('parse', function() {

math.eval('k\u00F6ln = 5', scope); // Combination of latin and unicode
assert.strictEqual(scope['k\u00F6ln'], 5);

// test unicode characters in the astral plane (surrogate pairs)
math.eval('\uD835\uDD38 = 1', scope); // double struck capital A
assert.strictEqual(scope['\uD835\uDD38'], 1);

// should not allow the "holes"
assert.throws(function () {
math.eval('\uD835\uDCA3 = 1', scope);
})

});

describe('multiline', function () {
Expand Down

0 comments on commit d9f237d

Please sign in to comment.