// is_unicode.js
module.exports = function is_unicode(vr) {
// discuss at: https://locutus.io/php/is_unicode/
// original by: Brett Zamir (https://brett-zamir.me)
// note 1: Almost all strings in JavaScript should be Unicode
// example 1: is_unicode('We the peoples of the United Nations...!')
// returns 1: true
if (typeof vr !== 'string') {
return false
}
// If surrogates occur outside of high-low pairs, then this is not Unicode
let arr = []
const highSurrogate = '[\uD800-\uDBFF]'
const lowSurrogate = '[\uDC00-\uDFFF]'
const highSurrogateBeforeAny = new RegExp(highSurrogate + '([\\s\\S])', 'g')
const lowSurrogateAfterAny = new RegExp('([\\s\\S])' + lowSurrogate, 'g')
const singleLowSurrogate = new RegExp('^' + lowSurrogate + '$')
const singleHighSurrogate = new RegExp('^' + highSurrogate + '$')
while ((arr = highSurrogateBeforeAny.exec(vr)) !== null) {
if (!arr[1] || !arr[1].match(singleLowSurrogate)) {
// If high not followed by low surrogate
return false
}
}
while ((arr = lowSurrogateAfterAny.exec(vr)) !== null) {
if (!arr[1] || !arr[1].match(singleHighSurrogate)) {
// If low not preceded by high surrogate
return false
}
}
return true
}