-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
strlen.js
73 lines (67 loc) · 2.69 KB
/
strlen.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
module.exports = function strlen(string) {
// discuss at: https://locutus.io/php/strlen/
// original by: Kevin van Zonneveld (https://kvz.io)
// improved by: Sakimori
// improved by: Kevin van Zonneveld (https://kvz.io)
// input by: Kirk Strobeck
// bugfixed by: Onno Marsman (https://twitter.com/onnomarsman)
// revised by: Brett Zamir (https://brett-zamir.me)
// note 1: May look like overkill, but in order to be truly faithful to handling all Unicode
// note 1: characters and to this function in PHP which does not count the number of bytes
// note 1: but counts the number of characters, something like this is really necessary.
// example 1: strlen('Kevin van Zonneveld')
// returns 1: 19
// example 2: ini_set('unicode.semantics', 'on')
// example 2: strlen('A\ud87e\udc04Z')
// returns 2: 3
const str = string + ''
const iniVal = (typeof require !== 'undefined' ? require('../info/ini_get')('unicode.semantics') : undefined) || 'off'
if (iniVal === 'off') {
return str.length
}
let i = 0
let lgth = 0
const getWholeChar = function (str, i) {
const code = str.charCodeAt(i)
let next = ''
let prev = ''
if (code >= 0xd800 && code <= 0xdbff) {
// High surrogate (could change last hex to 0xDB7F to
// treat high private surrogates as single characters)
if (str.length <= i + 1) {
throw new Error('High surrogate without following low surrogate')
}
next = str.charCodeAt(i + 1)
if (next < 0xdc00 || next > 0xdfff) {
throw new Error('High surrogate without following low surrogate')
}
return str.charAt(i) + str.charAt(i + 1)
} else if (code >= 0xdc00 && code <= 0xdfff) {
// Low surrogate
if (i === 0) {
throw new Error('Low surrogate without preceding high surrogate')
}
prev = str.charCodeAt(i - 1)
if (prev < 0xd800 || prev > 0xdbff) {
// (could change last hex to 0xDB7F to treat high private surrogates
// as single characters)
throw new Error('Low surrogate without preceding high surrogate')
}
// We can pass over low surrogates now as the second
// component in a pair which we have already processed
return false
}
return str.charAt(i)
}
for (i = 0, lgth = 0; i < str.length; i++) {
if (getWholeChar(str, i) === false) {
continue
}
// Adapt this line at the top of any loop, passing in the whole string and
// the current iteration and returning a variable to represent the individual character;
// purpose is to treat the first part of a surrogate pair as the whole character and then
// ignore the second part
lgth++
}
return lgth
}