/
limax.js
61 lines (54 loc) · 2.46 KB
/
limax.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
'use strict';
const speakingurl = require('speakingurl');
const pinyin = require('pinyin');
const hepburn = require('hepburn');
/**
 * Normalise the `custom` option into a list of characters.
 *
 * @param {(Array|Object)} [custom] - Either an array (returned as-is) or an
 *   object whose own enumerable keys are used. Falsy values yield `[]`.
 * @returns {Array} The array itself, the object's keys, or an empty array.
 */
function customCharsAsArray (custom) {
  // Nothing supplied: there are no custom characters.
  if (!custom) {
    return [];
  }
  // Arrays are passed through untouched (same reference, no copy).
  if (Array.isArray(custom)) {
    return custom;
  }
  // Anything else is treated as a map; only its keys matter here.
  return Object.keys(custom);
}
module.exports = function (text, opt) {
const options = typeof opt === 'string'
? { separator: opt }
: opt || {};
// Remove apostrophes contained within a word
text = text.replace(/(\S)['\u2018\u2019\u201A\u201B\u2032\u2035\u0301](\S)/g, '$1$2'); // eslint-disable-line no-misleading-character-class
// Break out any numbers contained within a word
if (options.separateNumbers === true) {
text = text.replace(/([^\d\s])([0-9]+)([^\d\s])/g, '$1 $2 $3');
}
// Should we remove the separator before a digit where previous word does not end in a digit?
let mergeDigitSuffixes = false;
// Language-specific behaviour
const lang = options.lang || '';
if (lang.toLowerCase().startsWith('ja') || hepburn.containsKana(text)) {
// Convert from Japanese Kana using Hepburn romanisation
text = hepburn.fromKana(text);
// Remove any remaining non-Kana, e.g. Kanji
text = text.replace(/([^A-Za-z0-9\- ]+)/g, '');
} else if (lang.toLowerCase().startsWith('zh') || /[\u4e00-\u9fa5]+/.test(text)) {
// Should we use tone numbers? (default is true)
const tone = (typeof options.tone === 'boolean') ? options.tone : true;
mergeDigitSuffixes = tone;
text = pinyin(text, {
style: tone ? pinyin.STYLE_TONE2 : pinyin.STYLE_NORMAL
}).join(' ');
// Remove punctuation symbols
const customNonPunctuation = customCharsAsArray(options.custom).map(function (c) { return `\\${c}`; }).join('');
const nonPunctuationMatcher = new RegExp(`([^0-9A-Za-z ${customNonPunctuation}]+)`, 'g');
text = text.replace(nonPunctuationMatcher, '');
// Remove space around single character words, caused by non-Mandarin symbols in otherwise Mandarin text
text = text.replace(/([^1-4]) ([A-Za-z]) /g, '$1$2');
}
// Convert to slug using speakingurl
const separator = options.replacement || options.separator;
const slug = speakingurl(text, {
lang: lang || 'en',
separator: typeof separator === 'string' ? separator : '-',
maintainCase: options.maintainCase || false,
custom: options.custom || {}
});
// Remove separator before a digit where previous word does not end in a digit
return mergeDigitSuffixes
? slug.replace(/([^0-9])-([0-9])/g, '$1$2')
: slug;
};