/
romanize.js
96 lines (75 loc) · 2.81 KB
/
romanize.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
const replaceHangul = require("./hangul/hangulReplace");
const { decomposeHangul } = require("./hangul/unicode/decompose");
const jamos = require("./jamo");
/**
 * Look up the dictionary entry for a jamo at a given position in a syllable.
 *
 * Entries of `jamos[idx]` are matched on their `jamo` field first; only when
 * no entry matches that way is the `compatJamo` field tried.
 *
 * @param {*} jamo - value compared (strictly) against `jamo` / `compatJamo`
 * @param {number} idx - index into the `jamos` table
 * @returns {Object|undefined} the matching entry, or undefined when none matches
 */
const getJamoDictionary = (jamo, idx) => {
  const candidates = jamos[idx];
  const primaryMatch = candidates.find(entry => entry.jamo === jamo);
  if (primaryMatch) {
    return primaryMatch;
  }
  return candidates.find(entry => entry.compatJamo === jamo);
};
/**
 * Resolve a jamo dictionary node down to its romanized string.
 *
 * A node is either a string (already resolved, returned as-is) or an object
 * whose keys are tried in the order below. The first key holding a value is
 * resolved recursively with the same `params`:
 *   1. `roman`
 *   2. the key named by `params.method` (e.g. "RR")
 *   3. `vowelNext`, only when `params.vowelNext` is truthy
 *   4. the neighbouring consonant (`params.consonantNext`, otherwise
 *      `params.consonantPrev`) used as a property name
 *   5. `default`
 *
 * @param {string|Object} node - dictionary node to resolve
 * @param {Object} [params] - lookup context; `{ method: "RR" }` when falsy
 * @param {string} [params.method] - romanization scheme key
 * @param {boolean} [params.vowelNext] - enables the `vowelNext` key
 * @param {number|string} [params.consonantNext] - neighbouring jamo, as a
 *   code point or as the jamo character itself
 * @param {number|string} [params.consonantPrev] - same, used when
 *   `consonantNext` is falsy
 * @param {*} [prevNode] - parent node; only used in the error report
 * @returns {string} the resolved romanization
 * @throws {Error} when `node` is falsy (and not a string), or when none of
 *   the keys above applies to it
 */
function searchJamo(node, params, prevNode) {
  const { method, vowelNext, consonantNext, consonantPrev } = params || {
    method: "RR"
  };
  // A value counts as present when it is truthy or any string: the empty
  // string is a valid romanization (e.g. initial silent ieung/ㅇ).
  const isPresent = value => Boolean(value) || typeof value === "string";
  const next = nextNode => searchJamo(nextNode, params, node);

  if (typeof node === "string") {
    return node;
  }
  if (!node) {
    console.warn(prevNode);
    throw new Error("No node found after" + JSON.stringify(prevNode));
  }
  if (isPresent(node.roman)) {
    return next(node.roman);
  }
  if (method && isPresent(node[method])) {
    return next(node[method]);
  }
  if (vowelNext && isPresent(node.vowelNext)) {
    return next(node.vowelNext);
  }
  const neighbor = consonantNext || consonantPrev;
  if (neighbor) {
    // The neighbour may arrive as a code point or already as a jamo string;
    // String.fromCodePoint would throw a RangeError on the latter.
    const assimilation =
      typeof neighbor === "number"
        ? String.fromCodePoint(neighbor)
        : String(neighbor);
    if (isPresent(node[assimilation])) {
      // Resolve through next() like every other branch so that a nested node
      // is reduced to a string instead of being returned as a raw object.
      return next(node[assimilation]);
    }
  }
  if (isPresent(node.default)) {
    return next(node.default);
  }
  throw new Error("Unimplemented: " + JSON.stringify(node, null, 2));
}
/**
 * Build a callback, suitable for `Array.prototype.map` over a decomposed
 * word, that romanizes one syllable at a time.
 *
 * The callback receives a syllable (an array of jamo), its index and the
 * whole word (array of syllables). Each jamo is looked up with
 * `getJamoDictionary` using its position inside the syllable and resolved
 * with `searchJamo`. Context from neighbouring syllables is passed along:
 *   - position 0 receives the previous syllable's element at index 2
 *     (`consonantPrev`), when there is one;
 *   - position 2 receives the next syllable's element at index 0
 *     (`consonantNext`) and a `vowelNext` flag that is true when that element
 *     is 0x110b / "ᄋ".
 *
 * @param {string} [method="RR"] - romanization scheme key handed to searchJamo
 * @returns {function(Array, number, Array): Array} per-syllable mapper whose
 *   result has one entry per jamo of the syllable
 * @throws {Error} (from the returned callback) "missing dict …" when a jamo
 *   has no dictionary entry
 */
const syllableParser = (method = "RR") =>
  function(syllable, idx, word) {
    // next subsequent initial consonant (choseong)
    const next = idx + 1 < word.length ? word[idx + 1][0] : undefined;
    const vowelNext = next === 0x110b || next === "ᄋ";
    // only exists when this isn't the first syllable in the word
    const prev = idx > 0 ? word[idx - 1] : null;
    // previous adjacent trailing consonant (jongseong)
    const consonantPrev = prev && prev[2] ? prev[2] : undefined;

    return syllable.map((jamo, jamoIdx) => {
      let dict = getJamoDictionary(jamo, jamoIdx);
      // Retry with the character form only for numeric code points: calling
      // String.fromCodePoint on a jamo string throws a RangeError, which
      // would hide the "missing dict" error below.
      if (!dict && typeof jamo === "number") {
        dict = getJamoDictionary(String.fromCodePoint(jamo), jamoIdx);
      }
      if (!dict) {
        throw new Error("missing dict " + jamo);
      }
      return searchJamo(dict, {
        method,
        vowelNext: jamoIdx === 2 ? vowelNext : undefined,
        consonantPrev: jamoIdx === 0 ? consonantPrev : undefined,
        consonantNext: jamoIdx === 2 ? next : undefined
      });
    });
  };
/**
 * Decompose a word with `decomposeHangul` and romanize each resulting
 * syllable with the parser built for `method`.
 *
 * @param {*} word - value handed to `decomposeHangul`
 * @param {string} [method="RR"] - romanization scheme key
 * @returns {Array} one entry per decomposed syllable, as produced by the
 *   `syllableParser(method)` callback
 */
const mapJamoToRoman = (word, method = "RR") => {
  const syllables = decomposeHangul(word);
  const toRoman = syllableParser(method);
  return syllables.map(toRoman);
};
/**
 * Romanize a single word: the per-syllable results of `mapJamoToRoman` are
 * flattened one level and joined into a single string.
 *
 * @param {*} word - value handed to `mapJamoToRoman`
 * @param {string} [method="RR"] - romanization scheme key
 * @returns {string} the concatenated romanization
 */
const romanizeWord = (word, method = "RR") => {
  let pieces = [];
  for (const syllable of mapJamoToRoman(word, method)) {
    pieces = pieces.concat(syllable);
  }
  return pieces.join("");
};
/**
 * Romanize the Hangul portions of `text`.
 *
 * `replaceHangul` is given a callback that romanizes one word with
 * `romanizeWord`. The callback forwards only its first argument, so that any
 * extra positional arguments `replaceHangul` may supply are never mistaken
 * for the romanization method.
 * NOTE(review): presumably `replaceHangul` invokes the callback once per
 * Hangul run and substitutes its return value — confirm against
 * ./hangul/hangulReplace.
 *
 * @param {string} text - input text
 * @param {Object} [options]
 * @param {string} [options.method="RR"] - romanization scheme key forwarded
 *   to `romanizeWord`. NOTE(review): `options` was previously ignored; the
 *   `method` key is assumed from the sibling functions' parameter name —
 *   confirm against the documented public API.
 * @returns {*} whatever `replaceHangul` returns
 */
const romanize = (text, options) => {
  const method = (options && options.method) || "RR";
  return replaceHangul(text, word => romanizeWord(word, method));
};
// Public API of this module.
module.exports = { searchJamo, syllableParser, romanizeWord, romanize };