Skip to content

Commit

Permalink
Add basic/prelim support for RR, RR translit WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
mooniker committed Sep 30, 2019
1 parent 6202a08 commit f649e80
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 45 deletions.
23 changes: 13 additions & 10 deletions jamo.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,17 @@ const jamoMapper = jamoSet => ({ jamo, roman }, idx) => {
return Object.assign(jamoSet[idx], { roman, compatJamo, compatJamoHex });
};

// initial consonants
const choseong = [
{ jamo: "ㄱ", roman: "g" },
{ jamo: "ㄲ", roman: "gg" },
{ jamo: "ㄲ", roman: "kk" },
{ jamo: "ㄴ", roman: "n" },
{ jamo: "ㄷ", roman: "d" },
{ jamo: "ㄸ", roman: "dd" },
{ jamo: "ㄹ", roman: "r" },
{ jamo: "ㄹ", roman: { default: "r", RRT: "l" } },
{ jamo: "ㅁ", roman: "m" },
{ jamo: "ㅂ", roman: "b" },
{ jamo: "ㅃ", roman: "bb" },
{ jamo: "ㅃ", roman: "pp" },
{ jamo: "ㅅ", roman: "s" },
{ jamo: "ㅆ", roman: "ss" },
{ jamo: "ㅇ", roman: "" },
Expand All @@ -54,6 +55,7 @@ const choseong = [
{ jamo: "ㅎ", roman: "h" }
].map(jamoMapper(initialConsonants));

// medial vowels
const jungseong = [
{ jamo: "ㅏ", roman: "a" },
{ jamo: "ㅐ", roman: "ae" },
Expand All @@ -78,15 +80,16 @@ const jungseong = [
{ jamo: "ㅣ", roman: "i" }
].map(jamoMapper(medialVowels));

// final consonants
const jongseong = [
{ jamo: null, roman: "" },
{ jamo: "ㄱ", roman: "k" },
{ jamo: "ㄲ", roman: "k" },
{ jamo: "ㄱ", roman: { default: "k", vowelNext: "g", RRT: "g" } },
{ jamo: "ㄲ", roman: "kk" },
{ jamo: "ㄳ", roman: "k" },
{ jamo: "ㄴ", roman: "n" },
{ jamo: "ㄵ", roman: "n" },
{ jamo: "ㄶ", roman: "n" },
{ jamo: "ㄷ", roman: "d" },
{ jamo: "ㄷ", roman: { default: "t", vowelNext: "d", RRT: "d" } },
{ jamo: "ㄹ", roman: "l" },
{ jamo: "ㄺ", roman: "r" },
{ jamo: "ㄻ", roman: "lm" },
Expand All @@ -96,17 +99,17 @@ const jongseong = [
{ jamo: "ㄿ", roman: "lp" },
{ jamo: "ㅀ", roman: "lh" },
{ jamo: "ㅁ", roman: "m" },
{ jamo: "ㅂ", roman: "b" },
{ jamo: "ㅂ", roman: { default: "p", vowelNext: "b", RRT: "b" } },
{ jamo: "ㅄ", roman: "bs" },
{ jamo: "ㅅ", roman: "s" },
{ jamo: "ㅆ", roman: "ss" },
{ jamo: "ㅇ", roman: "ng" },
{ jamo: "ㅈ", roman: "j" },
{ jamo: "ㅊ", roman: "ch" },
{ jamo: "ㅈ", roman: { default: "t", vowelNext: "j" } },
{ jamo: "ㅊ", roman: { default: "t", vowelNext: "ch", RRT: "ch" } },
{ jamo: "ㅋ", roman: "k" },
{ jamo: "ㅌ", roman: "t" },
{ jamo: "ㅍ", roman: "p" },
{ jamo: "ㅎ", roman: "h" }
{ jamo: "ㅎ", roman: { default: "t", RRT: "h" } }
].map(jamoMapper(finalConsonants));

module.exports = [choseong, jungseong, jongseong];
78 changes: 50 additions & 28 deletions romanize.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,49 +6,71 @@ const _ = require("lodash");
// Finds the dictionary entry for `jamo` in the table at position `idx` of
// `jamos`: first by its `jamo` field, then, if that finds nothing, by its
// `compatJamo` field. Yields undefined when neither lookup matches.
const getJamoDictionary = (jamo, idx) => {
  const table = jamos[idx];
  const exactMatch = _.find(table, { jamo });
  if (exactMatch) {
    return exactMatch;
  }
  return _.find(table, { compatJamo: jamo });
};

function searchJamo(node) {
function searchJamo(node, params, prevNode) {
const { method, vowelNext } = params || {};
if (typeof node === "string") {
return node;
}

if (!node) {
throw new Error("No node found.");
console.warn(prevNode);
throw new Error("No node found:" + node);
}

if (typeof node === "string") {
return node;
// treat empty string (initial silent ieung/ㅇ as truthy)
if (node.roman || typeof node.roman === "string") {
return next(node.roman);
}

if (node.roman) {
return searchJamo(node.roman);
if (method && (node[method] || typeof node[method] === "string")) {
return next(node[method]);
}

throw new Error("unimplemented");
}
// console.log(params, vowelNext, node.vowelNext, node);
if (vowelNext && (node.vowelNext || typeof node.vowelNext === "string")) {
return next(node.vowelNext);
}

function romanize(text) {
return replaceHangul(text, romanizeWord);
if (node.default || typeof node.default === "string") {
return next(node.default);
}

console.warn(prevNode);
throw new Error("Unimplemented: " + JSON.stringify(node, null, 2));

function next(nextNode) {
return searchJamo(nextNode, params, node);
}
}

function parseSyllable(syllable, idx, syllabary) {
// next subsequent initial consonant (choseong)
const next = idx + 1 < syllabary.length ? syllabary[idx + 1][0] : null;
const syllableParser = (method = "RR") =>
function(syllable, idx, word) {
// next subsequent initial consonant (choseong)
const next = idx + 1 < word.length ? word[idx + 1][0] : undefined;
const vowelNext = next === 0x110b || next === "ᄋ";

// previous adjacent trailing consonant (jongseong)
const prev = idx > 0 ? syllabary[idx - 1][2] : null;
// previous adjacent trailing consonant (jongseong)
// const prev = idx > 0 ? word[idx - 1][2] : undefined;

return syllable.map((jamo, idx, syllable) => {
const dict =
getJamoDictionary(jamo, idx) ||
getJamoDictionary(String.fromCodePoint(jamo), idx);
if (!dict) {
throw new Error("missing dict " + jamo);
}
return syllable.map((jamo, jamoIdx) => {
const dict =
getJamoDictionary(jamo, jamoIdx) ||
getJamoDictionary(String.fromCodePoint(jamo), jamoIdx);

return searchJamo(dict);
});
}
if (!dict) {
throw new Error("missing dict " + jamo);
}

return searchJamo(dict, { method, vowelNext });
});
};

const romanizeWord = word =>
const romanizeWord = (word, method = "RR") =>
decomposeHangul(word)
.map(parseSyllable)
.map(syllableParser(method))
.reduce((acc, val) => acc.concat(val), [])
.join("");

module.exports = { romanizeWord, romanize };
// Romanizes the Hangul found in `text` by handing each matched word to
// romanizeWord via replaceHangul.
//
// text    - input string; non-Hangul handling is left to replaceHangul.
// options - optional; `options.method` is forwarded to romanizeWord as its
//           romanization method. When it is omitted, romanizeWord's own
//           default applies.
//
// Returns whatever replaceHangul returns (presumably the text with each
// Hangul run replaced; confirm against replaceHangul).
const romanize = (text, options) => {
  const { method } = options || {};
  // Wrap the callback so only the word reaches romanizeWord. Passing
  // romanizeWord directly would let any extra arguments supplied by
  // replaceHangul be taken as `method`.
  return replaceHangul(text, word => romanizeWord(word, method));
};

module.exports = { syllableParser, romanizeWord, romanize };
88 changes: 81 additions & 7 deletions romanize.test.js
Original file line number Diff line number Diff line change
@@ -1,27 +1,101 @@
const { romanize, romanizeWord } = require("./romanize");
const { syllableParser, romanize, romanizeWord } = require("./romanize");
const translations = require("./translations");

const testWords = {
// Hangul word -> expected romaja for the "should romanize simple words"
// suite, which calls romanizeWord with its default method.
const simpleWords = {
  가: "ga",
  나: "na",
  다: "da",
  로마자: "romaja",
  표기법: "pyogibeop",
  // Disabled case; the reason is not recorded here.
  // 국어의: "gugeoui",
  만남: "mannam",
  동무: "dongmu"
};

// Hangul word -> expected romaja for the plosive/stop suite. Values keep the
// capitalization they are listed with; bracketed Hangul in the trailing
// comments shows the pronunciation where it differs from the spelling.
const plosiveCases = {
  "구미": "Gumi",
  "영동": "Yeongdong",
  "백암": "Baegam",
  "옥천": "Okcheon",
  "합덕": "Hapdeok",
  "호법": "Hobeop",
  "월곶": "Wolgot", // pronounced [월곧]
  "벚꽃": "beotkkot", // pronounced [벋꼳]
  "한밭": "Hanbat" // pronounced [한받]
};

// Hangul word -> expected romaja for words whose adjacent consonants
// assimilate. The bracketed Hangul in each trailing comment is the
// pronunciation the romaja follows.
const wordsWithAdjacentConsonantAssimilation = {
  "백마": "Baengma", // pronounced [뱅마]
  "신문로": "Sinmunno", // pronounced [신문노]
  "종로": "Jongno", // pronounced [종노]
  "왕십리": "Wangsimni", // pronounced [왕심니]
  "별내": "Byeollae", // pronounced [별래]
  "신라": "Silla" // pronounced [실라]
};

// Hangul word -> expected output of romanizeWord(word, "RRT"), used by the
// "should transliterate" suite.
const transliterationCases = {
  집: "jib",
  짚: "jip",
  밖: "bakk",
  값: "gabs",
  붓꽃: "buskkoch",
  먹는: "meogneun",
  독립: "doglib",
  문리: "munli",
  // Disabled cases below expect hyphenated output; presumably pending
  // hyphen support in the romanizer (TODO confirm).
  // 물엿: "mul-yeos",
  // 굳이: "gud-i",
  좋다: "johda",
  가곡: "gagog",
  조랑말: "jolangmal"
  // 없었습니다: "eobs-eoss-seubnida"
};

describe("romanizeWord function", () => {
Object.entries(testWords).forEach(([hangeul, romaja]) => {
test(`should romanize ${hangeul} to ${romaja}`, () => {
expect(romanizeWord(hangeul)).toBe(romaja);
describe("should romanize simple words", () => {
Object.entries(simpleWords).forEach(([hangulWord, expectedRomaja]) => {
test(`${hangulWord} to ${expectedRomaja}`, () => {
expect(romanizeWord(hangulWord)).toBe(expectedRomaja);
});
});
});

describe("should transcribe plosives/stops ㄱ, ㄷ, and ㅂ as 'g', 'd', and 'b' before a vowel and as 'k', 't', and 'p' when before another consonant or as the last sound of a word", () => {
Object.entries(plosiveCases).forEach(([hangulWord, expectedRomaja]) => {
test(`${hangulWord} to ${expectedRomaja}`, () => {
expect(romanizeWord(hangulWord)).toBe(expectedRomaja.toLowerCase());
});
});
});

describe("should transliterate", () => {
Object.entries(transliterationCases).forEach(
([hangulWord, expectedRomaja]) => {
test(`${hangulWord} to ${expectedRomaja}`, () => {
expect(romanizeWord(hangulWord, "RRT")).toBe(expectedRomaja);
});
}
);
});

// describe("should romanize adjacent consonant assimilation", () => {
// Object.entries(wordsWithAdjacentConsonantAssimilation).forEach(
// ([hangulWord, expectedRomaja]) => {
// test(`in ${hangulWord} to ${expectedRomaja.toLowerCase()}`, () => {
// expect(romanizeWord(hangulWord)).toBe(expectedRomaja.toLowerCase());
// });
// }
// );
// });
});

describe("romanize function", () => {
// test("should romanize Hangul string with spaces", () => {
// expect(romanize("국어의 로마자 표기법")).toBe("gugeoui romaja pyogibeop");
// });

test("should romanize 로마자 as romaja", () => {
expect(romanize("로마자 is the Korean word for Latin letters.")).toBe(
"romaja is the Korean word for Latin letters."
expect(romanize("The Korean word for Latin letters is 로마자.")).toBe(
"The Korean word for Latin letters is romaja."
);
});
});

0 comments on commit f649e80

Please sign in to comment.