Add: lang_KO support

Added Korean language support and tests
savoirfairelinux · Nov 2, 2018 · 9605751 · 9605751
1 parent 411a92a
commit 9605751
Show file tree

Hide file tree

Showing 4 changed files with 254 additions and 0 deletions.
diff --git a/README.rst b/README.rst
@@ -92,6 +92,7 @@ Besides the numerical argument, there are two main optional arguments.
 * ``id`` (Indonesian)
 * ``it`` (Italian)
 * ``ja`` (Japanese)
+* ``ko`` (Korean)
 * ``lt`` (Lithuanian)
 * ``lv`` (Latvian)
 * ``no`` (Norwegian)

diff --git a/num2words/__init__.py b/num2words/__init__.py
@@ -46,6 +46,7 @@
 from . import lang_UK
 from . import lang_SL
 from . import lang_TH
+from . import lang_KO
 
 CONVERTER_CLASSES = {
     'ar': lang_AR.Num2Word_AR(),
@@ -63,6 +64,7 @@
     'es_VE': lang_ES_VE.Num2Word_ES_VE(),
     'id': lang_ID.Num2Word_ID(),
     'ja': lang_JA.Num2Word_JA(),
+    'ko': lang_KO.Num2Word_KO(),
     'lt': lang_LT.Num2Word_LT(),
     'lv': lang_LV.Num2Word_LV(),
     'pl': lang_PL.Num2Word_PL(),

diff --git a/num2words/lang_KO.py b/num2words/lang_KO.py
@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2003, Taro Ogawa.  All Rights Reserved.
+# Copyright (c) 2013, Savoir-faire Linux inc.  All Rights Reserved.
+
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1301 USA
+
+from __future__ import division, print_function, unicode_literals
+
+from .base import Num2Word_Base
+from .currency import parse_currency_parts
+
+
+class Num2Word_KO(Num2Word_Base):
+    """Korean (``ko``) number-to-words converter.
+
+    Cardinals, years and currency amounts are spelled with the Sino-Korean
+    words configured in ``setup``; ``to_ordinal`` additionally uses the
+    native Korean forms stored in ``self.ords`` for the last two digits.
+    """
+
+    # Currency code -> (main unit, subunit). ``None`` as subunit means
+    # ``to_currency`` rejects fractional amounts for that currency.
+    CURRENCY_FORMS = {
+        'KRW': ('원', None),
+        'USD': ('달러', '센트'),
+        'JPY': ('엔', None)
+    }
+
+    def set_high_numwords(self, high):
+        """Register the large-number words in ``self.cards``.
+
+        ``high`` is ordered from the largest word to the smallest. The
+        words are mapped to powers of ten in steps of four digits, so the
+        last entry is stored under 10**4 and the first under
+        10**(4 * len(high)).
+        """
+        top_exponent = 4 * len(high)
+        for word, exponent in zip(high, range(top_exponent, 0, -4)):
+            self.cards[10 ** exponent] = word
+
+    def setup(self):
+        """Define the Korean vocabulary used by the converter."""
+        super(Num2Word_KO, self).setup()
+
+        self.negword = "마이너스 "
+        self.pointword = "점"
+
+        # Largest first; consumed by set_high_numwords, which assigns one
+        # word per group of four digits ("만" ends up at 10**4).
+        self.high_numwords = [
+            '무량대수',
+            '불가사의',
+            '나유타',
+            '아승기',
+            '항하사',
+            '극',
+            '재',
+            '정',
+            '간',
+            '구',
+            '양',
+            '자',
+            '해',
+            '경',
+            '조',
+            '억',
+            '만']
+        self.mid_numwords = [(1000, "천"), (100, "백")]
+        # Words for 10 down to 0, in that order.
+        self.low_numwords = ["십", "구", "팔", "칠", "육", "오", "사", "삼", "이",
+                             "일", "영"]
+        # Native Korean forms used by to_ordinal, keyed by the cardinal
+        # text of 1-10 and of the tens 20-90.
+        self.ords = {"일": "한",
+                     "이": "두",
+                     "삼": "세",
+                     "사": "네",
+                     "오": "다섯",
+                     "육": "여섯",
+                     "칠": "일곱",
+                     "팔": "여덟",
+                     "구": "아홉",
+                     "십": "열",
+                     "이십": "스물",
+                     "삼십": "서른",
+                     "사십": "마흔",
+                     "오십": "쉰",
+                     "육십": "예순",
+                     "칠십": "일흔",
+                     "팔십": "여든",
+                     "구십": "아흔"}
+
+    def merge(self, lpair, rpair):
+        """Combine two adjacent ``(text, number)`` pairs into one pair.
+
+        * A left value of 1 before a right value up to 10000 collapses to
+          the right pair unchanged.
+        * A left value larger than the right one is added to it; the texts
+          are joined directly below 10000 and with a space from 10000 up.
+        * Otherwise the values are multiplied and the texts are joined
+          directly.
+        """
+        ltext, lnum = lpair
+        rtext, rnum = rpair
+        if lnum == 1 and rnum <= 10000:
+            return rpair
+        if lnum > rnum:
+            # Groups of 10000 and above are separated by a space.
+            joiner = "" if lnum < 10000 else " "
+            return ("%s%s%s" % (ltext, joiner, rtext), lnum + rnum)
+        return ("%s%s" % (ltext, rtext), lnum * rnum)
+
+    def to_ordinal(self, value):
+        """Return ``value`` as a Korean ordinal ending in "번째".
+
+        Everything from the hundreds upwards is spelled as a cardinal and
+        the last two digits use the native forms from ``self.ords``
+        (137 -> "백 서른일곱 번째"). 1 is the irregular "첫 번째".
+
+        The value is validated with ``verify_ordinal`` first; whatever
+        that method raises for an invalid value propagates to the caller.
+        """
+        self.verify_ordinal(value)
+        value = int(value)
+        if value == 1:
+            return "첫 번째"
+
+        # Work on the number itself rather than on the cardinal text, so
+        # values such as 100, 1004 or 10000 no longer hit a KeyError.
+        tens, units = divmod(value % 100, 10)
+        upper = value - value % 100
+
+        words = []
+        # Values ending in "00" (and 0 itself) have no native part; the
+        # cardinal then stands alone.
+        if upper or not (tens or units):
+            words.append(self.to_cardinal(upper))
+
+        native = ""
+        if tens:
+            native = self.ords[self.to_cardinal(tens * 10)]
+        if units:
+            native += self.ords[self.to_cardinal(units)]
+        elif tens == 2:
+            # A bare twenty takes the attributive form "스무"; 21-29 keep
+            # "스물" (25 -> "스물다섯 번째").
+            native = "스무"
+        if native:
+            words.append(native)
+
+        words.append("번째")
+        return " ".join(words)
+
+    def to_ordinal_num(self, value):
+        """Return the digits of ``value`` followed by " 번째"."""
+        self.verify_ordinal(value)
+        return "%s 번째" % (value)
+
+    def to_year(self, val, suffix=None, longval=True):
+        """Spell ``val`` as a year ending in "년".
+
+        A negative year is converted to its absolute value and prefixed
+        with ``suffix``, which defaults to "기원전" in that case. A
+        non-empty ``suffix`` is also prefixed to non-negative years.
+        ``longval`` is accepted but not used by this implementation.
+        """
+        if val < 0:
+            val = abs(val)
+            suffix = suffix or '기원전'
+        year_text = self.to_cardinal(val)
+        if suffix:
+            return "%s %s년" % (suffix, year_text)
+        return "%s년" % year_text
+
+    def to_currency(self, val, currency="KRW", cents=False, seperator="",
+                    adjective=False):
+        """Spell ``val`` as an amount of ``currency``.
+
+        ``val`` is split by ``parse_currency_parts`` into a main part, a
+        subunit part and a sign. ``seperator`` and ``adjective`` are
+        accepted but not used by this implementation.
+
+        Raises:
+            NotImplementedError: ``currency`` is not in CURRENCY_FORMS.
+            ValueError: a subunit part (or ``cents=True``) was given for a
+                currency that has no subunit.
+        """
+        left, right, is_negative = parse_currency_parts(
+            val, is_int_with_cents=cents)
+
+        # Only the lookup can raise KeyError, so only it is guarded.
+        try:
+            cr1, cr2 = self.CURRENCY_FORMS[currency]
+        except KeyError:
+            raise NotImplementedError(
+                'Currency code "%s" not implemented for "%s"' %
+                (currency, self.__class__.__name__))
+        if (cents or right) and not cr2:
+            raise ValueError('Decimals not supported for "%s"' % currency)
+
+        minus_str = self.negword if is_negative else ""
+        # The main amount is written without the spaces that to_cardinal
+        # puts between groups.
+        main_text = ''.join(self.to_cardinal(left).split())
+        # Currencies with a subunit always spell it out, even when zero.
+        minor_text = ' ' + self.to_cardinal(right) + cr2 if cr2 else ''
+        return '%s%s%s%s' % (minus_str, main_text, cr1, minor_text)
diff --git a/tests/test_ko.py b/tests/test_ko.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2013, Savoir-faire Linux inc.  All Rights Reserved.
+
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1301 USA
+
+from __future__ import division, print_function, unicode_literals
+
+from unittest import TestCase
+
+from num2words import num2words
+
+
+def n2k(*args, **kwargs):
+    """Call ``num2words`` with the language fixed to ``'ko'``."""
+    korean_text = num2words(*args, lang='ko', **kwargs)
+    return korean_text
+
+
+class Num2WordsKOTest(TestCase):
+    """Checks the Korean converter through the ``n2k`` helper."""
+
+    def _assert_all(self, expectations, **options):
+        """Assert ``n2k(number, **options) == expected`` for each pair."""
+        for number, expected in expectations:
+            self.assertEqual(n2k(number, **options), expected)
+
+    def test_low(self):
+        """Cardinals below one hundred."""
+        self._assert_all([
+            (0, "영"), (1, "일"), (2, "이"), (3, "삼"), (4, "사"), (5, "오"),
+            (6, "육"), (7, "칠"), (8, "팔"), (9, "구"), (10, "십"),
+            (11, "십일"), (12, "십이"), (13, "십삼"), (14, "십사"),
+            (15, "십오"), (16, "십육"), (17, "십칠"),
+            (18, "십팔"), (19, "십구"), (20, "이십"), (25, "이십오"),
+            (31, "삼십일"), (42, "사십이"), (54, "오십사"), (63, "육십삼"),
+            (76, "칠십육"), (89, "팔십구"), (98, "구십팔"),
+        ])
+
+    def test_mid(self):
+        """Cardinals from one hundred up to four digits."""
+        self._assert_all([
+            (100, "백"), (121, "백이십일"), (160, "백육십"), (256, "이백오십육"),
+            (285, "이백팔십오"), (486, "사백팔십육"), (627, "육백이십칠"),
+            (808, "팔백팔"), (999, "구백구십구"), (1004, "천사"),
+            (2018, "이천십팔"), (7063, "칠천육십삼"),
+        ])
+
+    def test_high(self):
+        """Cardinals of 10000 and above."""
+        self._assert_all([
+            (10000, "만"), (11020, "만 천이십"), (25891, "이만 오천팔백구십일"),
+            (64237, "육만 사천이백삼십칠"), (241572, "이십사만 천오백칠십이"),
+            (100000000, "일억"), (5000500000000, "오조 오억"),
+        ])
+
+    def test_negative(self):
+        """Negative cardinals."""
+        self._assert_all([
+            (-11, "마이너스 십일"), (-15, "마이너스 십오"),
+            (-18, "마이너스 십팔"), (-241572, "마이너스 이십사만 천오백칠십이"),
+        ])
+
+    def test_year(self):
+        """Years, including a negative one."""
+        self._assert_all([
+            (2000, "이천년"), (2002, "이천이년"), (2018, "이천십팔년"),
+            (1954, "천구백오십사년"), (1910, "천구백십년"), (-1000, "기원전 천년"),
+        ], to="year")
+
+    def test_currency(self):
+        """Amounts in KRW (default), USD and JPY, plus the error cases."""
+        won_amounts = [(8350, "팔천삼백오십원"), (14980, "만사천구백팔십원"),
+                       (250004000, "이억오천만사천원")]
+        dollar_amounts = [(4, "사달러 영센트"), (19.55, "십구달러 오십오센트")]
+        yen_amounts = [(15, "십오엔"), (50, "오십엔")]
+
+        self._assert_all(won_amounts, to="currency")
+        self._assert_all(dollar_amounts, to="currency", currency="USD")
+        self._assert_all(yen_amounts, to="currency", currency="JPY")
+
+        # The default currency has no subunit, so a fractional amount fails.
+        with self.assertRaises(ValueError):
+            n2k(190.55, to="currency")
+        with self.assertRaises(NotImplementedError):
+            n2k(4, to="currency", currency="EUR")
+
+    def test_ordinal(self):
+        """Ordinals spelled out in words."""
+        self._assert_all([
+            (1, "첫 번째"), (101, "백 한 번째"), (2, "두 번째"), (5, "다섯 번째"),
+            (10, "열 번째"), (25, "스물다섯 번째"), (137, "백 서른일곱 번째"),
+        ], to="ordinal")
+
+    def test_ordinal_num(self):
+        """Ordinals written with digits."""
+        self._assert_all([
+            (1, "1 번째"), (101, "101 번째"), (25, "25 번째"),
+        ], to="ordinal_num")