Extracted all functions to the module level; no more classes

suminb · Sep 22, 2014 · 18edce3 · 18edce3
1 parent 1ea7fb9
commit 18edce3
Show file tree

Hide file tree

Showing 7 changed files with 188 additions and 127 deletions.
diff --git a/README.rst b/README.rst
@@ -11,7 +11,7 @@ Installation
 
 .. code-block:: console
 
-   sudo pip install hanja
+   pip install hanja
 
 
 Usage
@@ -20,14 +20,14 @@ Usage
 한글 초성, 중성, 종성 분리
 ``````````````````````````
 
->>> Hangul.separate(u'가')
+>>> hangul.separate(u'가')
 (0, 0, 0)
->>> Hangul.separate(u'까')
+>>> hangul.separate(u'까')
 (1, 0, 0)
 
 튜플(tuple)의 마지막 원소가 0이면 종성이 없는 글자라고 판단할 수 있다.
 
->>> Hangul.separate(u'한')
+>>> hangul.separate(u'한')
 (18, 0, 4)
 
 'ㅎ'은 19번째 자음, 'ㅏ'는 첫번째 모음, 'ㄴ'은 다섯번째 자음이라는 것을 알 수 있다.
@@ -36,18 +36,18 @@ Usage
 초성, 중성, 종성을 조합하여 한 글자를 만듦
 ``````````````````````````````````````````
 
->>> Hangul.synthesize(0, 0, 0)
+>>> hangul.build(0, 0, 0)
 u'\uac00'
->>> print Hangul.synthesize(0, 0, 0)
+>>> print hangul.build(0, 0, 0)
 가
 
 
 주어진 글자가 한글인지의 여부를 판별
 ````````````````````````````````````
 
->>> Hangul.is_hangul(u'가')
+>>> hangul.is_hangul(u'가')
 True
->>> Hangul.is_hangul(u'a')
+>>> hangul.is_hangul(u'a')
 False
 
 
@@ -56,30 +56,30 @@ False
 
 리스트가 아닌 제네레이터(generator)를 반환한다.
 
->>> '|'.join(Hanja.split_hanja(u'大韓民國은 民主共和國이다.'))
+>>> '|'.join(hanja.split_hanja(u'大韓民國은 民主共和國이다.'))
 大韓民國|은 |民主共和國|이다.
 
->>> [x for x in Hanja.split_hanja(u'大韓民國은 民主共和國이다.')]
+>>> [x for x in hanja.split_hanja(u'大韓民國은 民主共和國이다.')]
 [u'\u5927\u97d3\u6c11\u570b', u'\uc740 ', u'\u6c11\u4e3b\u5171\u548c\u570b', u'\uc774\ub2e4.']
 
 주어진 글자가 한자인지의 여부를 판별
 ````````````````````````````````````
 
->>> Hanja.is_hanja(u'韓')
+>>> hanja.is_hanja(u'韓')
 True
 
->>> Hanja.is_hanja(u'한')
+>>> hanja.is_hanja(u'한')
 False
 
 문장 변환
 `````````
 
 치환 모드 변환:
 
->>> Hanja.translate(u'大韓民國은 民主共和國이다.', 'substitution')
+>>> hanja.translate(u'大韓民國은 民主共和國이다.', 'substitution')
 대한민국은 민주공화국이다.
 
 혼용 모드 변환:
 
->>> Hanja.translate(u'大韓民國은 民主共和國이다.', 'combination')
+>>> hanja.translate(u'大韓民國은 民主共和國이다.', 'combination')
 <span class="hanja">大韓民國</span><span class="hangul">(대한민국)</span>은 <span class="hanja">民主共和國</span><span class="hangul">(민주공화국)</span>이다.
diff --git a/hanja/__init__.py b/hanja/__init__.py
@@ -2,106 +2,20 @@
 
 __author__ = 'Sumin Byeon'
 __email__ = 'suminb@gmail.com'
-__version__ = '0.9.0'
-
-from pairs import table as hanja_table
-
-class Hangul:
-    @staticmethod
-    def separate(ch):
-        """한글 자모 분리. 주어진 한글 한 글자의 초성, 중성 초성을 반환함."""
-        uindex = ord(ch) - 0xac00
-        jongseong = uindex % 28
-        joongseong = ((uindex - jongseong) / 28) % 21
-        choseong = ((uindex - jongseong) / 28) / 21
-
-        return (choseong, joongseong, jongseong)
-
-    @staticmethod
-    def synthesize(choseong, joongseong, jongseong):
-        """초성, 중성, 종성을 조합하여 완성형 한 글자를 만듦. 'choseong', 'joongseong', 'jongseong' are offsets. For example, 'ㄱ' is 0, 'ㄲ' is 1, 'ㄴ' is 2, and so on and so fourth."""
-        return unichr(((((choseong) * 21) + joongseong) * 28) + jongseong + 0xac00)
-
-    @staticmethod
-    def dooeum(previous, current):
-        """두음법칙을 적용하기 위한 함수."""
-        p, c = Hangul.separate(previous), Hangul.separate(current)
-        offset = 0
-
-        # 한자음 '녀, 뇨, 뉴, 니', '랴, 려, 례, 료, 류, 리'가 단어 첫머리에 올 때 '여, 요, 유, 이', '야, 여, 예, 요, 유, 이'로 발음한다.
-        if current in (u'녀', u'뇨', u'뉴', u'니'):
-            offset = 9
-        elif current in (u'랴', u'려', u'례', u'료', u'류', u'리'):
-            offset = 6
-        # 한자음 '라, 래, 로, 뢰, 루, 르'가 단어 첫머리에 올 때 '나, 내, 노, 뇌, 누, 느'로 발음한다.
-        elif current in (u'라', u'래', u'로', u'뢰', u'루', u'르'):
-            offset = -3
-        # 모음이나 ㄴ 받침 뒤에 이어지는 '렬, 률'은 '열, 율'로 발음한다.
-        elif current in (u'렬', u'률') and p[2] in (0, 2):
-            offset = 6
-
-        return Hangul.synthesize(c[0]+offset, c[1], c[2])
-
-    @staticmethod
-    def is_hangul(ch):
-        return ord(ch) >= 0xac00 and ord(ch) <= 0xd7a3
-
-class Hanja:
-    """두음법칙에 관련된 내용은 http://ko.wikipedia.org/wiki/%EB%91%90%EC%9D%8C_%EB%B2%95%EC%B9%99 를 참고."""
-
-    @staticmethod
-    def translate_syllable(previous, current):
-        if current in hanja_table:
-            if previous in hanja_table: 
-                return hanja_table[current]
-            else:
-                return Hangul.dooeum(previous, hanja_table[current])
-
-        return current
-
-    @staticmethod
-    def split_hanja(text):
-        """주어진 문장을 한자로 된 구역과 그 이외의 문자로 된 구역으로 분리"""
-
-        # TODO: Can we make this a bit prettier?
-        if len(text) == 0:
-            yield text
-        else:
-            ch = text[0]
-            bucket = [ch]
-            prev_state = Hanja.is_hanja(ch)
-
-            for ch in text[1:]:
-                state = Hanja.is_hanja(ch)
-
-                if prev_state != state:
-                    yield ''.join(bucket)
-                    bucket = [ch]
-                else:
-                    bucket.append(ch)
-
-                prev_state = state
-
-            yield ''.join(bucket)
-
-
-    @staticmethod
-    def translate(text, mode):
-        return ''.join(map(lambda w: Hanja.translate_word(w, mode), Hanja.split_hanja(text)))
-
-    @staticmethod
-    def translate_word(word, mode, format='<span class="hanja">%s</span><span class="hangul">(%s)</span>'):
-        """
-        ``mode``: combination | substitution
-        """
-        tw = ''.join(map(Hanja.translate_syllable, u' '+word[:-1], word))
-
-        if mode == 'combination' and Hanja.is_hanja(word[0]) == 1:
-            return format % (word, tw)
-        else:
-            return tw
-
-    @staticmethod
-    def is_hanja(ch):
-        """Determines if a given character ``ch`` is a Chinese character."""
-        return ch in hanja_table
+__version__ = '0.10.0'
+
+import warnings
+
+# Copied from https://wiki.python.org/moin/PythonDecoratorLibrary
+def deprecated(func):
+    '''Decorator that marks ``func`` as deprecated: each call emits a
+    DeprecationWarning naming ``func`` and then returns ``func``'s result.'''
+    def new_func(*args, **kwargs):
+        # stacklevel=2 makes the warning point at the caller, not this wrapper.
+        warnings.warn("Call to deprecated function {}.".format(func.__name__),
+                      category=DeprecationWarning, stacklevel=2)
+        return func(*args, **kwargs)
+    new_func.__name__ = func.__name__
+    new_func.__doc__ = func.__doc__
+    new_func.__dict__.update(func.__dict__)
+    return new_func
diff --git a/hanja/hangul.py b/hanja/hangul.py
@@ -0,0 +1,59 @@
+# -*- coding: utf8 -*-
+
+from __init__ import deprecated
+
+
+def separate(ch):
+    """Split a Hangul syllable into (choseong, joongseong, jongseong) offsets."""
+    uindex = ord(ch) - 0xac00  # position relative to U+AC00, the base build() adds
+    jongseong = uindex % 28
+    joongseong = ((uindex - jongseong) / 28) % 21  # NOTE(review): relies on Python 2 integer `/`
+    choseong = ((uindex - jongseong) / 28) / 21
+
+    return (choseong, joongseong, jongseong)
+
+
+@deprecated
+def synthesize(choseong, joongseong, jongseong):  # old name; delegates to build()
+    return build(choseong, joongseong, jongseong)
+
+
+def build(choseong, joongseong, jongseong):
+    """Combine an initial, medial and final into one precomposed syllable.
+    'choseong', 'joongseong', 'jongseong' are offsets. For example, 'ㄱ' is 0,
+    'ㄲ' is 1, 'ㄴ' is 2, and so on and so forth."""
+    return unichr(((((choseong) * 21) + joongseong) * 28) + jongseong + 0xac00)
+
+
+def dooeum(previous, current):
+    """Apply the initial-sound rule (dooeum) to the Hangul syllable ``current``;
+    ``previous`` is the character before it. Returns the adjusted syllable."""
+    p, c = separate(previous), separate(current)
+    offset = 0
+    # Word-initial '녀, 뇨, 뉴, 니' and '랴, 려, 례, 료, 류, 리' are read as
+    # '여, 요, 유, 이' and '야, 여, 예, 요, 유, 이' (choseong becomes 'ㅇ').
+    if current in (u'녀', u'뇨', u'뉴', u'니'):
+        offset = 9
+    elif current in (u'랴', u'려', u'례', u'료', u'류', u'리'):
+        offset = 6
+    # Word-initial '라, 래, 로, 뢰, 루, 르' are read as '나, 내, 노, 뇌, 누, 느'.
+    elif current in (u'라', u'래', u'로', u'뢰', u'루', u'르'):
+        offset = -3
+    # After a vowel or a final 'ㄴ', '렬, 률' are read as '열, 율'. Jongseong
+    # offset 0 means no final and 4 is 'ㄴ' (2 would be 'ㄲ').
+    elif current in (u'렬', u'률') and p[2] in (0, 4):
+        offset = 6
+
+    return build(c[0]+offset, c[1], c[2])
+
+
+def is_hangul(ch):  # True if ch lies in the Hangul syllable range
+    if ch is None:  # None is treated as "not Hangul" instead of raising
+        return False
+    else:
+        return ord(ch) >= 0xac00 and ord(ch) <= 0xd7a3  # U+AC00..U+D7A3
+
+
+def contains_hangul(text):
+    """Return True if any character of ``text`` is Hangul (False if empty)."""
+    return any(is_hangul(c) for c in text)
diff --git a/hanja/hanja.py b/hanja/hanja.py
@@ -0,0 +1,64 @@
+# -*- coding: utf8 -*-
+"""두음법칙에 관련된 내용은
+http://ko.wikipedia.org/wiki/%EB%91%90%EC%9D%8C_%EB%B2%95%EC%B9%99 를 참고."""
+
+from pairs import table as hanja_table
+from hangul import dooeum
+
+
+def translate_syllable(previous, current):  # map current through hanja_table
+    if current in hanja_table:
+        if previous in hanja_table:  # preceded by Hanja: use the table value as is
+            return hanja_table[current]
+        else:  # not preceded by Hanja: pass the table value through dooeum()
+            return dooeum(previous, hanja_table[current])
+
+    return current  # not in the table: returned unchanged
+
+
+def split_hanja(text):
+    """Yield consecutive runs of ``text``, alternating between runs made of
+    Hanja and runs of any other characters. Empty text yields ``text`` once."""
+    # TODO: Can we make this a bit prettier?
+    if len(text) == 0:
+        yield text
+    else:
+        ch = text[0]
+        bucket = [ch]
+        prev_state = is_hanja(ch)
+
+        for ch in text[1:]:
+            state = is_hanja(ch)
+            # A change of state closes the current run and starts a new one.
+            if prev_state != state:
+                yield ''.join(bucket)
+                bucket = [ch]
+            else:
+                bucket.append(ch)
+
+            prev_state = state
+
+        yield ''.join(bucket)
+
+
+def translate(text, mode):  # translate each run from split_hanja() and join them
+    return ''.join(map(lambda w: translate_word(w, mode),
+        split_hanja(text)))
+
+
+def translate_word(word, mode,
+    format='<span class="hanja">%s</span><span class="hangul">(%s)</span>'):
+    """Translate one run from split_hanja(). ``mode`` is 'combination' (a
+    Hanja run is rendered via ``format`` % (word, reading)) or 'substitution'."""
+    if not word:  # split_hanja() yields '' for empty text; nothing to map
+        return word
+
+    tw = ''.join(map(translate_syllable, u' '+word[:-1], word))
+    if mode == 'combination' and is_hanja(word[0]):
+        return format % (word, tw)
+    return tw
+
+
+def is_hanja(ch):
+    """Return True if ``ch`` is a key of ``hanja_table``, i.e. a known Hanja."""
+    return ch in hanja_table
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,3 @@
+pytest
+pytest-cov
+coveralls
diff --git a/setup.py b/setup.py
@@ -5,15 +5,8 @@
 
 
 def readme():
-    try:
-        f = open('README.rst')
-        content = f.read()
-        f.close()
-        return content
-    except IOError:
-        pass
-    except OSError:
-        pass
+    with open('README.rst') as f:
+        return f.read()
 
 
 setup(name='hanja',
@@ -25,4 +18,4 @@ def readme():
       author_email=hanja.__email__,
       url='http://github.com/suminb/hanja',
       packages=[],
-     )
+)
diff --git a/tests/test_basic.py b/tests/test_basic.py
@@ -0,0 +1,28 @@
+# -*- coding: utf8 -*-
+import pytest
+from hanja import hangul, hanja
+
+
+def test_separation():  # separate() returns (choseong, joongseong, jongseong)
+    assert hangul.separate(u'가') == (0, 0, 0)
+    assert hangul.separate(u'까') == (1, 0, 0)  # second choseong
+    assert hangul.separate(u'갸') == (0, 2, 0)  # third joongseong
+    assert hangul.separate(u'각') == (0, 0, 1)  # first non-empty jongseong
+
+
+def test_build():  # all-zero offsets give the first syllable, U+AC00
+    assert hangul.build(0, 0, 0) == u'가'
+
+
+def test_is_hangul():
+    assert hangul.is_hangul(u'한') == True
+    assert hangul.is_hangul('A') == False  # ASCII letter
+    assert hangul.is_hangul('1') == False  # ASCII digit
+    assert hangul.is_hangul(None) == False  # None is accepted and is not Hangul
+
+
+def test_contains_hangul():
+    assert hangul.contains_hangul(u'한국어') == True
+    assert hangul.contains_hangul(u'한ABC국어') == True  # Hangul mixed with ASCII
+    assert hangul.contains_hangul(u"Yo, what's up bro?") == False
+    assert hangul.contains_hangul(u'1234567890') == False  # digits only
+    assert hangul.contains_hangul(u'1234567890') == False