Skip to content

Commit

Permalink
tone_convert 新增 to_initials 和 to_finals 函数
Browse files Browse the repository at this point in the history
用于获取拼音中的声母和韵母
  • Loading branch information
mozillazg committed Jan 23, 2022
1 parent 7e02fbe commit 0fdbdbd
Show file tree
Hide file tree
Showing 10 changed files with 140 additions and 10 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@ Changelog
---------


`0.45.0`_ (2022-01-23)
+++++++++++++++++++++++++

* **[Bugfixed]** 修复韵母相关拼音风格在 ``strict=True`` 时未按预期只返回拼音标准中定义过的韵母。
(Fixes `#266`_, Closes `#80`_)
* **[New]** ``pypinyin.contrib.tone_convert`` 模块新增 ``to_initials`` 和 ``to_finals`` 函数,
用于将拼音转换为 ``Style.INITIALS`` 和 ``Style.FINALS`` 风格的结果。


`0.44.0`_ (2021-11-14)
+++++++++++++++++++++++++

Expand Down Expand Up @@ -859,6 +868,8 @@ __ https://github.com/mozillazg/python-pinyin/issues/8
.. _#139: https://github.com/mozillazg/python-pinyin/issues/139
.. _#205: https://github.com/mozillazg/python-pinyin/issues/205
.. _#251: https://github.com/mozillazg/python-pinyin/issues/251
.. _#266: https://github.com/mozillazg/python-pinyin/issues/266
.. _#80: https://github.com/mozillazg/python-pinyin/issues/80
.. _#164: https://github.com/mozillazg/python-pinyin/pull/164
.. _#176: https://github.com/mozillazg/python-pinyin/pull/176
.. _@hanabi1224: https://github.com/hanabi1224
Expand Down Expand Up @@ -948,3 +959,4 @@ __ https://github.com/mozillazg/python-pinyin/issues/8
.. _0.42.1: https://github.com/mozillazg/python-pinyin/compare/v0.42.0...v0.42.1
.. _0.43.0: https://github.com/mozillazg/python-pinyin/compare/v0.42.1...v0.43.0
.. _0.44.0: https://github.com/mozillazg/python-pinyin/compare/v0.43.0...v0.44.0
.. _0.45.0: https://github.com/mozillazg/python-pinyin/compare/v0.44.0...v0.45.0
2 changes: 2 additions & 0 deletions docs/contrib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ contrib
.. autofunction:: pypinyin.contrib.tone_convert.to_tone
.. autofunction:: pypinyin.contrib.tone_convert.to_tone2
.. autofunction:: pypinyin.contrib.tone_convert.to_tone3
.. autofunction:: pypinyin.contrib.tone_convert.to_initials
.. autofunction:: pypinyin.contrib.tone_convert.to_finals

.. autofunction:: pypinyin.contrib.tone_convert.tone_to_normal
.. autofunction:: pypinyin.contrib.tone_convert.tone_to_tone2
Expand Down
2 changes: 2 additions & 0 deletions pypinyin/contrib/tone_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
to_tone,
to_tone2,
to_tone3,
to_initials,
to_finals,
tone_to_normal,
tone_to_tone2,
tone_to_tone3,
Expand Down
4 changes: 4 additions & 0 deletions pypinyin/contrib/tone_convert.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ def to_tone2(pinyin: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...)

def to_tone3(pinyin: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...) -> Text: ...

def to_initials(pinyin: Text, strict: bool = ...) -> Text: ...

def to_finals(pinyin: Text, strict: bool = ..., v_to_u: bool = ...) -> Text: ...

def tone_to_normal(tone: Text, v_to_u: bool = ...) -> Text: ...

def tone_to_tone2(tone: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...) -> Text: ...
Expand Down
59 changes: 59 additions & 0 deletions pypinyin/style/_tone_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
from pypinyin.style._tone_rule import right_mark_index
from pypinyin.style._constants import RE_TONE3, RE_TONE2
from pypinyin.style.tone import converter
from pypinyin.style._utils import (
get_initials, replace_symbol_to_no_symbol,
get_finals
)

_re_number = re.compile(r'\d')

Expand Down Expand Up @@ -135,6 +139,61 @@ def to_tone3(pinyin, v_to_u=False, neutral_tone_with_5=False):
return _fix_v_u(pinyin, s, v_to_u)


def to_initials(pinyin, strict=True):
    """Convert a pinyin string to :py:attr:`~pypinyin.Style.INITIALS` style.

    The input may be written in :py:attr:`~pypinyin.Style.TONE`,
    :py:attr:`~pypinyin.Style.TONE2`, :py:attr:`~pypinyin.Style.TONE3` or
    :py:attr:`~pypinyin.Style.NORMAL` style.

    :param pinyin: pinyin in :py:attr:`~pypinyin.Style.TONE`,
                   :py:attr:`~pypinyin.Style.TONE2`,
                   :py:attr:`~pypinyin.Style.TONE3` or
                   :py:attr:`~pypinyin.Style.NORMAL` style
    :param strict: whether initials and finals are handled strictly
                   according to the Scheme for the Chinese Phonetic
                   Alphabet (Hanyu Pinyin Fang'an), see :ref:`strict`
    :return: the pinyin in :py:attr:`~pypinyin.Style.INITIALS` style

    Usage::

      >>> from pypinyin.contrib.tone_convert import to_initials
      >>> to_initials('zhōng')
      'zh'

    """
    # The input is handed to ``get_initials`` unchanged.
    initials = get_initials(pinyin, strict=strict)
    return initials


def to_finals(pinyin, strict=True, v_to_u=False):
    """Convert a pinyin string to :py:attr:`~pypinyin.Style.FINALS` style.

    The input may be written in :py:attr:`~pypinyin.Style.TONE`,
    :py:attr:`~pypinyin.Style.TONE2`, :py:attr:`~pypinyin.Style.TONE3` or
    :py:attr:`~pypinyin.Style.NORMAL` style.

    :param pinyin: pinyin in :py:attr:`~pypinyin.Style.TONE`,
                   :py:attr:`~pypinyin.Style.TONE2`,
                   :py:attr:`~pypinyin.Style.TONE3` or
                   :py:attr:`~pypinyin.Style.NORMAL` style
    :param strict: whether initials and finals are handled strictly
                   according to the Scheme for the Chinese Phonetic
                   Alphabet (Hanyu Pinyin Fang'an), see :ref:`strict`
    :param v_to_u: whether to use ``ü`` in place of ``v``;
                   when False the result uses ``v`` to represent ``ü``
    :return: the pinyin in :py:attr:`~pypinyin.Style.FINALS` style

    Usage::

      >>> from pypinyin.contrib.tone_convert import to_finals
      >>> to_finals('zhōng')
      'ong'

    """
    # Replace tone-marked characters with their unmarked counterparts.
    unmarked = replace_symbol_to_no_symbol(pinyin)
    # Spell ``v`` as ``ü`` before the finals are looked up.
    normalized = unmarked.replace('v', 'ü')

    # Extract the finals part.
    extracted = get_finals(normalized, strict=strict)

    # Apply the caller's ``v`` / ``ü`` preference to the result.
    return _fix_v_u(extracted, extracted, v_to_u)


def tone_to_normal(tone, v_to_u=False):
"""将 :py:attr:`~pypinyin.Style.TONE` 风格的拼音转换为
:py:attr:`~pypinyin.Style.NORMAL` 风格的拼音
Expand Down
4 changes: 4 additions & 0 deletions pypinyin/style/_tone_convert.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ def to_tone2(pinyin: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...)

def to_tone3(pinyin: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...) -> Text: ...

def to_initials(pinyin: Text, strict: bool = ...) -> Text: ...

def to_finals(pinyin: Text, strict: bool = ..., v_to_u: bool = ...) -> Text: ...

def tone_to_normal(tone: Text, v_to_u: bool = ...) -> Text: ...

def tone_to_tone2(tone: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...) -> Text: ...
Expand Down
13 changes: 4 additions & 9 deletions pypinyin/style/finals.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,16 @@
from pypinyin.constants import Style
from pypinyin.style import register
from pypinyin.style._constants import RE_NUMBER
from pypinyin.style._tone_convert import tone3_to_tone2, tone2_to_tone
from pypinyin.style._utils import (
get_finals, replace_symbol_to_number, replace_symbol_to_no_symbol
from pypinyin.style._tone_convert import (
tone3_to_tone2, tone2_to_tone, to_finals
)
from pypinyin.style._utils import replace_symbol_to_number


class FinalsConverter(object):
def to_finals(self, pinyin, **kwargs):
"""无声调韵母"""
# 替换声调字符为无声调字符
pinyin = replace_symbol_to_no_symbol(pinyin).replace('v', 'ü')

# 获取韵母部分
return get_finals(pinyin, strict=kwargs.get('strict')
).replace('ü', 'v')
return to_finals(pinyin, strict=kwargs.get('strict', True))

def to_finals_tone(self, pinyin, **kwargs):
"""声调在韵母头上"""
Expand Down
2 changes: 1 addition & 1 deletion pypinyin/style/initials.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@

@register(Style.INITIALS)
def convert(pinyin, **kwargs):
strict = kwargs.get('strict')
strict = kwargs.get('strict', True)
return get_initials(pinyin, strict)
49 changes: 49 additions & 0 deletions tests/contrib/test_tone_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
to_tone,
to_tone2,
to_tone3,
to_initials,
to_finals
)


Expand Down Expand Up @@ -231,6 +233,53 @@ def test_tone3_to_tone2_with_v_to_u(pinyin, v_to_u, result):
assert tone3_to_tone2(pinyin, v_to_u=v_to_u) == result


@mark.parametrize('pinyin,strict,result', [
    # The same syllable written in TONE, TONE2, TONE3 and NORMAL style.
    ['zhōng', True, 'zh'],
    ['zhōng', False, 'zh'],
    ['zho1ng', True, 'zh'],
    ['zho1ng', False, 'zh'],
    ['zhong1', True, 'zh'],
    ['zhong1', False, 'zh'],
    ['zhong', True, 'zh'],
    ['zhong', False, 'zh'],
    # Expected: no initial for 'yu' in strict mode, 'y' otherwise.
    ['yu', True, ''],
    ['yu', False, 'y'],
])
def test_to_initials(pinyin, strict, result):
    """Check ``to_initials`` against the expected initial for each case."""
    actual = to_initials(pinyin, strict=strict)
    assert actual == result


@mark.parametrize('pinyin,strict,v_to_u,result', [
    # The same syllable written in TONE, TONE2, TONE3 and NORMAL style.
    ['zhōng', True, False, 'ong'],
    ['zhōng', False, False, 'ong'],
    ['zho1ng', True, False, 'ong'],
    ['zho1ng', False, False, 'ong'],
    ['zhong1', True, False, 'ong'],
    ['zhong1', False, False, 'ong'],
    ['zhong', True, False, 'ong'],
    ['zhong', False, False, 'ong'],
    # Expected: 'v' in the result when v_to_u is False ...
    ['nǚ', True, False, 'v'],
    ['nv', True, False, 'v'],
    ['nü', True, False, 'v'],
    # ... and 'ü' when v_to_u is True, however the input spells it.
    ['nǚ', True, True, 'ü'],
    ['nü', True, True, 'ü'],
    ['nv', True, True, 'ü'],
    # Expected: 'uei' for 'gui' in strict mode, 'ui' otherwise.
    ['gui', True, False, 'uei'],
    ['gui', False, False, 'ui'],
])
def test_to_finals(pinyin, strict, v_to_u, result):
    """Check ``to_finals`` against the expected final for each case."""
    actual = to_finals(pinyin, strict=strict, v_to_u=v_to_u)
    assert actual == result


# 所有拼音转换为 tone2 或 tone3 风格后,都可以再转换回原始的拼音
def test_tone_to_tone2_tone3_to_tone():
pinyin_set = set()
Expand Down
3 changes: 3 additions & 0 deletions tests/test_standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
)
from pypinyin.compat import SUPPORT_UCS4, PY2
from pypinyin.style._constants import _FINALS
from pypinyin.contrib.tone_convert import to_initials, to_finals

# test data from http://www.moe.edu.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html # noqa
# 声母表
Expand Down Expand Up @@ -46,6 +47,7 @@
def test_initials(hans, kwargs, result):
assert lazy_pinyin(hans, **kwargs) == result
assert list(chain(*pinyin(hans, **kwargs))) == result
assert to_initials(lazy_pinyin(hans)[0]) == result[0]


# 韵母表
Expand Down Expand Up @@ -95,6 +97,7 @@ def test_initials(hans, kwargs, result):
def test_finals(hans, kwargs, result):
if not kwargs.get('heteronym'):
assert lazy_pinyin(hans, **kwargs) == result
assert to_finals(lazy_pinyin(hans)[0]) == result[0]
assert pinyin(hans, **kwargs) == [result]


Expand Down

0 comments on commit 0fdbdbd

Please sign in to comment.