Permalink
Browse files

更新文档和测试

  • Loading branch information...
mozillazg committed Apr 22, 2018
1 parent e8fec9d commit 7fa0b879df47e8a7e5af5edb5f243dd4ea645410
Showing with 68 additions and 33 deletions.
  1. +2 −2 README.rst
  2. +5 −0 docs/api.rst
  3. +8 −1 docs/faq.rst
  4. +10 −0 docs/usage.rst
  5. +3 −3 pypinyin/compat.py
  6. +1 −1 pypinyin/constants.py
  7. +14 −13 pypinyin/contrib/mmseg.py
  8. +7 −6 pypinyin/core.py
  9. +1 −1 pytest.ini
  10. +11 −4 tests/test_pinyin.py
  11. +6 −2 tests/test_style.py
@@ -88,7 +88,7 @@ FAQ
>>> pinyin('步履蹒跚')
[['bù'], ['lǚ'], ['pán'], ['shān']]
详见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/usage.html#id5>`__ 。
详见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/usage.html#custom-dict>`__ 。
为什么没有 y, w, yu 几个声母?
++++++++++++++++++++++++++++++++++++++++++++
@@ -132,7 +132,7 @@ y,w,ü (yu) 都不是声母。
如果对拼音的准确性不是特别在意的话,可以通过设置环境变量 ``PYPINYIN_NO_PHRASES``
和 ``PYPINYIN_NO_DICT_COPY`` 来节省内存。
详见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/faq.html#id2>`__
详见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/faq.html#no-phrases>`__
更多 FAQ 详见文档中的
@@ -12,6 +12,8 @@ API
:member-order: bysource
.. _core_api:
核心 API
-------------
@@ -26,6 +28,8 @@ API
.. autofunction:: pypinyin.slug
.. _convert_style:
风格转换
-----------
@@ -34,6 +38,7 @@ API
.. autofunction:: pypinyin.style.convert
.. _seg:
分词
-------
@@ -2,12 +2,16 @@ FAQ
-----
.. _no_phrases:
如何禁用内置的“词组拼音库”
++++++++++++++++++++++++++++++++
设置环境变量 ``PYPINYIN_NO_PHRASES=true`` 即可
.. _no_dict_copy:
如何禁用默认的“拼音库”copy 操作
+++++++++++++++++++++++++++++++++++++++++++
@@ -16,16 +20,19 @@ FAQ
副作用: 用户的自定义拼音库出现问题时, 无法回退到自带的拼音库.
.. _limit_memory:
如何减少内存占用
+++++++++++++++++++++
如果对拼音正确性不在意的话,可以按照上面所说的设置环境变量 ``PYPINYIN_NO_PHRASES``
和 ``PYPINYIN_NO_DICT_COPY``,详见 `#13`_
.. _initials_problem:
``INITIALS`` 声母风格下,以 ``y``, ``w``, ``yu`` 开头的汉字返回空字符串
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
比如:
@@ -2,6 +2,8 @@
======
.. _example:
示例
-------
@@ -20,6 +22,8 @@
['zhong', 'xin']
.. _handle_no_pinyin:
处理不包含拼音的字符
---------------------
@@ -49,6 +53,8 @@
.. _单元测试代码: https://github.com/mozillazg/python-pinyin/blob/3d52fe821b7f55aecf5af9bad78380762484f4d9/tests/test_pinyin.py#L161-L166
.. _custom_dict:
自定义拼音库
------------
@@ -75,6 +81,8 @@
['ha2i', 'me2i']
.. _custom_style:
自定义拼音风格
----------------
@@ -148,6 +156,8 @@
具体差异可以查看 `tests/test_standard.py <https://github.com/mozillazg/python-pinyin/blob/master/tests/test_standard.py>`_ 中的对比结果测试用例
.. _cli:
命令行工具
------------
@@ -14,17 +14,17 @@
if subversion[0] in (
'IronPython',
):
PY2 = False
PY2 = False # pragma: no cover
if not PY2:
text_type = str
bytes_type = bytes
else:
text_type = unicode # noqa
text_type = unicode # noqa
bytes_type = str
try:
callable_check = callable # noqa
callable_check = callable # noqa
except NameError:
def callable_check(obj):
return hasattr(obj, '__call__')
@@ -44,7 +44,7 @@
r'])+$'
)
else:
RE_HANS = re.compile(
RE_HANS = re.compile( # pragma: no cover
r'^(?:['
r'\u3007' # 〇
r'\u3400-\u4dbf' # CJK扩展A:[3400-4DBF]
@@ -71,21 +71,22 @@ def __contains__(self, key):
p_set = PrefixSet()
p_set.train(PHRASES_DICT.keys())
#: 基于内置词库的最大正向匹配分词器。使用::
#: 基于内置词库的最大正向匹配分词器。使用:
#:
#: >>> from pypinyin.contrib.mmseg import seg
#: >>> text = '你好,我是中国人,我爱我的祖国'
#: >>> seg.cut(text)
#: <generator object Seg.cut at 0x10b2df2b0>
#: .. code-block:: python
#:
#: >>> list(seg.cut(text))
#: ['你好', ',', '我', '是', '中国人', ',', '我', '爱',
#: '我的', '祖', '国']
#: >>> seg.train(['祖国', '我是'])
#: >>> list(seg.cut(text))
#: ['你好', ',', '我是', '中国人', ',', '我', '爱',
#: '我的', '祖国']
#: >>>
#: >>> from pypinyin.contrib.mmseg import seg
#: >>> text = '你好,我是中国人,我爱我的祖国'
#: >>> seg.cut(text)
#: <generator object Seg.cut at 0x10b2df2b0>
#: >>> list(seg.cut(text))
#: ['你好', ',', '我', '是', '中国人', ',', '我', '爱',
#: '我的', '祖', '国']
#: >>> seg.train(['祖国', '我是'])
#: >>> list(seg.cut(text))
#: ['你好', ',', '我是', '中国人', ',', '我', '爱',
#: '我的', '祖国']
#: >>>
seg = Seg(p_set)
@@ -164,19 +164,20 @@ def phrase_pinyin(phrase, style, heteronym, errors='default', strict=True):
def _pinyin(words, style, heteronym, errors, strict=True):
"""
:param words: 经过分词处理后的字符串,只包含中文字符或只包含非中文字符,
不存在混合的情况。
"""
pys = []
# 初步过滤没有拼音的字符
if RE_HANS.match(words):
pys = phrase_pinyin(words, style=style, heteronym=heteronym,
errors=errors, strict=strict)
return pys
for word in simple_seg(words):
if not (RE_HANS.match(word)):
py = handle_nopinyin(word, errors=errors)
pys.append(py) if py else None
else:
pys.extend(_pinyin(word, style, heteronym, errors, strict=strict))
py = handle_nopinyin(words, errors=errors)
if py:
pys.append(py)
return pys
@@ -2,5 +2,5 @@
python_files = test_*.py
python_classes = Test
python_functions = test
addopts = -slv --cov-report term-missing --tb=short --pdb
addopts = -slv --cov-report term-missing --tb=short --pdb --durations=10
norecursedirs = .git __pycache__
@@ -359,10 +359,10 @@ def test_simple_seg():
['侵略', {'style': CYRILLIC}, ['цинь1', 'люэ4']],
['〇', {'style': TONE}, ['líng']],
# 二次分词
['你要重新考虑', {'style': TONE}, [
'nǐ', 'yào', 'zhòng', 'xīn', 'kǎo', 'lǜ']],
[['你要', '重新考虑'], {'style': TONE}, [
'nǐ', 'yào', 'chóng', 'xīn', 'kǎo', 'lǜ']],
['你要重新考虑OK', {'style': TONE}, [
'nǐ', 'yào', 'zhòng', 'xīn', 'kǎo', 'lǜ', 'OK']],
[['你要', '重新考虑OK'], {'style': TONE}, [
'nǐ', 'yào', 'chóng', 'xīn', 'kǎo', 'lǜ', 'OK']],
]
@@ -403,6 +403,13 @@ def test_36():
assert lazy_pinyin(hans) == pys
def test_with_unknown_style():
assert lazy_pinyin('中国') == ['zhong', 'guo']
assert lazy_pinyin('中国', style='unknown') == ['zhōng', 'guó']
assert pinyin('中国') == [['zhōng'], ['guó']]
assert pinyin('中国', style='unknown') == [['zhōng'], ['guó']]
if __name__ == '__main__':
import pytest
pytest.cmdline.main()
@@ -2,8 +2,8 @@
from __future__ import unicode_literals
from copy import deepcopy
from pypinyin import pinyin
from pypinyin.style import register
from pypinyin import pinyin, Style
from pypinyin.style import register, convert
def test_custom_style_with_decorator():
@@ -41,6 +41,10 @@ def func(pinyin, **kwargs):
assert pinyin(hans, style=style_value) == expected_pinyin_s
def test_finals_tone3_no_final():
assert convert('ń', Style.FINALS_TONE3, True, None) == 'n2'
if __name__ == '__main__':
import pytest
pytest.cmdline.main()

0 comments on commit 7fa0b87

Please sign in to comment.