Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

修复ju, qu, xu, yu, yi和wu的韵母 #26

Merged
merged 4 commits into from Oct 11, 2015
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
29 changes: 28 additions & 1 deletion pypinyin/__init__.py
Expand Up @@ -196,6 +196,12 @@ def initial(pinyin):
return i
return ''

# Tone-marked "u" -> tone-marked "ü" with the same tone. Used for syllables
# whose written "u" actually stands for "ü" (e.g. after j, q, x).
U_FINALS_EXCEPTIONS_MAP = dict((
    (u'ū', u'ǖ'),
    (u'ú', u'ǘ'),
    (u'ǔ', u'ǚ'),
    (u'ù', u'ǜ'),
))

def final(pinyin):
    """Return the final (the part after the initial) of one pinyin syllable.

    :param pinyin: a single syllable; it may be plain (``ju``), carry a
                   tone mark (``jù``) or a trailing tone digit (``ju4``).
    :return: the final. After j/q/x a lone ``u`` is really ``ü``, so it is
             returned as ``v`` (or as the tone-marked ``ü`` when the input
             carried a tone mark).
    """
    initial_ = initial(pinyin)
    if not initial_:
        # No initial found: y-/w- spellings and bare vowels are handled
        # by the dedicated helper.
        return no_initial_final(pinyin)

    # Special case j/q/x + tone-marked u: map straight to the matching ü.
    m = re.match(u'^(j|q|x)(ū|ú|ǔ|ù)$', pinyin)
    if m:
        return U_FINALS_EXCEPTIONS_MAP[m.group(2)]

    # Special case j/q/x + plain u (optionally followed by a tone digit).
    # Raw string so that ``\d`` is not treated as a string escape.
    # NOTE(review): only the bare ju/qu/xu syllables are rewritten here;
    # longer finals such as "juan" are left untouched - confirm intended.
    pinyin = re.sub(r'^(j|q|x)u(\d?)$', r'\1v\2', pinyin)

    # Drop the first occurrence of the initial; what remains is the final.
    return ''.join(pinyin.split(initial_, 1))

def no_initial_final(pinyin):
    """Return the final of a syllable that has no initial.

    ``y`` and ``w`` are spelling devices rather than initials, so they are
    converted back to the vowel they stand for:

    * ``yu...`` -> ``v...`` (``v`` stands for ``ü``)
    * ``yi...`` -> ``i...``
    * ``y...``  -> ``i...`` (any other ``y`` syllable)
    * ``wu...`` -> ``u...``
    * ``w...``  -> ``u...`` (any other ``w`` syllable)

    The same rules are applied when the vowel right after ``y``/``w``
    carries a tone mark (``yú`` -> ``ǘ``, ``yī`` -> ``ī``, ``wú`` -> ``ú``).
    Anything that does not start with ``y`` or ``w`` is returned unchanged.

    :param pinyin: a single syllable without an initial.
    :return: the final of that syllable.
    """
    # Tone-marked forms of "i", and tone-marked "u" mapped to the "ü" that
    # it represents when written after "y".
    toned_i = (u'ī', u'í', u'ǐ', u'ì')
    toned_u_to_v = {u'ū': u'ǖ', u'ú': u'ǘ', u'ǔ': u'ǚ', u'ù': u'ǜ'}

    # Slicing (not indexing) so a one-letter input yields '' instead of
    # raising IndexError.
    second = pinyin[1:2]

    if pinyin.startswith('y'):
        if second == 'u':
            return 'v' + pinyin[2:]
        if second in toned_u_to_v:
            return toned_u_to_v[second] + pinyin[2:]
        if second == 'i' or second in toned_i:
            return pinyin[1:]
        return 'i' + pinyin[1:]

    if pinyin.startswith('w'):
        if second == 'u' or second in toned_u_to_v:
            return pinyin[1:]
        return 'u' + pinyin[1:]

    return pinyin
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here.


def toFixed(pinyin, style):
"""根据拼音风格格式化带声调的拼音.
Expand Down
28 changes: 24 additions & 4 deletions tests/test_pinyin.py
Expand Up @@ -222,14 +222,34 @@ def test_simple_seg():
],
# 误把 yu 放到声母列表了
['鱼', {'style': TONE2}, ['yu2']],
['鱼', {'style': FINALS}, ['yu']],
['鱼', {'style': FINALS}, ['v']],
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个文件里有几个行的行尾也有一些多余的空格。

['雨', {'style': TONE2}, ['yu3']],
['雨', {'style': FINALS}, ['yu']],
['雨', {'style': FINALS}, ['v']],
['元', {'style': TONE2}, ['yua2n']],
['元', {'style': FINALS}, ['yuan']],
# y, w 也不是拼音
['元', {'style': FINALS}, ['van']],

# y, w 也不是声母, yu的韵母是v, yi的韵母是i, wu的韵母是u
['呀', {'style': INITIALS}, ['']],
['呀', {'style': TONE2}, ['ya1']],
['呀', {'style': FINALS}, ['ia']],
['无', {'style': INITIALS}, ['']],
['无', {'style': TONE2}, ['wu2']],
['无', {'style': FINALS}, ['u']],
['衣', {'style': TONE2}, ['yi1']],
['衣', {'style': FINALS}, ['i']],
['万', {'style': TONE2}, ['wa4n']],
['万', {'style': FINALS}, ['uan']],
# ju, qu, xu 的韵母应该是 v
['具', {'style': FINALS_TONE}, ['ǜ']],
['具', {'style': FINALS_TONE2}, ['v4']],
['具', {'style': FINALS}, ['v']],
['取', {'style': FINALS_TONE}, ['ǚ']],
['取', {'style': FINALS_TONE2}, ['v3']],
['取', {'style': FINALS}, ['v']],
['徐', {'style': FINALS_TONE}, ['ǘ']],
['徐', {'style': FINALS_TONE2}, ['v2']],
['徐', {'style': FINALS}, ['v']],

]


Expand Down