From f6e9e0cbae0769c3ef7dbdfa512f0af7ced6c0c6 Mon Sep 17 00:00:00 2001 From: Weiming Chen Date: Fri, 9 Oct 2015 17:11:30 +1100 Subject: [PATCH 1/4] =?UTF-8?q?ju,=20qu,=20xu=E7=9A=84=E9=9F=B5=E6=AF=8D?= =?UTF-8?q?=E5=BA=94=E8=AF=A5=E6=98=AFv?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pypinyin/__init__.py | 12 ++++++++++++ tests/test_pinyin.py | 11 +++++++++++ 2 files changed, 23 insertions(+) diff --git a/pypinyin/__init__.py b/pypinyin/__init__.py index 1f14ef39..63bf4032 100644 --- a/pypinyin/__init__.py +++ b/pypinyin/__init__.py @@ -196,6 +196,12 @@ def initial(pinyin): return i return '' +U_FINALS_EXCEPTIONS_MAP = { + u'ū': u'ǖ', + u'ú': u'ǘ', + u'ǔ': u'ǚ', + u'ù': u'ǜ', +} def final(pinyin): """获取单个拼音中的韵母. @@ -208,6 +214,12 @@ def final(pinyin): initial_ = initial(pinyin) or None if not initial_: return pinyin + # 特例 1 + m = re.match(u'^(j|q|x)(ū|ú|ǔ|ù)$', pinyin) + if m: + return (U_FINALS_EXCEPTIONS_MAP[m.group(2)]) + # 特例 2 + pinyin = re.sub('^(j|q|x)u(\d?)$', r'\1v\2', pinyin) return ''.join(pinyin.split(initial_, 1)) diff --git a/tests/test_pinyin.py b/tests/test_pinyin.py index 3f19c2d3..d729a805 100644 --- a/tests/test_pinyin.py +++ b/tests/test_pinyin.py @@ -230,6 +230,17 @@ def test_simple_seg(): # y, w 也不是拼音 ['呀', {'style': INITIALS}, ['']], ['无', {'style': INITIALS}, ['']], + # ju, qu, xu 的韵母应该是 v + ['具', {'style': FINALS_TONE}, ['ǜ']], + ['具', {'style': FINALS_TONE2}, ['v4']], + ['具', {'style': FINALS}, ['v']], + ['取', {'style': FINALS_TONE}, ['ǚ']], + ['取', {'style': FINALS_TONE2}, ['v3']], + ['取', {'style': FINALS}, ['v']], + ['徐', {'style': FINALS_TONE}, ['ǘ']], + ['徐', {'style': FINALS_TONE2}, ['v2']], + ['徐', {'style': FINALS}, ['v']], + ] From feb67cda05250dab56c0f2d58833e9f17118c74b Mon Sep 17 00:00:00 2001 From: Weiming Chen Date: Fri, 9 Oct 2015 17:32:30 +1100 Subject: [PATCH 2/4] =?UTF-8?q?yu=E7=9A=84=E9=9F=B5=E6=AF=8D=E6=98=AFv,=20?= =?UTF-8?q?yi=E7=9A=84=E9=9F=B5=E6=AF=8D=E6=98=AFi,=20wu=E7=9A=84=E9=9F=B5?= =?UTF-8?q?=E6=AF=8D=E6=98=AFu?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pypinyin/__init__.py | 15 +++++++++++++-- tests/test_pinyin.py | 17 ++++++++++++----- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/pypinyin/__init__.py b/pypinyin/__init__.py index 63bf4032..78cd4a05 100644 --- a/pypinyin/__init__.py +++ b/pypinyin/__init__.py @@ -213,13 +213,24 @@ def final(pinyin): """ initial_ = initial(pinyin) or None if not initial_: + # 特例 y/w + if pinyin.startswith('y'): + if pinyin.startswith('yu'): + pinyin = 'v' + pinyin[2:] + else: + pinyin = 'i' + pinyin[1:] + if pinyin.startswith('w'): + if pinyin.startswith('wu'): + pinyin = 'u' + pinyin[2:] + else: + pinyin = 'u' + pinyin[1:] return pinyin - # 特例 1 + # 特例 j/q/x m = re.match(u'^(j|q|x)(ū|ú|ǔ|ù)$', pinyin) if m: return (U_FINALS_EXCEPTIONS_MAP[m.group(2)]) - # 特例 2 pinyin = re.sub('^(j|q|x)u(\d?)$', r'\1v\2', pinyin) + return ''.join(pinyin.split(initial_, 1)) diff --git a/tests/test_pinyin.py b/tests/test_pinyin.py index d729a805..f3c96b77 100644 --- a/tests/test_pinyin.py +++ b/tests/test_pinyin.py @@ -222,21 +222,28 @@ def test_simple_seg(): ], # 误把 yu 放到声母列表了 ['鱼', {'style': TONE2}, ['yu2']], - ['鱼', {'style': FINALS}, ['yu']], + ['鱼', {'style': FINALS}, ['v']], ['雨', {'style': TONE2}, ['yu3']], - ['雨', {'style': FINALS}, ['yu']], + ['雨', {'style': FINALS}, ['v']], ['元', {'style': TONE2}, ['yua2n']], - ['元', {'style': FINALS}, ['yuan']], - # y, w 也不是拼音 + ['元', {'style': FINALS}, ['van']], + + # y, w 也不是拼音, yu的韵母是v, yi的韵母是i, wu的韵母是u ['呀', {'style': INITIALS}, ['']], + ['呀', {'style': TONE2}, ['ya1']], + ['呀', {'style': FINALS}, ['ia']], ['无', {'style': INITIALS}, ['']], + ['无', {'style': TONE2}, ['wu2']], + ['无', {'style': FINALS}, ['u']], + ['万', {'style': TONE2}, ['wa4n']], + ['万', {'style': FINALS}, ['uan']], # ju, qu, xu 的韵母应该是 v ['具', {'style': FINALS_TONE}, ['ǜ']], ['具', {'style': FINALS_TONE2}, ['v4']], ['具', {'style': FINALS}, ['v']], ['取', {'style': FINALS_TONE}, ['ǚ']], ['取', {'style': FINALS_TONE2}, ['v3']], - ['取', {'style': FINALS}, ['v']], + ['取', {'style': FINALS}, ['v']], ['徐', {'style': FINALS_TONE}, ['ǘ']], ['徐', {'style': FINALS_TONE2}, ['v2']], ['徐', {'style': FINALS}, ['v']], From 601364c0f4e588c204dc722e8f40facdd5a8102f Mon Sep 17 00:00:00 2001 From: Weiming Chen Date: Fri, 9 Oct 2015 17:41:55 +1100 Subject: [PATCH 3/4] =?UTF-8?q?=E4=BF=AE=E5=A4=8DBug:=20yi=E7=9A=84?= =?UTF-8?q?=E9=9F=B5=E6=AF=8D=E6=98=AFi?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pypinyin/__init__.py | 28 ++++++++++++++++------------ tests/test_pinyin.py | 2 ++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/pypinyin/__init__.py b/pypinyin/__init__.py index 78cd4a05..afe6ccd9 100644 --- a/pypinyin/__init__.py +++ b/pypinyin/__init__.py @@ -213,18 +213,7 @@ def final(pinyin): """ initial_ = initial(pinyin) or None if not initial_: - # 特例 y/w - if pinyin.startswith('y'): - if pinyin.startswith('yu'): - pinyin = 'v' + pinyin[2:] - else: - pinyin = 'i' + pinyin[1:] - if pinyin.startswith('w'): - if pinyin.startswith('wu'): - pinyin = 'u' + pinyin[2:] - else: - pinyin = 'u' + pinyin[1:] - return pinyin + return no_initial_final(pinyin) # 特例 j/q/x m = re.match(u'^(j|q|x)(ū|ú|ǔ|ù)$', pinyin) if m: @@ -233,6 +222,21 @@ def final(pinyin): return ''.join(pinyin.split(initial_, 1)) +def no_initial_final(pinyin): + # 特例 y/w + if pinyin.startswith('y'): + if pinyin.startswith('yu'): + pinyin = 'v' + pinyin[2:] + elif pinyin.startswith('yi'): + pinyin = pinyin[1:] + else: + pinyin = 'i' + pinyin[1:] + if pinyin.startswith('w'): + if pinyin.startswith('wu'): + pinyin = pinyin[1:] + else: + pinyin = 'u' + pinyin[1:] + return pinyin def toFixed(pinyin, style): """根据拼音风格格式化带声调的拼音. diff --git a/tests/test_pinyin.py b/tests/test_pinyin.py index f3c96b77..1723d0f3 100644 --- a/tests/test_pinyin.py +++ b/tests/test_pinyin.py @@ -235,6 +235,8 @@ def test_simple_seg(): ['无', {'style': INITIALS}, ['']], ['无', {'style': TONE2}, ['wu2']], ['无', {'style': FINALS}, ['u']], + ['衣', {'style': TONE2}, ['yi1']], + ['衣', {'style': FINALS}, ['i']], ['万', {'style': TONE2}, ['wa4n']], ['万', {'style': FINALS}, ['uan']], # ju, qu, xu 的韵母应该是 v From 64035210a298fb368ce24d6d827a614f80031908 Mon Sep 17 00:00:00 2001 From: Weiming Chen Date: Sun, 11 Oct 2015 07:57:48 +1100 Subject: [PATCH 4/4] =?UTF-8?q?=E7=A7=BB=E9=99=A4=E5=A4=9A=E4=BD=99?= =?UTF-8?q?=E7=9A=84=E7=A9=BA=E6=A0=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pypinyin/__init__.py | 5 ++--- tests/test_pinyin.py | 15 +++++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/pypinyin/__init__.py b/pypinyin/__init__.py index afe6ccd9..a54d66f8 100644 --- a/pypinyin/__init__.py +++ b/pypinyin/__init__.py @@ -219,7 +219,6 @@ def final(pinyin): if m: return (U_FINALS_EXCEPTIONS_MAP[m.group(2)]) pinyin = re.sub('^(j|q|x)u(\d?)$', r'\1v\2', pinyin) - return ''.join(pinyin.split(initial_, 1)) def no_initial_final(pinyin): @@ -229,14 +228,14 @@ def no_initial_final(pinyin): pinyin = 'v' + pinyin[2:] elif pinyin.startswith('yi'): pinyin = pinyin[1:] - else: + else: pinyin = 'i' + pinyin[1:] if pinyin.startswith('w'): if pinyin.startswith('wu'): pinyin = pinyin[1:] else: pinyin = 'u' + pinyin[1:] - return pinyin + return pinyin def toFixed(pinyin, style): """根据拼音风格格式化带声调的拼音. diff --git a/tests/test_pinyin.py b/tests/test_pinyin.py index 1723d0f3..cf787341 100644 --- a/tests/test_pinyin.py +++ b/tests/test_pinyin.py @@ -222,30 +222,29 @@ def test_simple_seg(): ], # 误把 yu 放到声母列表了 ['鱼', {'style': TONE2}, ['yu2']], - ['鱼', {'style': FINALS}, ['v']], + ['鱼', {'style': FINALS}, ['v']], ['雨', {'style': TONE2}, ['yu3']], - ['雨', {'style': FINALS}, ['v']], + ['雨', {'style': FINALS}, ['v']], ['元', {'style': TONE2}, ['yua2n']], ['元', {'style': FINALS}, ['van']], - # y, w 也不是拼音, yu的韵母是v, yi的韵母是i, wu的韵母是u ['呀', {'style': INITIALS}, ['']], ['呀', {'style': TONE2}, ['ya1']], - ['呀', {'style': FINALS}, ['ia']], + ['呀', {'style': FINALS}, ['ia']], ['无', {'style': INITIALS}, ['']], ['无', {'style': TONE2}, ['wu2']], ['无', {'style': FINALS}, ['u']], ['衣', {'style': TONE2}, ['yi1']], - ['衣', {'style': FINALS}, ['i']], + ['衣', {'style': FINALS}, ['i']], ['万', {'style': TONE2}, ['wa4n']], - ['万', {'style': FINALS}, ['uan']], + ['万', {'style': FINALS}, ['uan']], # ju, qu, xu 的韵母应该是 v ['具', {'style': FINALS_TONE}, ['ǜ']], ['具', {'style': FINALS_TONE2}, ['v4']], ['具', {'style': FINALS}, ['v']], - ['取', {'style': FINALS_TONE}, ['ǚ']], + ['取', {'style': FINALS_TONE}, ['ǚ']], ['取', {'style': FINALS_TONE2}, ['v3']], - ['取', {'style': FINALS}, ['v']], + ['取', {'style': FINALS}, ['v']], ['徐', {'style': FINALS_TONE}, ['ǘ']], ['徐', {'style': FINALS_TONE2}, ['v2']], ['徐', {'style': FINALS}, ['v']],