Skip to content

Commit

Permalink
#15 ti35848
Browse files: browse the repository at this point in the history
  • Loading branch information
nishimotz committed Dec 18, 2015
1 parent 3c0f450 commit ec068b4
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 9 deletions.
5 changes: 5 additions & 0 deletions jptools/harness.py
Expand Up @@ -570,6 +570,11 @@
'input': '”’”',
'output': '⠶⠂⠶⠂⠶⠂',
},
{
'text': "u'コーテーション' ",
'input': "u’コーテーション’ ",
'output': '⠰⠥⠶⠂⠪⠒⠟⠒⠈⠺⠴⠶⠂ ',
},
{
'input': 'アー',
'output': '⠁⠒',
Expand Down
5 changes: 4 additions & 1 deletion jptools/mecabHarness.py
Expand Up @@ -462,7 +462,7 @@
{'text':'reserved', 'speech':'リザーブド'},

{'text':"for each author's protection", 'speech':'フォー イーチ オーサーズ プロテクション'},
{'text':"for each authors' protection", 'speech':'フォー イーチ オーサーズ プロテクション'},
{'text':"for each authors' protection", 'speech':"フォー イーチ オーサーズ プロテクション"},
{'text':"using it", 'speech':'ユージング イットゥ'},
{'text':"distributable", 'speech':'ディストリビュータブル'},
{'text':"usa", 'speech':'ユーエスエー'},
Expand Down
Expand Up @@ -553,4 +553,7 @@
{'text':'git', 'speech':'ギットゥ'},
{'text':'automation', 'speech':'オートメイション'},
{'text':'ui automation', 'speech':'ユーアイ オートメイション'},
{'text': "u' コーテーション' ",
'speech':"ユー’ コーテーション’ ",
'braille':"ユー/’/ /コーテーション/’"},
]
36 changes: 36 additions & 0 deletions jptools/nabccHarness.py
Expand Up @@ -145,4 +145,40 @@
'input':'3/03',
'output': '⠒⠌⠴⠒',
},
{
'mode': 'NABCC',
'text': '"\'"',
'input': '"\'"',
'output': '⠐⠄⠐',
},
{
'mode': 'NABCC',
'text': '"\'" ',
'input': '"\'" ',
'output': '⠐⠄⠐ ',
},
{
'mode': 'NABCC',
'text': "u'コーテーション",
'input': "u' コーテーション",
'output': '⠥⠄ ⠪⠒⠟⠒⠈⠺⠴',
},
{
'mode': 'NABCC',
'text': "u'コーテーション'",
'input': "u' コーテーション '",
'output': '⠥⠄ ⠪⠒⠟⠒⠈⠺⠴ ⠄',
},
{
'mode': 'NABCC',
'text': "u' コーテーション' ",
'input': "u' コーテーション ' ",
'output': '⠥⠄ ⠪⠒⠟⠒⠈⠺⠴ ⠄ ',
},
{
'mode': 'NABCC',
'text': "u'コーテーション' ",
'input': "u' コーテーション ' ",
'output': '⠥⠄ ⠪⠒⠟⠒⠈⠺⠴ ⠄ ',
},
]
6 changes: 1 addition & 5 deletions source/synthDrivers/jtalk/mecab.py
Expand Up @@ -334,7 +334,7 @@ def Mecab_correctFeatures(mf, CODE_ = CODE):
Mecab_setFeature(mf, pos-2, feature, CODE_=CODE_)
elif ar2 and ar[0] in (u's', u'd', u'ed', u'r'):
# pattern 5
if ar3 and ar2[0] == u"’":
if ar3 and ar2[0] in ("'", u"’"):
# PATTERN 5 "author's"
# before:
# 0 author,名詞,一般,*,*,*,*,author,オーサー,オーサー,1/4,C0
Expand All @@ -361,10 +361,6 @@ def Mecab_correctFeatures(mf, CODE_ = CODE):
Mecab_setFeature(mf, pos - 1, ',,,*,*,*,*', CODE_=CODE_)
f = _makeFeatureFromLatinWordAndPostfix(ar[0], ar2)
Mecab_setFeature(mf, pos, f, CODE_=CODE_)
elif ar2 and ar[0] == u" " and ar[1] == u'記号' and ar[2] == u'空白':
# remove single quote in cases such as "authors' protection"
if ar2[0] == u"’":
Mecab_setFeature(mf, pos-1, ',,,*,*,*,*', CODE_=CODE_)
elif ar2 and RE_FULLSHAPE_ALPHA.match(ar[0]) and RE_FULLSHAPE_ALPHA.match(ar2[0]):
# 0 shi,名詞,一般,*,*,*,*,shi,シ,シ,1/1,C0
# 1 mane,名詞,一般,*,*,*,*,mane,メイン,メイン,1/3,C0
Expand Down
6 changes: 3 additions & 3 deletions source/synthDrivers/jtalk/translator2.py
Expand Up @@ -484,7 +484,7 @@ def fix_japanese_date_morphs(li):
new_li.append(li[i])
return new_li

def should_separate(prev2_mo, prev_mo, mo, next_mo, nabcc=False):
def should_separate(prev2_mo, prev_mo, mo, next_mo, nabcc=False, logwrite=_logwrite):
if mo.hyouki == 'ー': return False
if prev_mo.hyouki == 'ー': return False
if mo.hyouki in 'ぁぃぅぇぉっゃゅょゎァィゥェォッャュョヮヵヶ': return False
Expand Down
Expand Up @@ -540,7 +540,7 @@ def should_separate(prev2_mo, prev_mo, mo, next_mo, nabcc=False):
return True

# 1月/1日
if prev_mo.nhyouki[0].isdigit() and prev_mo.nhyouki[-1] == '月' and mo.output.isdigit():
if prev_mo.nhyouki and prev_mo.nhyouki[0].isdigit() and prev_mo.nhyouki[-1] == '月' and mo.output.isdigit():
return True
# 0/4月 -> 04月
if prev_mo.output.isdigit() and mo.nhyouki[0].isdigit():
Expand Down
Expand Up @@ -1132,7 +1132,7 @@ def japanese_braille_separate(inbuf, logwrite, nabcc=False):
prev2_mo = li[i-2] if i-2 >= 0 else None
prev_mo = li[i-1]
next_mo = li[i+1] if i+1 < len(li) else None
li[i-1].sepflag = should_separate(prev2_mo, prev_mo, li[i], next_mo, nabcc=nabcc)
li[i-1].sepflag = should_separate(prev2_mo, prev_mo, li[i], next_mo, nabcc=nabcc, logwrite=logwrite)

# do not translate if string is unicode braille
for i in xrange(0, len(li)):
Expand Down

0 comments on commit ec068b4

Please sign in to comment.