#15 ti35848

nvdajp committed on Dec 18, 2015
commit ec068b4 (1 parent: 3c0f450)
Show file tree

Hide file tree

Showing 5 changed files with 49 additions and 9 deletions.
diff --git a/jptools/harness.py b/jptools/harness.py
@@ -570,6 +570,11 @@
 		'input': '”’”',
 		'output': '⠶⠂⠶⠂⠶⠂',
 	},
+	{
+		'text': "u'コーテーション' ",
+		'input': "u’コーテーション’ ",
+		'output': '⠰⠥⠶⠂⠪⠒⠟⠒⠈⠺⠴⠶⠂ ',
+	},
 	{
 		'input': 'アー',
 		'output': '⠁⠒',

diff --git a/jptools/mecabHarness.py b/jptools/mecabHarness.py
@@ -462,7 +462,7 @@
 	{'text':'reserved', 'speech':'リザーブド'},
 
 	{'text':"for each author's protection", 'speech':'フォー イーチ オーサーズ プロテクション'},
-	{'text':"for each authors' protection", 'speech':'フォー イーチ オーサーズ プロテクション'},
+	{'text':"for each authors' protection", 'speech':"フォー イーチ オーサーズ’ プロテクション"},
 	{'text':"using it", 'speech':'ユージング イットゥ'},
 	{'text':"distributable", 'speech':'ディストリビュータブル'},
 	{'text':"usa", 'speech':'ユーエスエー'},
@@ -553,4 +553,7 @@
 	{'text':'git', 'speech':'ギットゥ'},
 	{'text':'automation', 'speech':'オートメイション'},
 	{'text':'ui automation', 'speech':'ユーアイ オートメイション'},
+	{'text': "u' コーテーション' ",
+	 'speech':"ユー’ コーテーション’ ",
+	 'braille':"ユー/’/ /コーテーション/’"},
 ]
diff --git a/jptools/nabccHarness.py b/jptools/nabccHarness.py
@@ -145,4 +145,40 @@
 		'input':'3/03',
 		'output': '⠒⠌⠴⠒',
 	},
+	{
+		'mode':   'NABCC',
+		'text': '"\'"',
+		'input': '"\'"',
+		'output': '⠐⠄⠐',
+	},
+	{
+		'mode':   'NABCC',
+		'text': '"\'" ',
+		'input': '"\'" ',
+		'output': '⠐⠄⠐ ',
+	},
+	{
+		'mode':   'NABCC',
+		'text': "u'コーテーション",
+		'input': "u' コーテーション",
+		'output': '⠥⠄ ⠪⠒⠟⠒⠈⠺⠴',
+	},
+	{
+		'mode':   'NABCC',
+		'text': "u'コーテーション'",
+		'input': "u' コーテーション '",
+		'output': '⠥⠄ ⠪⠒⠟⠒⠈⠺⠴ ⠄',
+	},
+	{
+		'mode':   'NABCC',
+		'text': "u' コーテーション' ",
+		'input': "u' コーテーション ' ",
+		'output': '⠥⠄ ⠪⠒⠟⠒⠈⠺⠴ ⠄ ',
+	},
+	{
+		'mode':   'NABCC',
+		'text': "u'コーテーション' ",
+		'input': "u' コーテーション ' ",
+		'output': '⠥⠄ ⠪⠒⠟⠒⠈⠺⠴ ⠄ ',
+	},
 	]
diff --git a/source/synthDrivers/jtalk/mecab.py b/source/synthDrivers/jtalk/mecab.py
@@ -334,7 +334,7 @@ def Mecab_correctFeatures(mf, CODE_ = CODE):
 				Mecab_setFeature(mf, pos-2, feature, CODE_=CODE_)
 		elif ar2 and ar[0] in (u'ｓ', u'ｄ', u'ｅｄ', u'ｒ'):
 			# pattern 5
-			if ar3 and ar2[0] == u"’":
+			if ar3 and ar2[0] in ("'", u"’"):
 				# PATTERN 5 "author's"
 				# before:
 				# 0 ａｕｔｈｏｒ,名詞,一般,*,*,*,*,ａｕｔｈｏｒ,オーサー,オーサー,1/4,C0
@@ -361,10 +361,6 @@ def Mecab_correctFeatures(mf, CODE_ = CODE):
 				Mecab_setFeature(mf, pos - 1, ',,,*,*,*,*', CODE_=CODE_)
 				f = _makeFeatureFromLatinWordAndPostfix(ar[0], ar2)
 				Mecab_setFeature(mf, pos, f, CODE_=CODE_)
-		elif ar2 and ar[0] == u"　" and ar[1] == u'記号' and ar[2] == u'空白':
-			# remove single quote in cases such as "authors' protection"
-			if ar2[0] == u"’":
-				Mecab_setFeature(mf, pos-1, ',,,*,*,*,*', CODE_=CODE_)
 		elif ar2 and RE_FULLSHAPE_ALPHA.match(ar[0]) and RE_FULLSHAPE_ALPHA.match(ar2[0]):
 			# 0 ｓｈｉ,名詞,一般,*,*,*,*,ｓｈｉ,シ,シ,1/1,C0
 			# 1 ｍａｎｅ,名詞,一般,*,*,*,*,ｍａｎｅ,メイン,メイン,1/3,C0

diff --git a/source/synthDrivers/jtalk/translator2.py b/source/synthDrivers/jtalk/translator2.py
@@ -484,7 +484,7 @@ def fix_japanese_date_morphs(li):
 			new_li.append(li[i])
 	return new_li
 
-def should_separate(prev2_mo, prev_mo, mo, next_mo, nabcc=False):
+def should_separate(prev2_mo, prev_mo, mo, next_mo, nabcc=False, logwrite=_logwrite):
 	if mo.hyouki == 'ー': return False
 	if prev_mo.hyouki == 'ー': return False
 	if mo.hyouki in 'ぁぃぅぇぉっゃゅょゎァィゥェォッャュョヮヵヶ': return False
@@ -540,7 +540,7 @@ def should_separate(prev2_mo, prev_mo, mo, next_mo, nabcc=False):
 		return True
 
 	# 1月/1日
-	if prev_mo.nhyouki[0].isdigit() and prev_mo.nhyouki[-1] == '月' and mo.output.isdigit():
+	if prev_mo.nhyouki and prev_mo.nhyouki[0].isdigit() and prev_mo.nhyouki[-1] == '月' and mo.output.isdigit():
 		return True
 	# 0/4月 -> 04月
 	if prev_mo.output.isdigit() and mo.nhyouki[0].isdigit():
@@ -1132,7 +1132,7 @@ def japanese_braille_separate(inbuf, logwrite, nabcc=False):
 		prev2_mo = li[i-2] if i-2 >= 0 else None
 		prev_mo = li[i-1]
 		next_mo = li[i+1] if i+1 < len(li) else None
-		li[i-1].sepflag = should_separate(prev2_mo, prev_mo, li[i], next_mo, nabcc=nabcc)
+		li[i-1].sepflag = should_separate(prev2_mo, prev_mo, li[i], next_mo, nabcc=nabcc, logwrite=logwrite)
 
 	# do not translate if string is unicode braille
 	for i in xrange(0, len(li)):