2
2
from .utils .tree_manipulation import translate_tree_grammar
3
3
from .utils .misc import remove_trailing_space
4
4
import nltk
5
- from nltk import RecursiveDescentParser as Parser
5
+ from nltk . parse . chart import BottomUpLeftCornerChartParser as Parser
6
6
7
7
class URBAMT_Translator :
8
8
""""""
@@ -11,7 +11,6 @@ def __init__(self,
11
11
src_to_tgt_grammar : Dict ,
12
12
src_to_tgt_dictionary : Dict ):
13
13
"""Initialize the translator
14
-
15
14
Args:
16
15
src_grammar (str): source language grammar written in nltk style
17
16
E.g: src_grammar = \" ""
@@ -28,14 +27,10 @@ def __init__(self,
28
27
JJ -> 'good'
29
28
NN -> 'school'
30
29
\" ""
31
-
32
30
src_to_tgt_grammar (Dict): Transition between source grammar and target grammar as a dict
33
31
E.g: src2target_grammar = {
34
32
"NP1 -> JJ NN": "NP1 -> NN JJ"
35
33
}
36
-
37
-
38
-
39
34
src_to_tgt_dictionary (Dict): Dictionary of word-by-word transition from src language to target language
40
35
E.g: en_to_vi_dict = {
41
36
"I":"tôi",
@@ -54,31 +49,47 @@ def __init__(self,
54
49
def __process_text_input(txt):
    """Clean a raw sentence before it is tokenised.

    Args:
        txt: The sentence to clean.

    Returns:
        Whatever ``remove_trailing_space`` returns for ``txt``.
    """
    cleaned = remove_trailing_space(txt)
    return cleaned
56
51
57
def translate(self, sentences: List[str] or str, allow_multiple_translation=False):
    """Translate one sentence or a list of sentences.

    Each sentence is cleaned with ``__process_text_input``, split on
    whitespace and handed to ``self.parser``. Only the first parse tree the
    parser yields is used: its grammar is rewritten with
    ``translate_tree_grammar`` and every word of the result is then looked
    up in ``self.src_to_tgt_dictionary`` (words without an entry are kept
    unchanged).

    Args:
        sentences (List[str] or str): Sentences to translate. A single
            ``str`` is treated as a one-element list.
        allow_multiple_translation (bool): Accepted but not acted on yet;
            only the first parse of each sentence is translated regardless
            of this flag.

    Returns:
        List[str]: The translated sentences, in input order.

    Raises:
        ValueError: If the parser yields no tree for at least one sentence.
            All sentences are attempted first so that the message can list
            every failing sentence at once.
    """
    if isinstance(sentences, str):
        sentences = [sentences]

    translated_sentences = []
    failed_sentences = []

    for sentence in sentences:
        sentence = self.__process_text_input(sentence)
        trees = self.parser.parse(sentence.split())

        # The parser may return a lazy iterator; pull at most one tree.
        # No tree at all means the grammar did not match this sentence.
        first_tree = next(iter(trees), None)
        if first_tree is None:
            failed_sentences.append(sentence)
            continue

        # Translate grammar
        trans_gram_sentence = translate_tree_grammar(first_tree, self.src_to_tgt_grammar)

        # Translate words, falling back to the source word when it has no
        # dictionary entry.
        trans_lang_sentence = ' '.join(
            self.src_to_tgt_dictionary.get(word, word)
            for word in trans_gram_sentence.split()
        )
        translated_sentences.append(trans_lang_sentence)

    # Test the list itself, not the joined text: a failed sentence that is
    # empty after cleaning would otherwise join to "" and go unreported.
    if failed_sentences:
        failed_text = '\n '.join(failed_sentences)
        raise ValueError(f"Please check your grammar again, failed to translated these sentence \n {failed_text} ")

    return translated_sentences
0 commit comments