Skip to content

Commit 9167792

Browse files
authored
Remove recursive parser (#12)
* remove recursive parser * fix 🐛
1 parent e136f7c commit 9167792

File tree

3 files changed

+24
-14
lines changed

3 files changed

+24
-14
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name='urbamt',
8-
version='0.0.1-b1',
8+
version='0.0.1-b2',
99
author="Patrick Phat Nguyen",
1010
author_email="me@patrickphat.com",
1111
description="Universal Rule-based Machine Translation Toolkit (URBaMT)",

urbamt/translator.py

Lines changed: 23 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from .utils.tree_manipulation import translate_tree_grammar
33
from .utils.misc import remove_trailing_space
44
import nltk
5-
from nltk import RecursiveDescentParser as Parser
5+
from nltk.parse.chart import BottomUpLeftCornerChartParser as Parser
66

77
class URBAMT_Translator:
88
""""""
@@ -11,7 +11,6 @@ def __init__(self,
1111
src_to_tgt_grammar: Dict,
1212
src_to_tgt_dictionary: Dict):
1313
"""Initialize the translator
14-
1514
Args:
1615
src_grammar (str): source language grammar written in nltk style
1716
E.g: src_grammar = \"""
@@ -28,14 +27,10 @@ def __init__(self,
2827
JJ -> 'good'
2928
NN -> 'school'
3029
\"""
31-
3230
src_to_tgt_grammar (Dict): Transition between source grammar and target grammar as a dict
3331
E.g: src2target_grammar = {
3432
"NP1 -> JJ NN": "NP1 -> NN JJ"
3533
}
36-
37-
38-
3934
src_to_tgt_dictionary (Dict): Dictionary of word-by-word transition from src language to target language
4035
E.g: en_to_vi_dict = {
4136
"I":"tôi",
@@ -54,31 +49,47 @@ def __init__(self,
5449
def __process_text_input(txt):
5550
return remove_trailing_space(txt)
5651

57-
def translate(self, sentences: List[str] or str):
52+
def translate(self, sentences: List[str] or str, allow_multiple_translation = False):
5853
"""Translate a list of sentences
59-
6054
Args:
6155
sentences (List[str]): A list of str-typed sentences
62-
6356
Returns:
6457
List[str]: A list of translated sentences
6558
"""
6659
if isinstance(sentences,str):
6760
sentences = [sentences]
6861

69-
translated_sentence = []
62+
translated_sentences = []
63+
failed_sentences = []
64+
7065
for sentence in sentences:
7166
sentence = self.__process_text_input(sentence)
7267
trees = self.parser.parse(sentence.split())
7368

69+
# Flag to check if there are trees in generator (grammar matched)
70+
translated = False
71+
7472
for t in trees:
73+
translated = True
7574

7675
# Translate grammar
7776
trans_gram_sentence = translate_tree_grammar(t,self.src_to_tgt_grammar)
7877

7978
# Translate words
8079
trans_lang_sentence = ' '.join([self.src_to_tgt_dictionary.get(word,word) for word in trans_gram_sentence.split()])
8180

82-
translated_sentence.append(trans_lang_sentence)
81+
translated_sentences.append(trans_lang_sentence)
82+
83+
# Get 1 sentence only, will support multi sentence
84+
break
85+
86+
if translated == False:
87+
failed_sentences.append(sentence)
88+
89+
# String to display failed sentence
90+
failed_sentences = '\n'.join(failed_sentences)
91+
92+
if len(failed_sentences) > 0:
93+
raise ValueError(f"Please check your grammar again, failed to translated these sentence \n {failed_sentences}")
8394

84-
return translated_sentence
95+
return translated_sentences

urbamt/utils/tree_manipulation.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,6 @@ def calculate_displacement(src_grammar, tgt_grammar):
7575
src_grammar_lst = src_grammar.split()
7676
tgt_grammar_lst = tgt_grammar.split()
7777

78-
print(src_grammar_lst.index("->"))
7978
src_grammar_lst = src_grammar_lst[src_grammar_lst.index("->")+1:]
8079
tgt_grammar_lst = tgt_grammar_lst[tgt_grammar_lst.index("->")+1:]
8180
displacement = []

0 commit comments

Comments
 (0)