Skip to content

Commit

Permalink
Sort terminals before converting to CONLLX.
Browse files Browse the repository at this point in the history
Since iterating over the terminals isn't guaranteed to yield them
in their linear order in the sentence, the terminals need to be
sorted before converting to CONLLX.
  • Loading branch information
sebpuetz committed May 28, 2019
1 parent a1809d1 commit 1e43028
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 16 deletions.
21 changes: 9 additions & 12 deletions lumberjack/src/io/conllx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,9 @@ impl From<Tree> for Sentence {
fn from(mut tree: Tree) -> Self {
let mut tokens = Vec::with_capacity(tree.n_terminals());

let terminals = tree.terminals().collect::<Vec<_>>();
let mut terminals = tree.terminals().collect::<Vec<_>>();
tree.sort_indices(&mut terminals);

for terminal in terminals {
let terminal = tree[terminal].terminal_mut().unwrap();
let mut token = Token::new(terminal.set_form(String::new()));
Expand All @@ -167,17 +169,12 @@ impl From<Tree> for Sentence {
}
impl<'a> From<&'a Tree> for Sentence {
fn from(tree: &'a Tree) -> Self {
let mut tokens = tree
.terminals()
.filter_map(|t| tree[t].terminal().map(|t| (t.into(), t.span().start)))
.collect::<Vec<_>>();

tokens.sort_by(|t0, t1| t0.1.cmp(&t1.1));
let mut sentence = Sentence::new();
for (token, _) in tokens {
sentence.push(token);
}
sentence
let mut terminals = tree.terminals().collect::<Vec<_>>();
tree.sort_indices(&mut terminals);
terminals
.into_iter()
.map(|t| tree[t].terminal().unwrap().into())
.collect()
}
}

Expand Down
12 changes: 8 additions & 4 deletions lumberjack/src/io/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ impl Encode for Tree {
if !self.is_projective() {
return Err(format_err!("Can't encode nonprojective tree."));
}
let terminals = self.terminals().collect::<Vec<_>>();
let mut terminals = self.terminals().collect::<Vec<_>>();
self.sort_indices(&mut terminals);
let mut encoding = Vec::with_capacity(terminals.len());
for terminal in terminals.into_iter() {
let common_nt = get_common(self, terminal)?
Expand All @@ -80,7 +81,8 @@ impl Encode for Tree {
if !self.is_projective() {
return Err(format_err!("Can't encode nonprojective tree."));
}
let terminals = self.terminals().collect::<Vec<_>>();
let mut terminals = self.terminals().collect::<Vec<_>>();
self.sort_indices(&mut terminals);
for terminal in terminals.into_iter() {
let mut string_rep = get_common(self, terminal)?
.map(|(common, n_common)| AbsoluteAncestor::new(n_common, common).to_string())
Expand All @@ -104,7 +106,8 @@ impl Encode for Tree {
return Err(format_err!("Can't encode nonprojective tree."));
}
let mut prev_n = 0;
let terminals = self.terminals().collect::<Vec<_>>();
let mut terminals = self.terminals().collect::<Vec<_>>();
self.sort_indices(&mut terminals);
let mut encoding = Vec::with_capacity(terminals.len());
for terminal in terminals.into_iter() {
let common_nt = match get_common(self, terminal)? {
Expand Down Expand Up @@ -132,7 +135,8 @@ impl Encode for Tree {
if !self.is_projective() {
return Err(format_err!("Can't encode nonprojective tree."));
}
let terminals = self.terminals().collect::<Vec<_>>();
let mut terminals = self.terminals().collect::<Vec<_>>();
self.sort_indices(&mut terminals);
let mut prev_n = 0;
for terminal in terminals.into_iter() {
let mut string_rep = get_common(self, terminal)?
Expand Down

0 comments on commit 1e43028

Please sign in to comment.