Permalink
Browse files

Ensure the typer uses token intervals (token indices into the sequence) rather than character offsets.

  • Loading branch information...
1 parent 62211d8 commit 7e8ff0aef501562d0de6c14f3f6c3ecff9976515 @schmmd schmmd committed May 10, 2013
@@ -7,8 +7,8 @@ abstract class Typer[E <: Token](val name: String, val source: String) {
def apply(seq: Seq[E]): Seq[Type]
}
-case class Type(val name: String, val source: String, val interval: Interval, val text: String) {
- def matchText[E <: Token](seq: Seq[E]): String = seq.iterator.slice(interval.start, interval.end).map(_.string).mkString(" ")
+case class Type(val name: String, val source: String, val tokenInterval: Interval, val text: String) {
+ def matchText[E <: Token](seq: Seq[E]): String = seq.iterator.slice(tokenInterval.start, tokenInterval.end).map(_.string).mkString(" ")
- def tokens[E <: Token](seq: Seq[E]): Seq[E] = seq.slice(interval.start, interval.end)
+ def tokens[E <: Token](seq: Seq[E]): Seq[E] = seq.slice(tokenInterval.start, tokenInterval.end)
}
@@ -25,11 +25,11 @@ class StanfordNer(private val classifier: AbstractSequenceClassifier[_]) extends
val nerType = triple.first
// find actual token offsets from NER offsets
- val start = seq.find(_.offsets.start == nerInterval.start).map(_.offsets.start)
- val end = seq.find(_.offsets.end == nerInterval.end).map(_.offsets.end)
+ val start = seq.iterator.zipWithIndex.find(_._1.offsets.start == nerInterval.start).map(_._2)
+ val end = seq.iterator.zipWithIndex.find(_._1.offsets.end == nerInterval.end).map(_._2)
for (s <- start; e <- end) {
- val typ = new Type(this.name + nerType, "Stanford", Interval.open(s, e), text.substring(nerInterval.start, nerInterval.end))
+ val typ = new Type(this.name + nerType, "Stanford", Interval.closed(s, e), text.substring(nerInterval.start, nerInterval.end))
tags ::= typ
}
}

0 comments on commit 7e8ff0a

Please sign in to comment.