Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Handle empty (or whitespace-only) input strings in MaltParser.dependencies.

  • Loading branch information...
commit 99dac7a896ad7f776e982fbe0263852075175705 1 parent 6e15c7e
@schmmd schmmd authored
Showing with 40 additions and 36 deletions.
  1. +40 −36 parse/malt/src/main/scala/tool/parse/MaltParser.scala
View
76 parse/malt/src/main/scala/tool/parse/MaltParser.scala
@@ -63,42 +63,46 @@ class MaltParser(modelUrl: URL = new File("engmalt.linear-1.7.mco").toURI.toURL,
}
override def dependencies(sentence: String): Iterable[Dependency] = {
- val tokens = tagger.postag(sentence).iterator.zipWithIndex.map { case (t, i) =>
- new DependencyNode(t, Interval.singleton(i))
- }.toIndexedSeq
-
- val lemmatized = tokens.map(stemmer.stemToken)
-
- val maltTokens: Array[String] = lemmatized.iterator.zipWithIndex.map { case (ltok, i) =>
- Iterable(i+1,
- ltok.token.string,
- ltok.lemma,
- ltok.token.postag,
- ltok.token.postag,
- "-").mkString("\t")
- }.toArray[String]
- val structure = parser.parse(maltTokens)
-
- val tables = structure.getSymbolTables
-
- val deps: SortedSet[Dependency] = structure.getEdges.flatMap { edge =>
- if (edge.getSource.getIndex == 0 || edge.getTarget.getIndex == 0) {
- // skip the root
- None
- }
- else {
- val source = tokens(edge.getSource.getIndex - 1)
- val dest = tokens(edge.getTarget.getIndex - 1)
-
- val types = edge.getLabelTypes
- val labels = types.map(edge.getLabelSymbol)
- val label = labels.head
-
- Some(new Dependency(source, dest, label))
- }
- }(scala.collection.breakOut)
-
- deps
+ val trimmed = sentence.trim
+ if (trimmed.isEmpty) Iterable.empty
+ else {
+ val tokens = tagger.postag(trimmed).iterator.zipWithIndex.map { case (t, i) =>
+ new DependencyNode(t, Interval.singleton(i))
+ }.toIndexedSeq
+
+ val lemmatized = tokens.map(stemmer.stemToken)
+
+ val maltTokens: Array[String] = lemmatized.iterator.zipWithIndex.map { case (ltok, i) =>
+ Iterable(i+1,
+ ltok.token.string,
+ ltok.lemma,
+ ltok.token.postag,
+ ltok.token.postag,
+ "-").mkString("\t")
+ }.toArray[String]
+ val structure = parser.parse(maltTokens)
+
+ val tables = structure.getSymbolTables
+
+ val deps: SortedSet[Dependency] = structure.getEdges.flatMap { edge =>
+ if (edge.getSource.getIndex == 0 || edge.getTarget.getIndex == 0) {
+ // skip the root
+ None
+ }
+ else {
+ val source = tokens(edge.getSource.getIndex - 1)
+ val dest = tokens(edge.getTarget.getIndex - 1)
+
+ val types = edge.getLabelTypes
+ val labels = types.map(edge.getLabelSymbol)
+ val label = labels.head
+
+ Some(new Dependency(source, dest, label))
+ }
+ }(scala.collection.breakOut)
+
+ deps
+ }
}
override def dependencyGraph(sentence: String): DependencyGraph = {
Please sign in to comment.
Something went wrong with that request. Please try again.