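Update the chunkedextractor version to v2.0.0, switch sentence pre-processing to ClearTokenizer/ClearPostagger, and add the include-unknown-arg2 CLI flag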

knowitall · Dec 29, 2015 · 0b6cb9e
1 parent 2c33ae7
commit 0b6cb9e
Show file tree

Hide file tree

Showing 4 changed files with 18 additions and 11 deletions.
diff --git a/build.sbt b/build.sbt
@@ -18,7 +18,7 @@ resolvers += "Sonatype SNAPSHOTS" at "https://oss.sonatype.org/content/repositor
 libraryDependencies ++= Seq(
   // extractor components
   "edu.washington.cs.knowitall.srlie" %% "srlie" % "1.0.3",
-  "edu.washington.cs.knowitall.chunkedextractor" %% "chunkedextractor" % "1.0.9",
+  "edu.washington.cs.knowitall.chunkedextractor" %% "chunkedextractor" % "2.0.0",
   // for splitting sentences
   "edu.washington.cs.knowitall.nlptools" %% "nlptools-sentence-opennlp" % "2.4.5",
   // for remote components

diff --git a/src/main/scala/edu/knowitall/openie/OpenIE.scala b/src/main/scala/edu/knowitall/openie/OpenIE.scala
@@ -14,24 +14,27 @@ import edu.knowitall.tool.chunk.ChunkedToken
 import edu.knowitall.tool.chunk.OpenNlpChunker
 import edu.knowitall.tool.parse.DependencyParser
 import edu.knowitall.tool.postag.OpenNlpPostagger
+import edu.knowitall.tool.postag.ClearPostagger
 import edu.knowitall.tool.srl.Srl
 import edu.knowitall.tool.stem.MorphaStemmer
 import edu.knowitall.tool.tokenize.OpenNlpTokenizer
 import edu.knowitall.tool.parse.ClearParser
 import edu.knowitall.tool.srl.ClearSrl
+import edu.knowitall.tool.postag.ClearPostagger
+import edu.knowitall.tool.tokenize.ClearTokenizer
 
-class OpenIE(parser: DependencyParser = new ClearParser(), srl: Srl = new ClearSrl(), triples: Boolean = false) {
+class OpenIE(parser: DependencyParser = new ClearParser(), srl: Srl = new ClearSrl(), triples: Boolean = false, includeUnknownArg2: Boolean = false) {
   // confidence functions
   val srlieConf = SrlConfidenceFunction.loadDefaultClassifier()
   val relnounConf = RelnounConfidenceFunction.loadDefaultClassifier()
 
   // sentence pre-processors
-     val tokenizer = new OpenNlpTokenizer()
-     val postagger = new OpenNlpPostagger(tokenizer)
-     val chunkerOIE = new OpenNlpChunker(postagger)
+  val tokenizer = new ClearTokenizer()
+  val postagger = new ClearPostagger(tokenizer)
+  val chunkerOIE = new OpenNlpChunker(postagger)
 
   // subextractors
-  val relnoun = new Relnoun
+  val relnoun = new Relnoun(true, true, includeUnknownArg2)
   val srlie = new SrlExtractor(srl)
 
 

diff --git a/src/main/scala/edu/knowitall/openie/OpenIECli.scala b/src/main/scala/edu/knowitall/openie/OpenIECli.scala
@@ -93,7 +93,8 @@ object OpenIECli extends App {
     ignoreErrors: Boolean = false,
     showUsage: Boolean = false,
     binary: Boolean = false,
-    split: Boolean = false) {
+    split: Boolean = false,
+    includeUnknownArg2: Boolean = false) {
 
     /***
      * Create the input source from a file or stdin.
@@ -156,6 +157,9 @@ object OpenIECli extends App {
       flag("ignore-errors", "ignore errors") { config =>
         config.copy(ignoreErrors = true)
       },
+      flag("include-unknown-arg2", "includes arg2 [UNKNOWN] extractions from relnoun") { config =>
+        config.copy(includeUnknownArg2 = true)
+      },
       flag("b", "binary", "binary output") { config =>
         config.copy(binary = true)
       },
@@ -174,7 +178,7 @@ object OpenIECli extends App {
         case e: MalformedInputException =>
           System.err.println(
             "\nError: a MalformedInputException was thrown.\n" +
-            "This usually means there is a mismatch between what Ollie expects and the input file.\n" +
+            "This usually means there is a mismatch between what is expected and the input file.\n" +
             "Try changing the input file's character encoding to UTF-8 or specifying the correct character encoding for the input file with '--encoding'.\n")
           e.printStackTrace()
       }
@@ -186,7 +190,7 @@ object OpenIECli extends App {
    */
   def run(config: Config) {
     // the extractor system
-    val openie = new OpenIE(parser=config.createParser(), srl=config.createSrl(),config.binary)
+    val openie = new OpenIE(parser=config.createParser(), srl=config.createSrl(),config.binary, config.includeUnknownArg2)
 
     println("* * * * * * * * * * * * *")
     println("* OpenIE 4.1.x is ready *")

diff --git a/src/test/scala/edu/knowitall/openie/OpenIESpecTest.scala b/src/test/scala/edu/knowitall/openie/OpenIESpecTest.scala
@@ -15,7 +15,7 @@ class OpenIESpecTest extends FlatSpec with Matchers {
     val insts = openie("U.S. president Obama gave a speech")
 
     insts.size should be (2)
-    insts.map(_.extraction.toString).sorted should be (Seq("(Obama; [is] president [of]; U.S.)", "(U.S. president Obama; gave; a speech)"))
+    insts.map(_.extraction.toString).sorted should be (Seq("(Obama; [is] president [of]; United States)", "(U.S. president Obama; gave; a speech)"))
 
     // sentence pre-processors
     val tokenizertest = new OpenNlpTokenizer()
@@ -24,7 +24,7 @@ class OpenIESpecTest extends FlatSpec with Matchers {
 
     val inststest = openie.extract("U.S. president Obama gave a speech",chunkertest)
     inststest.size should be (2)
-    inststest.map(_.extraction.toString).sorted should be (Seq("(Obama; [is] president [of]; U.S.)", "(U.S. president Obama; gave; a speech)"))
+    inststest.map(_.extraction.toString).sorted should be (Seq("(Obama; [is] president [of]; United States)", "(U.S. president Obama; gave; a speech)"))
 
   }
 }