Skip to content

Commit

Permalink
updating the chunkedextractor version to v2.0.0
Browse files: browse the repository at this point in the history
  • Loading branch information
harrysethi committed Dec 29, 2015
1 parent 2c33ae7 commit 0b6cb9e
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 11 deletions.
2 changes: 1 addition & 1 deletion build.sbt
Expand Up @@ -18,7 +18,7 @@ resolvers += "Sonatype SNAPSHOTS" at "https://oss.sonatype.org/content/repositor
libraryDependencies ++= Seq(
// extractor components
"edu.washington.cs.knowitall.srlie" %% "srlie" % "1.0.3",
"edu.washington.cs.knowitall.chunkedextractor" %% "chunkedextractor" % "1.0.9",
"edu.washington.cs.knowitall.chunkedextractor" %% "chunkedextractor" % "2.0.0",
// for splitting sentences
"edu.washington.cs.knowitall.nlptools" %% "nlptools-sentence-opennlp" % "2.4.5",
// for remote components
Expand Down
13 changes: 8 additions & 5 deletions src/main/scala/edu/knowitall/openie/OpenIE.scala
Expand Up @@ -14,24 +14,27 @@ import edu.knowitall.tool.chunk.ChunkedToken
import edu.knowitall.tool.chunk.OpenNlpChunker
import edu.knowitall.tool.parse.DependencyParser
import edu.knowitall.tool.postag.OpenNlpPostagger
import edu.knowitall.tool.postag.ClearPostagger
import edu.knowitall.tool.srl.Srl
import edu.knowitall.tool.stem.MorphaStemmer
import edu.knowitall.tool.tokenize.OpenNlpTokenizer
import edu.knowitall.tool.parse.ClearParser
import edu.knowitall.tool.srl.ClearSrl
import edu.knowitall.tool.postag.ClearPostagger
import edu.knowitall.tool.tokenize.ClearTokenizer

class OpenIE(parser: DependencyParser = new ClearParser(), srl: Srl = new ClearSrl(), triples: Boolean = false) {
class OpenIE(parser: DependencyParser = new ClearParser(), srl: Srl = new ClearSrl(), triples: Boolean = false, includeUnknownArg2: Boolean = false) {
// confidence functions
val srlieConf = SrlConfidenceFunction.loadDefaultClassifier()
val relnounConf = RelnounConfidenceFunction.loadDefaultClassifier()

// sentence pre-processors
val tokenizer = new OpenNlpTokenizer()
val postagger = new OpenNlpPostagger(tokenizer)
val chunkerOIE = new OpenNlpChunker(postagger)
val tokenizer = new ClearTokenizer()
val postagger = new ClearPostagger(tokenizer)
val chunkerOIE = new OpenNlpChunker(postagger)

// subextractors
val relnoun = new Relnoun
val relnoun = new Relnoun(true, true, includeUnknownArg2)
val srlie = new SrlExtractor(srl)


Expand Down
10 changes: 7 additions & 3 deletions src/main/scala/edu/knowitall/openie/OpenIECli.scala
Expand Up @@ -93,7 +93,8 @@ object OpenIECli extends App {
ignoreErrors: Boolean = false,
showUsage: Boolean = false,
binary: Boolean = false,
split: Boolean = false) {
split: Boolean = false,
includeUnknownArg2: Boolean = false) {

/***
* Create the input source from a file or stdin.
Expand Down Expand Up @@ -156,6 +157,9 @@ object OpenIECli extends App {
flag("ignore-errors", "ignore errors") { config =>
config.copy(ignoreErrors = true)
},
flag("include-unknown-arg2", "includes arg2 [UNKNOWN] extractions from relnoun") { config =>
config.copy(includeUnknownArg2 = true)
},
flag("b", "binary", "binary output") { config =>
config.copy(binary = true)
},
Expand All @@ -174,7 +178,7 @@ object OpenIECli extends App {
case e: MalformedInputException =>
System.err.println(
"\nError: a MalformedInputException was thrown.\n" +
"This usually means there is a mismatch between what Ollie expects and the input file.\n" +
"This usually means there is a mismatch between what is expected and the input file.\n" +
"Try changing the input file's character encoding to UTF-8 or specifying the correct character encoding for the input file with '--encoding'.\n")
e.printStackTrace()
}
Expand All @@ -186,7 +190,7 @@ object OpenIECli extends App {
*/
def run(config: Config) {
// the extractor system
val openie = new OpenIE(parser=config.createParser(), srl=config.createSrl(),config.binary)
val openie = new OpenIE(parser=config.createParser(), srl=config.createSrl(),config.binary, config.includeUnknownArg2)

println("* * * * * * * * * * * * *")
println("* OpenIE 4.1.x is ready *")
Expand Down
4 changes: 2 additions & 2 deletions src/test/scala/edu/knowitall/openie/OpenIESpecTest.scala
Expand Up @@ -15,7 +15,7 @@ class OpenIESpecTest extends FlatSpec with Matchers {
val insts = openie("U.S. president Obama gave a speech")

insts.size should be (2)
insts.map(_.extraction.toString).sorted should be (Seq("(Obama; [is] president [of]; U.S.)", "(U.S. president Obama; gave; a speech)"))
insts.map(_.extraction.toString).sorted should be (Seq("(Obama; [is] president [of]; United States)", "(U.S. president Obama; gave; a speech)"))

// sentence pre-processors
val tokenizertest = new OpenNlpTokenizer()
Expand All @@ -24,7 +24,7 @@ class OpenIESpecTest extends FlatSpec with Matchers {

val inststest = openie.extract("U.S. president Obama gave a speech",chunkertest)
inststest.size should be (2)
inststest.map(_.extraction.toString).sorted should be (Seq("(Obama; [is] president [of]; U.S.)", "(U.S. president Obama; gave; a speech)"))
inststest.map(_.extraction.toString).sorted should be (Seq("(Obama; [is] president [of]; United States)", "(U.S. president Obama; gave; a speech)"))

}
}

0 comments on commit 0b6cb9e

Please sign in to comment.