Skip to content

Commit

Permalink
add warning span parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
jovanny endo committed Mar 11, 2017
1 parent 4e222d9 commit a105b99
Showing 1 changed file with 34 additions and 38 deletions.
72 changes: 34 additions & 38 deletions src/main/scala/jp.sujoyu.ruigoc/Main.scala
Expand Up @@ -7,69 +7,65 @@ import scala.collection.JavaConverters._
import edu.cmu.lti.jawjaw.pobj.POS

/**
 * Command-line tool that tokenizes a text, keeps its nouns and verbs, and
 * reports tokens that have a related word (same base form, hypernym or
 * hyponym according to JAWJAW) within a sliding window of `warnSpan`
 * noun/verb tokens.
 *
 * Usage: `Main <text> <warnSpan>`
 *
 * Output: one line per flagged token, formatted as
 * `position,surfaceLength,groupId`, ordered by token position.
 */
object Main {

  /** Maps the tokenizer's level-1 part-of-speech label to the JAWJAW `POS` constant. */
  val posNameToId = Map(
    "名詞" -> POS.n,
    "動詞" -> POS.v
  )

  /**
   * Entry point.
   *
   * @param args exactly two arguments: the text to analyse and the window
   *             size (`warnSpan`) as an integer. Anything else prints an
   *             error message and exits normally.
   */
  def main(args: Array[String]): Unit = args match {
    case Array(text, span) =>
      // Parse defensively: a non-numeric span used to abort with NumberFormatException.
      scala.util.Try(span.toInt).toOption match {
        case Some(warnSpan) => report(text, warnSpan)
        case None => println("error: 2つ目の引数は整数だよ。")
      }
    case _ => println("error: 引数は2つだよ。")
  }

  /** Tokenizes `text`, groups related nouns/verbs within `warnSpan` tokens, and prints them. */
  private def report(text: String, warnSpan: Int): Unit = {
    val tokenizer = new Tokenizer()
    val targetTokens = tokenizer.tokenize(text).asScala.filter(isTarget).toArray

    // NOTE(review): the guard accepts exactly warnSpan * 2 tokens while the
    // message asks for warnSpan * 2 + 1; kept as in the original — confirm intent.
    if (targetTokens.length < warnSpan * 2) {
      println(s"error: 名詞、動詞を${warnSpan * 2 + 1}個以上書いて出直してきな")
    } else {
      // token -> group id; tokens flagged by the same match share an id.
      val result = collection.mutable.Map.empty[Token, Int]
      var count = 0
      val lastIndex = targetTokens.length - 1

      for {
        i <- targetTokens.indices
        // Clamp the window to the array instead of stopping `i` early, so that
        // pairs lying entirely inside the last `warnSpan` tokens are checked too.
        j <- (i + 1) to math.min(i + warnSpan, lastIndex)
        if containsSynonym(targetTokens(i).getBaseForm,
          targetTokens(i).getPartOfSpeechLevel1,
          targetTokens(j).getBaseForm,
          targetTokens(j).getPartOfSpeechLevel1)
      } {
        // Reuse the id already given to token i, otherwise open a new group.
        val id = result.getOrElse(targetTokens(i), {
          val fresh = count
          count += 1
          fresh
        })
        result(targetTokens(i)) = id
        result(targetTokens(j)) = id
      }

      // Sort by position so the output does not depend on hash-map iteration order.
      val poss = result.toSeq
        .sortBy { case (token, _) => token.getPosition }
        .map { case (token, id) =>
          Seq(token.getPosition, token.getSurface.length, id)
        }

      println(poss.map(_.mkString(",")).mkString("\n"))
    }
  }

  /** Returns true when the token's level-1 part of speech is a noun (名詞) or a verb (動詞). */
  def isTarget(token: Token): Boolean = {
    token.getPartOfSpeechLevel1 == "名詞" || token.getPartOfSpeechLevel1 == "動詞"
  }

  /**
   * Tells whether `target` is related to `word`.
   *
   * @param word   base form of the first word
   * @param pos    level-1 part-of-speech label of `word`; must be a key of `posNameToId`
   * @param target base form of the second word
   * @param tPos   level-1 part-of-speech label of `target`
   * @return true when both labels are equal and `target` is `word` itself or
   *         appears among the hypernyms/hyponyms JAWJAW returns for `word`
   */
  def containsSynonym(word: String, pos: String, target: String, tPos: String): Boolean = {
    pos == tPos &&
      (JAWJAW.findHypernyms(word, posNameToId(pos)).asScala ++
        JAWJAW.findHyponyms(word, posNameToId(pos)).asScala ++ Seq(word)).toSeq.contains(target)
  }

}
Expand Down

0 comments on commit a105b99

Please sign in to comment.