Skip to content
This repository
Browse code

Added Stanford NLP support for part-of-speech (POS) tagging of player input

  • Loading branch information...
commit 042219e9961bfe04b5a2702b3bde8a22753262fd 1 parent b2e3ebc
Kevin Hoffman authored February 14, 2012
BIN  models/english-bidirectional-distsim.tagger
Binary file not shown
33  models/english-bidirectional-distsim.tagger.props
... ...
@@ -0,0 +1,33 @@
  1
+## tagger training invoked at Thu Dec 15 01:17:19 PST 2011 with arguments:
  2
+                   model = english-bidirectional-distsim.tagger
  3
+                    arch = bidirectional5words,naacl2003unknowns,allwordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1)
  4
+               trainFile = /u/nlp/data/pos-tagger/english/train-wsj-0-18;/u/nlp/data/pos-tagger/english/train-extra-english
  5
+         closedClassTags = 
  6
+ closedClassTagThreshold = 40
  7
+ curWordMinFeatureThresh = 2
  8
+                   debug = false
  9
+             debugPrefix = 
  10
+            tagSeparator = _
  11
+                encoding = UTF-8
  12
+              iterations = 100
  13
+                    lang = english
  14
+    learnClosedClassTags = false
  15
+        minFeatureThresh = 2
  16
+           openClassTags = 
  17
+rareWordMinFeatureThresh = 5
  18
+          rareWordThresh = 5
  19
+                  search = owlqn
  20
+                    sgml = false
  21
+            sigmaSquared = 0.5
  22
+                   regL1 = 0.75
  23
+               tagInside = 
  24
+                tokenize = true
  25
+        tokenizerFactory = 
  26
+        tokenizerOptions = 
  27
+                 verbose = false
  28
+          verboseResults = true
  29
+    veryCommonWordThresh = 250
  30
+                xmlInput = 
  31
+              outputFile = 
  32
+            outputFormat = slashTags
  33
+     outputFormatOptions = 
8  pom.xml
@@ -33,9 +33,14 @@
33 33
             <version>2.9.1</version>
34 34
         </dependency>
35 35
 		<dependency>
  36
+		    <groupId>edu.stanford.nlp</groupId>
  37
+		    <artifactId>stanford-corenlp</artifactId>
  38
+		    <version>1.3.0</version>
  39
+		</dependency>
  40
+		<dependency>
36 41
 		  <groupId>com.typesafe.akka</groupId>
37 42
 		  <artifactId>akka-actor</artifactId>
38  
-		  <version>2.0-M3</version>
  43
+		  <version>2.0-RC1</version>
39 44
 		</dependency>
40 45
         <dependency>
41 46
             <groupId>junit</groupId>
@@ -49,6 +54,7 @@
49 54
             <plugin>
50 55
                 <groupId>org.scala-tools</groupId>
51 56
                 <artifactId>maven-scala-plugin</artifactId>
  57
+				<version>2.15.2</version>
52 58
                 <executions>
53 59
                     <execution>
54 60
                         <goals>
36  src/main/scala/core/Commander.scala
... ...
@@ -0,0 +1,36 @@
  1
+package com.kotancode.scalamud.core
  2
+
  3
+import akka.actor._
  4
+import akka.routing._
  5
+
  6
+import com.kotancode.scalamud.core.lang.EnrichedWord
  7
+import java.util.ArrayList
  8
+import edu.stanford.nlp.ling.Sentence
  9
+import edu.stanford.nlp.ling.TaggedWord
  10
+import edu.stanford.nlp.ling.HasWord
  11
+import edu.stanford.nlp.tagger.maxent.MaxentTagger
  12
+import scala.collection.JavaConverters._
  13
+
  14
/**
 * Actor that receives a raw line of text as a `String` message, runs it
 * through the shared part-of-speech tagger in the `Commander` companion, and
 * prints one `EnrichedWord` per tagged token.
 *
 * Messages that are not strings are not handled by this actor.
 */
class Commander extends Actor {
	def receive = {
		case s:String => {
			// Split on runs of whitespace and drop empty fragments, so blank
			// lines, leading spaces and repeated spaces never reach the tagger
			// as empty "words".
			val wordList = new java.util.ArrayList[String]()
			for (token <- s.split("\\s+") if !token.isEmpty) wordList.add(token)

			// Nothing to tag for an empty or whitespace-only line.
			if (!wordList.isEmpty) {
				val sentence = Sentence.toWordList(wordList)
				val taggedSentence = Commander.tagger.tagSentence(sentence).asScala.toList

				// The enriched words are only printed for now; nothing consumes them yet.
				val enrichedWords = taggedSentence.collect { case tw: TaggedWord => EnrichedWord(tw) }
				enrichedWords.foreach(ew => println(ew))
			}
		}
	}
}
  33
+
  34
/**
 * Companion holding the part-of-speech tagger used by `Commander` actors.
 */
object Commander {
	// Relative path of the tagger model file handed to the MaxentTagger constructor.
	// NOTE(review): presumably resolved against the process working directory — confirm.
	private val ModelPath = "models/english-bidirectional-distsim.tagger"

	/** Tagger instance created once, when this object is initialized. */
	val tagger: MaxentTagger = new MaxentTagger(ModelPath)
}
2  src/main/scala/core/Player.scala
@@ -14,6 +14,7 @@ case class TextMessage(message:String)
14 14
 class Player extends Actor {
15 15
 	private var inReader: BufferedReader = null
16 16
 	private var outWriter: PrintWriter = null
  17
+	private val commander = context.actorOf(Props(new Commander), "commander")
17 18
 	
18 19
  	implicit def inputStreamWrapper(in: InputStream) =
19 20
   		new BufferedReader(new InputStreamReader(in))
@@ -59,6 +60,7 @@ class Player extends Actor {
59 60
 	   Game.server ! PlayerLoggedIn
60 61
 	   while (true) {
61 62
 		   val line = inReader.readLine()
  63
+		   commander ! line
62 64
 		   outWriter.println(playerName + ": " + line)
63 65
 		   outWriter.flush()
64 66
 	   }	
36  src/main/scala/core/lang/EnrichedWord.scala
... ...
@@ -0,0 +1,36 @@
  1
+package com.kotancode.scalamud.core.lang
  2
+
  3
+import edu.stanford.nlp.ling.TaggedWord
  4
+
  5
/**
 * Coarse grammatical category attached to a tagged word.
 *
 * Declared as a sealed abstract class instead of a parameterless case class:
 * a case class without a parameter list is deprecated, and case objects
 * extending a case class is case-to-case inheritance. Sealing also lets the
 * compiler check matches over the categories for exhaustiveness.
 */
sealed abstract class PartOfSpeech

/** Category produced for the noun tags (NN, NNS, NNP, NNPS). */
case object Noun extends PartOfSpeech

/** Category produced for the verb tags (VB, VBD, VBG, VBN, VBP, VBZ). */
case object Verb extends PartOfSpeech

/** Category produced for the adjective tags (JJ, JJR, JJS). */
case object Adjective extends PartOfSpeech

/** Fallback category for every tag that is not a noun, verb or adjective tag. */
case object DontCare extends PartOfSpeech
  10
+
  11
/**
 * A `TaggedWord` that additionally carries a coarse [[PartOfSpeech]].
 *
 * @param value word text, forwarded to the `TaggedWord` constructor
 * @param tag   tag string, forwarded to the `TaggedWord` constructor
 */
class EnrichedWord(value:String, tag:String) extends TaggedWord(value, tag) {
	// Derived from the tag at construction time so that instances created with
	// `new` (bypassing the companion's apply) never expose a null part of speech.
	private var _pos: PartOfSpeech = EnrichedWord.rootTypeOf(tag)

	/** Part of speech currently associated with this word. */
	def pos: PartOfSpeech = _pos

	/** Replaces the part of speech associated with this word. */
	def pos_= (value:PartOfSpeech):Unit = _pos = value

	// NOTE(review): `value` and `tag` below are the constructor arguments, so a
	// later change to the inherited word/tag state would not show up here — confirm
	// instances are never mutated that way.
	override def toString = "[EnrichedWord: word=" + value +", tag=" + tag + ", pos=" + pos + "]"
}
  20
+
  21
/**
 * Factory and tag-classification helpers for [[EnrichedWord]].
 */
object EnrichedWord {
	/**
	 * Builds an `EnrichedWord` carrying the value and tag of `hw`, with its
	 * part of speech set from that tag via [[rootTypeOf]].
	 *
	 * @param hw the tagged word to copy
	 * @return a new `EnrichedWord` for the same value and tag
	 */
	def apply(hw: TaggedWord): EnrichedWord = {
		val enriched = new EnrichedWord(hw.value, hw.tag)
		enriched.pos = rootTypeOf(enriched.tag)
		enriched
	}

	/**
	 * Collapses a fine-grained tag string into a coarse [[PartOfSpeech]].
	 *
	 * The result type is declared explicitly so callers see `PartOfSpeech`
	 * rather than whatever compound type the compiler infers from the branches.
	 *
	 * @param s tag string (presumably a Penn Treebank tag as emitted by the tagger — confirm)
	 * @return `Verb`, `Noun` or `Adjective` for the listed tags; `DontCare` for anything else
	 */
	def rootTypeOf(s:String): PartOfSpeech = s match {
		case "VB" | "VBD" | "VBG" | "VBN" | "VBP" | "VBZ" => Verb
		case "NN" | "NNS" | "NNP" | "NNPS" => Noun
		case "JJ" | "JJR" | "JJS" => Adjective
		case _ => DontCare
	}
}

0 notes on commit 042219e

Please sign in to comment.
Something went wrong with that request. Please try again.