Skip to content

Commit

Permalink
Add bug fixes and update the main programs to support adding relations
Browse files Browse the repository at this point in the history
  • Loading branch information
fozziethebeat committed Sep 16, 2011
1 parent 456d639 commit 5de7863
Show file tree
Hide file tree
Showing 21 changed files with 614 additions and 162 deletions.
Binary file modified lib/sspace-lib.jar
Binary file not shown.
25 changes: 20 additions & 5 deletions util/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,33 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>

<!-- testing -->
<cobertura.version>2.4</cobertura.version>
<junit.version>4.7</junit.version>

<!-- util only -->
<ant.version>1.6.5</ant.version>
<commons.version>3.0.1</commons.version>
<gson.version>1.7.1</gson.version>
<guava.version>r09</guava.version>
<hadoop.version>0.20.2</hadoop.version>
<liblinear.version>1.7</liblinear.version>
<libsvm.version>1.0</libsvm.version>
<log4j.version>1.2.9</log4j.version>
<malt.version>1.5.3</malt.version>
<opennlp.version>1.5.1-incubating</opennlp.version>
<sspace.version>2.0</sspace.version>
<stanford.version>2010-11-12</stanford.version>
<text.version>1.5.2</text.version>
</properties>

<dependencies>
<!-- our modules -->
<dependency>
<artifactId>extendOntology-data</artifactId>
<groupId>${project.groupId}</groupId>
<version>${project.version}</version>
</dependency>

<!-- third party -->
Expand Down Expand Up @@ -102,11 +122,6 @@
<artifactId>log4j</artifactId>
<version>${log4j.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>${hadoop.version}</version>
</dependency>
</dependencies>

<build>
Expand Down
2 changes: 1 addition & 1 deletion util/src/main/java/gov/llnl/ontology/text/Sentence.java
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ public DependencyTreeNode[] dependencyParseTree() {
*/
public StringPair[] taggedTokens() {
StringPair[] taggedTokens = new StringPair[tokenAnnotations.length];
for (int i = 0; i < taggedTokens.length; ++i)
for (int i = 0; i < taggedTokens.length; ++i)
taggedTokens[i] = new StringPair(
AnnotationUtil.word(tokenAnnotations[i]),
AnnotationUtil.pos(tokenAnnotations[i]));
Expand Down
7 changes: 7 additions & 0 deletions wordnet/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>

<!-- general -->
<opennlp.version>1.5.1-incubating</opennlp.version>
<guava.version>r09</guava.version>
<logging.version>1.1.1</logging.version>
<sspace.version>2.0</sspace.version>
<stanford.version>2010-11-12</stanford.version>
</properties>

<dependencies>
Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import gov.llnl.ontology.text.Sentence;
import gov.llnl.ontology.text.corpora.SenseEvalAllWordsDocumentReader;
import gov.llnl.ontology.util.AnnotationUtil;
import gov.llnl.ontology.util.StringPair;
import gov.llnl.ontology.wordnet.OntologyReader;
import gov.llnl.ontology.wordnet.WordNetCorpusReader;
import gov.llnl.ontology.wordnet.wsd.WordSenseDisambiguation;
import gov.llnl.ontology.text.tag.OpenNlpMEPOSTagger;

import com.google.common.collect.Sets;

Expand All @@ -15,10 +15,12 @@
import edu.stanford.nlp.pipeline.Annotation;

import edu.ucla.sspace.util.ReflectionUtil;
import edu.ucla.sspace.util.WorkQueue;

import opennlp.tools.postag.POSTagger;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.PrintWriter;
import java.util.List;
import java.util.Set;


Expand All @@ -31,64 +33,86 @@ public static void main(String[] args) throws Exception {
String dataDir = args[0];
String algName = args[1];
String testCorpusName = args[2];
String outputPostFix = args[3];

POSTagger tagger = new OpenNlpMEPOSTagger();
String taggedCorpus = args[3];
String outputPostFix = args[4];

OntologyReader wordnet = WordNetCorpusReader.initialize(dataDir);

WordSenseDisambiguation wsd = ReflectionUtil.getObjectInstance(algName);
final WordSenseDisambiguation wsd = ReflectionUtil.getObjectInstance(algName);
wsd.setup(wordnet);

SenseEvalAllWordsDocumentReader reader =
new SenseEvalAllWordsDocumentReader();
reader.parse(testCorpusName);

PrintWriter writer = new PrintWriter(wsd.toString() + outputPostFix);
String prevId = null;
for (Sentence sentence : reader.sentences()) {
BufferedReader br = new BufferedReader(new FileReader(taggedCorpus));
String sentLine = br.readLine();
String tokenLine = br.readLine();
List<Sentence> taggedSentences = Sentence.readSentences(sentLine, tokenLine);
List<Sentence> sentences = reader.sentences();

final String[] output = new String[sentences.size()];
int s = 0;
WorkQueue workQueue = new WorkQueue();
Object key = workQueue.registerTaskGroup(sentences.size());
for (final Sentence sentence : sentences) {
final Set<Integer> focusIndices = Sets.newHashSet();
Sentence taggedSent = taggedSentences.get(s);
System.err.println("Reading sentence: " + s);
StringPair[] tags = taggedSent.taggedTokens();

int i = 0;
Set<Integer> focusIndices = Sets.newHashSet();
String[] tokens = new String[sentence.numTokens()];
for (Annotation annot : sentence) {
tokens[i] = AnnotationUtil.word(annot);
if (tokens[i].indexOf(" ") != -1)
tokens[i] = annot.get(StemAnnotation.class);

if (annot.get(ValueAnnotation.class) != null) {
if (annot.get(ValueAnnotation.class) != null)
focusIndices.add(i);
}
++i;
AnnotationUtil.setPos(annot, tags[i++].y);
}

i = 0;
String[] tags = tagger.tag(tokens);
for (Annotation annot : sentence)
AnnotationUtil.setPos(annot, tags[i++]);

i = 0;
Sentence disambiguated = wsd.disambiguate(sentence, focusIndices);
for (Annotation annot : disambiguated) {
String id = sentence.getAnnotation(i).get(
ValueAnnotation.class);
if (id != null) {
String sense = AnnotationUtil.wordSense(annot);
if (sense == null)
sense = "U";

if (prevId == null)
writer.printf("%s %s", id, sense);
else if (prevId.equals(id))
writer.printf(" %s", sense);
else
writer.printf("\n%s %s", id, sense);
prevId = id;
final int sentenceId = s++;
workQueue.add(key, new Runnable() {
public void run() {
String res = disambiguate(wsd, sentence, focusIndices);
output[sentenceId] = res;
}
});
}
workQueue.await(key);

++i;
PrintWriter writer = new PrintWriter(wsd.toString() + outputPostFix);
for (String out : output)
if (out != null && out.length() > 0)
writer.println(out);
writer.close();
}


/**
 * Disambiguates the focus words in {@code sentence} using {@code wsd} and
 * returns the SenseEval answer lines for that sentence.  Each token that
 * carried a {@link ValueAnnotation} instance id is reported as
 * "{@code id sense}"; consecutive tokens sharing the same id have their
 * senses appended to the same line, and a new id starts a new line.
 *
 * @param wsd the configured disambiguation algorithm
 * @param sentence the sentence whose focus words should be disambiguated
 * @param focusIndices token indices within {@code sentence} to disambiguate
 * @return the answer lines for this sentence, or the empty string when the
 *         sentence contained no focus tokens
 */
public static String disambiguate(WordSenseDisambiguation wsd,
                                  Sentence sentence,
                                  Set<Integer> focusIndices) {
    int i = 0;
    Sentence disambiguated = wsd.disambiguate(sentence, focusIndices);
    System.err.println("Disambiguated a sentence");
    StringBuilder sb = new StringBuilder();
    String prevId = null;
    for (Annotation annot : disambiguated) {
        // Only tokens that carried an instance id in the original sentence
        // are evaluation targets; all others are skipped.
        String id = sentence.getAnnotation(i).get(
                ValueAnnotation.class);
        if (id != null) {
            String sense = AnnotationUtil.wordSense(annot);
            if (sense == null)
                sense = "U"; // "U" marks a token with no assignable sense.

            // Group senses that share an id onto a single output line.
            if (prevId == null)
                sb.append(id).append(" ").append(sense);
            else if (prevId.equals(id))
                sb.append(" ").append(sense);
            else
                sb.append("\n").append(id).append(" ").append(sense);
            prevId = id;
        }
        ++i;
    }
    // NOTE(review): removed a stray "writer.close();" here — "writer" is
    // not in scope in this method (it was diff residue from the old main).
    return sb.toString();
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package gov.llnl.ontology.mains;

import gov.llnl.ontology.text.Sentence;
import gov.llnl.ontology.text.corpora.SenseEvalAllWordsDocumentReader;
import gov.llnl.ontology.util.AnnotationUtil;
import gov.llnl.ontology.util.StringPair;
import gov.llnl.ontology.text.tag.OpenNlpMEPOSTagger;

import com.google.common.collect.Sets;

import edu.stanford.nlp.ling.CoreAnnotations.StemAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.ValueAnnotation;
import edu.stanford.nlp.pipeline.Annotation;

import edu.ucla.sspace.util.ReflectionUtil;
import edu.ucla.sspace.util.WorkQueue;

import opennlp.tools.postag.POSTagger;

import java.io.PrintWriter;
import java.util.List;
import java.util.Set;


/**
* @author Keith Stevens
*/
/**
 * A command line tool that part of speech tags every sentence in a SenseEval
 * all-words test corpus and serializes the tagged sentences to a file (via
 * {@link Sentence#writeSentences}) so later runs can reuse the tags instead
 * of re-tagging the corpus.
 *
 * @author Keith Stevens
 */
public class SaveTaggedSenseTask {

    /**
     * Usage: {@code SaveTaggedSenseTask <senseEvalCorpus> <outputFile>}
     *
     * Reads the corpus named by {@code args[0]}, tags each token's part of
     * speech, and writes the serialized sentences to {@code args[1]}.
     *
     * NOTE(review): removed dead code left from a copy-paste of the
     * disambiguation main: an {@code output} array and sentence counter that
     * were never read, a {@code WorkQueue} task group that never had tasks
     * added (and was never awaited), and a {@code focusIndices} set that was
     * populated but never used.
     */
    public static void main(String[] args) throws Exception {
        String testCorpusName = args[0];
        String outputPostFix = args[1];

        // Tagger used to label every token in the corpus.
        POSTagger tagger = new OpenNlpMEPOSTagger();

        // Parse the SenseEval all-words corpus into Sentence objects.
        SenseEvalAllWordsDocumentReader reader =
            new SenseEvalAllWordsDocumentReader();
        reader.parse(testCorpusName);

        List<Sentence> sentences = reader.sentences();
        for (Sentence sentence : sentences) {
            // Collect the surface form of each token.  Multi-word tokens
            // (those containing a space) are replaced by their stem so the
            // tagger sees a single token.
            int i = 0;
            String[] tokens = new String[sentence.numTokens()];
            for (Annotation annot : sentence) {
                tokens[i] = AnnotationUtil.word(annot);
                if (tokens[i].indexOf(" ") != -1)
                    tokens[i] = annot.get(StemAnnotation.class);
                ++i;
            }

            // Tag the full sentence and record each token's part of speech
            // back onto its annotation.
            i = 0;
            String[] tags = tagger.tag(tokens);
            for (Annotation annot : sentence)
                AnnotationUtil.setPos(annot, tags[i++]);
        }

        // Serialize all tagged sentences: writeSentences returns a pair of
        // lines, both of which are needed by Sentence.readSentences.
        StringPair lines = Sentence.writeSentences(sentences);
        PrintWriter writer = new PrintWriter(outputPostFix);
        writer.println(lines.x);
        writer.println(lines.y);
        writer.close();
    }
}
52 changes: 43 additions & 9 deletions wordnet/src/main/java/gov/llnl/ontology/mains/WordnetShell.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@
*/
public class WordnetShell {

/**
 * Joins {@code tokens[start]} through {@code tokens[end-1]} (clamped to the
 * array length) into a single string with {@code sep} placed between
 * consecutive tokens.
 *
 * Fix: the previous version appended {@code sep} after every token and
 * relied on {@link String#trim()} to drop the trailing copy, which only
 * worked when the separator was whitespace (and also stripped leading and
 * trailing whitespace from the joined tokens themselves).  The separator is
 * now inserted only between tokens, so any separator works.
 *
 * @param tokens the tokens to join
 * @param start the index of the first token to include, inclusive
 * @param end the index one past the last token to include, exclusive;
 *        values beyond the array length are clamped
 * @param sep the separator placed between consecutive tokens
 * @return the joined string, or the empty string for an empty range
 */
public static String join(String[] tokens, int start, int end, String sep) {
    StringBuilder sb = new StringBuilder();
    for (int i = start; i < end && i < tokens.length; ++i) {
        if (i > start)
            sb.append(sep);
        sb.append(tokens[i]);
    }
    return sb.toString();
}

public static void main(String[] args) throws Exception {
OntologyReader wordnet = WordNetCorpusReader.initialize(args[0]);
while (true) {
Expand All @@ -22,15 +29,42 @@ public static void main(String[] args) throws Exception {
System.out.print("> ");
for (String line = null; (line = br.readLine()) != null; ) {
if (line.trim().length() != 0) {
int spaceIndex = line.lastIndexOf(" ");
String word = line.substring(0, spaceIndex).trim();
String p = line.substring(spaceIndex).trim();
PartsOfSpeech pos = PartsOfSpeech.valueOf(
p.toUpperCase());
for (Synset sense : wordnet.getSynsets(word, pos))
System.out.printf("%s %s\n",
sense.getName(),
sense.getSenseKey(word));
String[] tokens = line.split("\\s+");
String command = tokens[0];
if (command.equals("gs")) {
if (tokens.length == 2) {
Synset s = wordnet.getSynset(tokens[1]);
System.out.printf("%s %s\n",
s.getName(), s.getSenseKey());
} else {
String word = join(tokens, 1, tokens.length-1, " ");
PartsOfSpeech pos = PartsOfSpeech.valueOf(
tokens[tokens.length-1].toUpperCase());
System.out.println(word);
for (Synset sense : wordnet.getSynsets(word, pos))
System.out.printf("%s %s\n",
sense.getName(),
sense.getSenseKey(word));
}
} else if (command.equals("gr")) {
String word = tokens[1];
Synset synset = wordnet.getSynset(word);
if (tokens.length == 2) {
for (Synset related : synset.allRelations())
System.out.printf("%s -> %s\n",
word, related.getName());
} else {
for (Synset related : synset.getRelations(tokens[2]))
System.out.printf("%s -> %s\n",
word, related.getName());
}
} else if (command.equals("help")) {
System.out.println("get senses: gs word pos");
System.out.println("get senses: gs word.pos.#");
System.out.println("get relations: gr sense.pos.#");
System.out.println("get relations: gr sense.pos.# relation");
System.out.println("help: help");
}
}
System.out.print("> ");
}
Expand Down
Loading

0 comments on commit 5de7863

Please sign in to comment.