Skip to content

Commit

Permalink
Merge branch 'master' into mt-preordering-feat
Browse files Browse the repository at this point in the history
  • Loading branch information
sebschu authored and Stanford NLP committed Feb 18, 2015
1 parent f1e1e47 commit ec46b99
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 146 deletions.
Expand Up @@ -161,12 +161,12 @@ public void testBasicMatching() throws Exception {
}

/**
* The LOCATION on Ontario Place should not be overwritten since Ontario (STATE_OR_PROVINCE)
* does not span Ontario Place. Native American Church will overwrite ORGANIZATION with
* The LOCATION on Ontario Lake should not be overwritten since Ontario (STATE_OR_PROVINCE)
* does not span Ontario Lake. Native American Church will overwrite ORGANIZATION with
* RELIGION.
*/
public void testOverwrite() throws Exception {
String str = "I like Ontario Place , and I like the Native American Church , too .";
String str = "I like Ontario Lake , and I like the Native American Church , too .";
Annotation document = createDocument(str);
annotator.annotate(document);
List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
Expand Down
2 changes: 1 addition & 1 deletion scripts/makeSerialized.csh
Expand Up @@ -40,7 +40,7 @@ set ctb=/afs/ir/data/linguistic-data/Chinese-Treebank/6/data/utf8/bracketed
# [(1, 40), (901, 931), (1018, 1018), (1020, 1020), (1036, 1036), (1044, 1044), (1060, 1061), (1072, 1072), (1118, 1119), (1132, 1132), (1141, 1142), (1148, 1148), (2165, 2180), (2295, 2310), (2570, 2602), (2800, 2819)]
set ctb7train=/u/nlp/data/chinese/ctb7/train.mrg
set ctb7test=/u/nlp/data/chinese/ctb7/test.mrg
set negra=/afs/ir/data/linguistic-data/NEGRA/penn-format-train-dev-test
set negra=/u/nlp/data/GermanACL08/negra/penn-format-train-dev-test

set host=`hostname | cut -d. -f1`

Expand Down
7 changes: 7 additions & 0 deletions src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java
Expand Up @@ -1052,11 +1052,18 @@ private void classifyAndWriteAnswers(Collection<List<IN>> documents,
IOUtils.encodedOutputStreamPrintWriter(System.out, flags.outputEncoding, true), readerWriter);
}

/**
 * Dumps the features extracted from the given documents (e.g. for debugging
 * or offline analysis). Does nothing by default; subclasses that actually
 * compute features — such as the CRF-based classifier — can override this
 * to write them out.
 *
 * @param documents the documents whose features should be dumped
 */
public void dumpFeatures(Collection<List<IN>> documents) {}

public void classifyAndWriteAnswers(Collection<List<IN>> documents,
PrintWriter printWriter,
DocumentReaderAndWriter<IN> readerWriter)
throws IOException
{
if (flags.exportFeatures != null) {
dumpFeatures(documents);
}

Timing timer = new Timing();

Counter<String> entityTP = new ClassicCounter<String>();
Expand Down
18 changes: 13 additions & 5 deletions src/edu/stanford/nlp/ie/crf/CRFClassifier.java
Expand Up @@ -1101,6 +1101,18 @@ private double[] makeDatumUsingEmbedding(List<IN> info, int loc, List<FeatureFac
return featureValArr;
}

/**
 * Writes out the features of the given documents via a
 * {@link CRFFeatureExporter}, timing the export and reporting the elapsed
 * time on stderr. A no-op unless {@code flags.exportFeatures} names an
 * output destination.
 */
@Override
public void dumpFeatures(Collection<List<IN>> docs) {
  // Nothing to do unless an export target was configured on the flags.
  if (flags.exportFeatures == null) {
    return;
  }
  Timing exportTimer = new Timing();
  exportTimer.start();
  CRFFeatureExporter<IN> exporter = new CRFFeatureExporter<IN>(this);
  exporter.printFeatures(flags.exportFeatures, docs);
  long ms = exportTimer.stop();
  System.err.println("Time to export features: " + Timing.toSecondsString(ms) + " seconds");
}

@Override
public List<IN> classify(List<IN> document) {
if (flags.doGibbs) {
Expand Down Expand Up @@ -1599,11 +1611,7 @@ public void train(Collection<List<IN>> objectBankWrapper, DocumentReaderAndWrite
}

if (flags.exportFeatures != null) {
timer.start();
CRFFeatureExporter<IN> featureExporter = new CRFFeatureExporter<IN>(this);
featureExporter.printFeatures(flags.exportFeatures, docs);
elapsedMs = timer.stop();
System.err.println("Time to export features: " + Timing.toSecondsString(elapsedMs) + " seconds");
dumpFeatures(docs);
}

for (int i = 0; i <= flags.numTimesPruneFeatures; i++) {
Expand Down
5 changes: 4 additions & 1 deletion src/edu/stanford/nlp/ie/crf/CRFFeatureExporter.java
Expand Up @@ -8,6 +8,7 @@

import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
Expand Down Expand Up @@ -79,7 +80,9 @@ private String getFeatureString(List<IN> document) {

List<List<String>> features = d.asFeatures();
for (Collection<String> cliqueFeatures : features) {
for (String feat : cliqueFeatures) {
List<String> sortedFeatures = new ArrayList<String>(cliqueFeatures);
Collections.sort(sortedFeatures);
for (String feat : sortedFeatures) {
feat = ubPrefixFeatureString(feat);
sb.append(delimiter);
sb.append(feat);
Expand Down
5 changes: 4 additions & 1 deletion src/edu/stanford/nlp/sentiment/SentimentPipeline.java
Expand Up @@ -38,8 +38,11 @@
* <code>-parserModel</code> Which parser model to use, defaults to englishPCFG.ser.gz <br>
* <code>-sentimentModel</code> Which sentiment model to use, defaults to sentiment.ser.gz <br>
* <code>-file</code> Which file to process. <br>
* <code>-fileList</code> A comma separated list of files to process. <br>
* <code>-stdin</code> Read one line at a time from stdin. <br>
* <code>-output</code> pennTrees: Output trees with scores at each binarized node. vectors: Number tree nodes and print out the vectors. Defaults to printing just the root. <br>
* <code>-output</code> pennTrees: Output trees with scores at each binarized node. vectors: Number tree nodes and print out the vectors. probabilities: Output the scores for different labels for each node. Defaults to printing just the root. <br>
* <code>-filterUnknown</code> remove unknown trees from the input. Only applies to TREES input, in which case the trees must be binarized with sentiment labels <br>
* <code>-help</code> Print out help <br>
*
* @author John Bauer
*/
Expand Down
135 changes: 0 additions & 135 deletions src/edu/stanford/nlp/tagger/util/CountTagSequences.java

This file was deleted.

0 comments on commit ec46b99

Please sign in to comment.