Skip to content

Commit

Permalink
Revert "Minor cleanup to old CMMClassifier - final, etc."
Browse files Browse the repository at this point in the history
  • Loading branch information
manning authored and Stanford NLP committed Oct 10, 2015
1 parent 4c2b0c2 commit c0c635e
Show file tree
Hide file tree
Showing 83 changed files with 3,498 additions and 5,758 deletions.
2 changes: 1 addition & 1 deletion doc/loglinear/QUICKSTART.txt
Expand Up @@ -2,7 +2,7 @@ loglinear package quickstart:


First, read the ConcatVector section in ARCH.txt. First, read the ConcatVector section in ARCH.txt.


To jump straight into working code, go read generateSentenceModel() in edu.stanford.nlp.loglinear.CoNLLBenchmark. To jump straight into working code, go read generateSentenceModel() in edu.stanford.nlp.loglinear.learning.CoNLLBenchmark.


##################################################### #####################################################


Expand Down
2 changes: 1 addition & 1 deletion doc/loglinear/README.txt
@@ -1,6 +1,6 @@
For an explanation of how everything fits together, see ARCH.txt For an explanation of how everything fits together, see ARCH.txt


For a quick runnable object, go run edu.stanford.nlp.loglinear.CoNLLBenchmark in core's test package. For a quick runnable object, go run edu.stanford.nlp.loglinear.learning.CoNLLBenchmark in core's test package.


For a tutorial, see QUICKSTART.txt For a tutorial, see QUICKSTART.txt


Expand Down
135 changes: 0 additions & 135 deletions itest/src/edu/stanford/nlp/ie/qe/QuantifiableEntityExtractorITest.java

This file was deleted.

@@ -1,21 +1,21 @@
package edu.stanford.nlp.ling.tokensregex; package edu.stanford.nlp.ling.tokensregex;


import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.*; import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair; import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing; import edu.stanford.nlp.util.Timing;
import junit.framework.TestCase; import junit.framework.TestCase;


import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.regex.Pattern; import java.util.regex.Pattern;


public class TokenSequenceMatcherITest extends TestCase { public class TokenSequenceMatcherITest extends TestCase {
Expand Down Expand Up @@ -94,50 +94,6 @@ public void testTokenSequenceMatcherValue() throws IOException {
assertFalse(match); assertFalse(match);
} }


public void testTokenSequenceMatcherBeginEnd() throws IOException {
  CoreMap annotatedDoc = createDocument(testText);

  // Sequence-start anchor: "^ [] []" should capture exactly the first two tokens.
  TokenSequencePattern beginPattern = TokenSequencePattern.compile("^ [] []");
  TokenSequenceMatcher matcher = beginPattern.getMatcher(annotatedDoc.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals("the number", matcher.group());
  assertFalse(matcher.find());

  // Sequence-end anchor: "[] [] $" should capture exactly the last two tokens.
  TokenSequencePattern endPattern = TokenSequencePattern.compile("[] [] $");
  matcher = endPattern.getMatcher(annotatedDoc.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals("fifty.", matcher.group());
  assertFalse(matcher.find());

  // Both anchors on a two-token pattern: the document has more than two tokens, so no match.
  TokenSequencePattern bothAnchors = TokenSequencePattern.compile("^ [] [] $");
  matcher = bothAnchors.getMatcher(annotatedDoc.get(CoreAnnotations.TokensAnnotation.class));
  assertFalse(matcher.find());

  // ^ and $ inside a /string regex/ apply to the individual token's text, not the sequence.
  TokenSequencePattern stringRegex = TokenSequencePattern.compile("/^number$/");
  matcher = stringRegex.getMatcher(annotatedDoc.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals("number", matcher.group());
  assertFalse(matcher.find());
}

private static final String testText1 = "Mellitus was the first Bishop of London, the third Archbishop of Canterbury, and a member of the Gregorian mission sent to England to convert the Anglo-Saxons. He arrived in 601 AD, and was consecrated as Bishop of London in 604."; private static final String testText1 = "Mellitus was the first Bishop of London, the third Archbishop of Canterbury, and a member of the Gregorian mission sent to England to convert the Anglo-Saxons. He arrived in 601 AD, and was consecrated as Bishop of London in 604.";
public void testTokenSequenceMatcher1() throws IOException { public void testTokenSequenceMatcher1() throws IOException {
CoreMap doc = createDocument(testText1); CoreMap doc = createDocument(testText1);
Expand Down Expand Up @@ -223,7 +179,7 @@ public void testTokenSequenceMatcher1() throws IOException {
match = m.find(); match = m.find();
assertTrue(match); assertTrue(match);
assertEquals(0, m.groupCount()); assertEquals(0, m.groupCount());
assertEquals("London in 604.", m.group()); assertEquals("London in 604 .", m.group());
match = m.find(); match = m.find();
assertFalse(match); assertFalse(match);
} }
Expand Down Expand Up @@ -479,31 +435,6 @@ public void testTokenSequenceMatcherConj() throws IOException {
assertFalse(match); assertFalse(match);
} }


public void testTokenSequenceMatcherConj2() throws IOException {
  String sentence = "The cat is sleeping on the floor.";

  // Tokenize the sentence into CoreLabels with the PTB tokenizer.
  TokenizerFactory tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
  List<CoreLabel> tokenList = tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();

  // Greedy conjunction: both branches expand to cover the entire token sequence.
  String greedyPattern = "(?: ([]* cat []*) & ([]* sleeping []*))";
  TokenSequencePattern greedySeqPattern = TokenSequencePattern.compile(greedyPattern);
  TokenSequenceMatcher greedyMatcher = greedySeqPattern.getMatcher(tokenList);

  assertTrue(greedyMatcher.matches());

  assertTrue(greedyMatcher.find());
  assertEquals("The cat is sleeping on the floor.", greedyMatcher.group());

  // Reluctant conjunction: stops as soon as both branches are satisfied.
  String reluctantPattern = "(?: ([]*? cat []*?) & ([]*? sleeping []*?))";
  TokenSequencePattern reluctantSeqPattern = TokenSequencePattern.compile(reluctantPattern);
  TokenSequenceMatcher reluctantMatcher = reluctantSeqPattern.getMatcher(tokenList);

  assertTrue(reluctantMatcher.find());
  assertEquals("The cat is sleeping", reluctantMatcher.group());
}

public void testTokenSequenceMatcherConjAll() throws IOException { public void testTokenSequenceMatcherConjAll() throws IOException {
CoreMap doc = createDocument(testText1); CoreMap doc = createDocument(testText1);
TokenSequencePattern p = TokenSequencePattern.compile( TokenSequencePattern p = TokenSequencePattern.compile(
Expand Down Expand Up @@ -1048,7 +979,7 @@ public void testTokenSequenceOptimizeOrString() throws IOException {
TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class)); TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
boolean match = m.find(); boolean match = m.find();
assertTrue(match); assertTrue(match);
assertEquals("atropine we need to have many many words here but we don't sweating", m.group(0)); assertEquals("atropine we need to have many many words here but we do n't sweating", m.group(0));


match = m.find(); match = m.find();
assertFalse(match); assertFalse(match);
Expand All @@ -1074,7 +1005,7 @@ public void testMultiplePatterns() throws IOException {
CoreMap doc = createDocument("atropine we need to have many many words here but we don't sweating"); CoreMap doc = createDocument("atropine we need to have many many words here but we don't sweating");
MultiPatternMatcher<CoreMap> multiPatternMatcher = TokenSequencePattern.getMultiPatternMatcher(p1, p2); MultiPatternMatcher<CoreMap> multiPatternMatcher = TokenSequencePattern.getMultiPatternMatcher(p1, p2);
List<String> expected = new ArrayList<String>(); List<String> expected = new ArrayList<String>();
expected.add("atropine we need to have many many words here but we don't sweating"); expected.add("atropine we need to have many many words here but we do n't sweating");
Iterator<String> expectedIter = expected.iterator(); Iterator<String> expectedIter = expected.iterator();


Iterable<SequenceMatchResult<CoreMap>> matches = Iterable<SequenceMatchResult<CoreMap>> matches =
Expand Down Expand Up @@ -1256,7 +1187,7 @@ public void testTokenSequenceMatcherNumber() throws IOException {
match = m.find(); match = m.find();
assertTrue(match); assertTrue(match);
assertEquals(0, m.groupCount()); assertEquals(0, m.groupCount());
assertEquals("January 3, 2002", m.group()); assertEquals("January 3 , 2002", m.group());
match = m.find(); match = m.find();
assertFalse(match); assertFalse(match);


Expand All @@ -1265,7 +1196,7 @@ public void testTokenSequenceMatcherNumber() throws IOException {
match = m.find(); match = m.find();
assertTrue(match); assertTrue(match);
assertEquals(0, m.groupCount()); assertEquals(0, m.groupCount());
assertEquals("January 3, 2002", m.group()); assertEquals("January 3 , 2002", m.group());
match = m.find(); match = m.find();
assertFalse(match); assertFalse(match);


Expand Down Expand Up @@ -1473,32 +1404,6 @@ public void testTokenSequenceMatcherMultiNodePattern() throws IOException {
assertFalse(match); assertFalse(match);
} }


public void testTokenSequenceMatcherMultiNodePattern2() throws IOException {
  CoreMap annotatedDoc = createDocument("Replace the lamp with model wss.32dc55c3e945384dbc5e533ab711fd24");

  // Greedy multi-node pattern: captures the longest span satisfying the regex.
  TokenSequencePattern greedyPattern = TokenSequencePattern.compile("/model/ ((?m){1,4}/\\w+\\.\\w+/)");
  TokenSequenceMatcher matcher = greedyPattern.getMatcher(annotatedDoc.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals(1, matcher.groupCount());
  assertEquals("model wss.32dc55c3e945384dbc5e533ab711fd24", matcher.group());
  assertEquals("wss.32dc55c3e945384dbc5e533ab711fd24", matcher.group(1));
  assertFalse(matcher.find());

  // Reluctant variant ({1,4}?): captures the shortest span satisfying the regex.
  TokenSequencePattern reluctantPattern = TokenSequencePattern.compile("/model/ ((?m){1,4}?/\\w+\\.\\w+/)");
  matcher = reluctantPattern.getMatcher(annotatedDoc.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals(1, matcher.groupCount());
  assertEquals("model wss.32", matcher.group());
  assertEquals("wss.32", matcher.group(1));
  assertFalse(matcher.find());
}

public void testTokenSequenceMatcherBackRef() throws IOException { public void testTokenSequenceMatcherBackRef() throws IOException {
CoreMap doc = createDocument("A A A A A A A B A A B A C A E A A A A A A A A A A A B A A A"); CoreMap doc = createDocument("A A A A A A A B A A B A C A E A A A A A A A A A A A B A A A");


Expand Down Expand Up @@ -1583,18 +1488,17 @@ public void testCompile() {
//assertEquals(m.group(), "matching this"); //assertEquals(m.group(), "matching this");
} }


public void testBindingCompile(){ //This DOES NOT work right now!!
Env env = TokenSequencePattern.getNewEnv(); // public void testCompile2(){
env.bind("wordname",CoreAnnotations.TextAnnotation.class);
String s = "[wordname:\"name\"]{1,2}";
TokenSequencePattern p = TokenSequencePattern.compile(env, s);
}

// // This does not work!!!
// public void testNoBindingCompile(){
// Env env = TokenSequencePattern.getNewEnv(); // Env env = TokenSequencePattern.getNewEnv();
// env.bind("wordname",CoreAnnotations.TextAnnotation.class);
// String s = "[" + CoreAnnotations.TextAnnotation.class.getName()+":\"name\"]{1,2}"; // String s = "[" + CoreAnnotations.TextAnnotation.class.getName()+":\"name\"]{1,2}";
// TokenSequencePattern p = TokenSequencePattern.compile(env, s); // TokenSequencePattern p = TokenSequencePattern.compile(env, s);
// for(Map.Entry<String, Object> vars: env.getVariables().entrySet()){
// if(vars.getValue().equals(CoreAnnotations.TextAnnotation.class)){
// System.out.println("Found " + vars.getKey() + " binding for " + vars.getValue());
// }
// }
// } // }


public void testCaseInsensitive1(){ public void testCaseInsensitive1(){
Expand Down
Expand Up @@ -46,15 +46,15 @@ public void testDependencyParserEnglishSD() {
} }


// Lower because we're evaluating on PTB + extraDevTest, not just PTB // Lower because we're evaluating on PTB + extraDevTest, not just PTB
private static final double EnglishUdLas = 88.72648417258083; private static final double EnglishUdLas = 84.9873;


/** /**
* Test that the NN dependency parser performance doesn't change. * Test that the NN dependency parser performance doesn't change.
*/ */
public void testDependencyParserEnglishUD() { public void testDependencyParserEnglishUD() {
DependencyParser parser = new DependencyParser(); DependencyParser parser = new DependencyParser();
parser.loadModelFile("/u/nlp/data/depparser/nn/distrib-2015-04-16/english_UD.gz"); parser.loadModelFile("/u/nlp/data/depparser/nn/distrib-2015-04-16/english_UD.gz");
double las = parser.testCoNLL("/u/nlp/data/depparser/nn/data/dependency_treebanks/UD-converted/dev.conll", null); double las = parser.testCoNLL("/u/nlp/data/depparser/nn/data/dependency_treebanks/USD/dev.conll", null);
assertEquals(String.format("English UD LAS should be %.2f but was %.2f", assertEquals(String.format("English UD LAS should be %.2f but was %.2f",
EnglishUdLas, las), EnglishUdLas, las, 1e-4); EnglishUdLas, las), EnglishUdLas, las, 1e-4);
} }
Expand Down

0 comments on commit c0c635e

Please sign in to comment.