Skip to content

Commit

Permalink
Refactored test, now the grobid home is taken either from GROBID_HOME…
Browse files Browse the repository at this point in the history
…, or from ../grobid-home or ../../grobid-home.

Moved test of the sax parser in the specific class
Added logger configuration (in console)
Added some training data for evaluation (to be corrected)
  • Loading branch information
lfoppiano committed Aug 26, 2016
1 parent 753b93e commit e0b74d6
Show file tree
Hide file tree
Showing 10 changed files with 54,452 additions and 102 deletions.
2,121 changes: 2,121 additions & 0 deletions grobid-ner/resources/dataset/ner/evaluation/Wikipedia_worldWarZ.training.txt

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

33,283 changes: 33,283 additions & 0 deletions grobid-ner/resources/dataset/ner/evaluation/todo/Wikipedia_holocaust.training.txt

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions grobid-ner/src/main/resources/log4j.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd" >
<log4j:configuration>
<!-- Log4j 1.x configuration: a single console appender attached to the root logger. -->
<!-- NOTE(review): this file sits in the main resources, so the DEBUG root level below presumably applies to whatever picks it up from the classpath; confirm that is intended. -->
<appender name="CONSOLE" class="org.apache.log4j.ConsoleAppender">
<layout class="org.apache.log4j.PatternLayout">
<!-- Line layout: timestamp, level left-justified to 5 characters, last component of the logger name left-justified to 25 characters, then the message. -->
<!-- NOTE(review): the time part is written HH:mm.ss (dot before the seconds); confirm this is intended rather than HH:mm:ss. -->
<!-- NOTE(review): the trailing \n is presumably unescaped to a newline by the XML configurator; PatternLayout's own platform line separator token is %n. Confirm. -->
<param name="ConversionPattern" value="%d{dd MMM yyyy HH:mm.ss} [%-5p] %-25c{1} - %m\n"/>
</layout>
</appender>

<!-- Root logger: DEBUG threshold, output goes to the CONSOLE appender defined above. -->
<root>
<priority value="DEBUG"></priority>
<appender-ref ref="CONSOLE"/>
</root>
</log4j:configuration>
30 changes: 25 additions & 5 deletions grobid-ner/src/test/java/org/grobid/core/EngineMockTest.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
package org.grobid.core;

import org.grobid.core.engines.Engine;
import org.grobid.core.exceptions.GrobidPropertyException;
import org.grobid.core.factory.GrobidFactory;
import org.grobid.core.mock.MockContext;
import org.grobid.core.utilities.GrobidProperties;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.commons.lang3.StringUtils.isEmpty;

public abstract class EngineMockTest {
private static Logger LOGGER = LoggerFactory.getLogger(EngineMockTest.class);
protected static Engine engine;

@AfterClass
Expand All @@ -17,14 +23,28 @@ public static void destroyInitialContext() throws Exception {

@BeforeClass
public static void initInitialContext() throws Exception {
try {
MockContext.setInitialContext("../../grobid-home");
} catch (Exception e) {
String grobidHome = System.getenv("GROBID_HOME");
if (!isEmpty(grobidHome)) {
GrobidProperties.set_GROBID_HOME_PATH(grobidHome);
GrobidProperties.setGrobidPropertiesPath(grobidHome + "/config/grobid.properties");
} else {
try {
LOGGER.trace("Trying grobid home from the usual location at ../grobid-home ");
GrobidProperties.set_GROBID_HOME_PATH("../grobid-home");
GrobidProperties.setGrobidPropertiesPath("../grobid-home/config/grobid.properties");
} catch (GrobidPropertyException gpe) {
LOGGER.error("Grobid HOME not found, trying to fish it from ../../grobid-home ");
try {
GrobidProperties.set_GROBID_HOME_PATH("../../grobid-home");
GrobidProperties.setGrobidPropertiesPath("../../grobid-home/config/grobid.properties");
} catch (GrobidPropertyException gpe2) {
LOGGER.error("Grobid HOME at ../../grobid-home not found, set the environment variable GROBID_HOME");
}
}
}

GrobidProperties.set_GROBID_HOME_PATH("../../grobid-home");
GrobidProperties.setGrobidPropertiesPath("../../grobid-home/config/grobid.properties");
GrobidProperties.getInstance();
MockContext.setInitialContext();
engine = GrobidFactory.getInstance().createEngine();
}
}
97 changes: 0 additions & 97 deletions grobid-ner/src/test/java/org/grobid/trainer/AssemblerTest.java

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package org.grobid.trainer.sax;

import org.junit.Test;

import java.util.ArrayList;
import java.util.List;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;

/**
 * Unit tests for {@code ReutersSaxHandler.retokenize}, checking that the pieces of a
 * number split around {@code ","} and {@code "."} come back as a single token.
 *
 * Created by lfoppiano on 25/08/16.
 */
public class ReutersSaxHandlerTest {

    /** A word, a space and a thousands-grouped number yield three tokens. */
    @Test
    public void testRetokenize_1() throws Exception {
        List<String> input = tokensOf("around", " ", "10", ",", "000");

        List<String> merged = ReutersSaxHandler.retokenize(input);

        assertThat(merged.size(), is(3));
        assertThat(merged.get(0), is("around"));
        assertThat(merged.get(2), is("10,000"));
    }

    /** A number with several separators, a space and a word yield three tokens. */
    @Test
    public void testRetokenize_2() throws Exception {
        List<String> input = tokensOf("10", ",", "000", ",", "000", ".", "00", " ", "errors");

        List<String> merged = ReutersSaxHandler.retokenize(input);

        assertThat(merged.size(), is(3));
        assertThat(merged.get(0), is("10,000,000.00"));
        assertThat(merged.get(2), is("errors"));
    }

    /** Collects the given strings, in order, into a new mutable {@link ArrayList}. */
    private static List<String> tokensOf(String... parts) {
        List<String> collected = new ArrayList<String>();
        for (String part : parts) {
            collected.add(part);
        }
        return collected;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package org.grobid.trainer.sax;

import org.junit.Test;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.InputStream;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;

/**
 * Test for {@code SemDocSaxHandler}: parses a Reuters news file, feeds its text vector
 * to the SemDoc handler, parses the matching SemDoc file and checks the size of the
 * resulting annotated text vector.
 *
 * Created by lfoppiano on 25/08/16.
 */
public class SemDocSaxHandlerTest {

    /**
     * Runs both classpath fixtures through non-validating SAX parsers and expects
     * 243 entries in the annotated text vector.
     *
     * @throws Exception if the parser cannot be configured or a fixture cannot be parsed
     */
    @Test
    public void testAssembler() throws Exception {
        // Non-validating factory that neither handles namespaces nor loads DTDs.
        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setValidating(false);
        spf.setFeature("http://xml.org/sax/features/namespaces", false);
        spf.setFeature("http://xml.org/sax/features/validation", false);
        spf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        spf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

        // First pass: collect the text vector from the Reuters document.
        ReutersSaxHandler reutersSax = new ReutersSaxHandler();
        InputStream reutersFile = this.getClass().getResourceAsStream("/100100newsML.xml");
        try {
            SAXParser p = spf.newSAXParser();
            p.parse(reutersFile, reutersSax);
        } finally {
            // getResourceAsStream returns null for a missing resource; only close a real stream.
            if (reutersFile != null) {
                reutersFile.close();
            }
        }

        // Second pass: the SemDoc handler is built from the text vector of the first pass.
        SemDocSaxHandler target = new SemDocSaxHandler(reutersSax.getTextVector());
        InputStream semdocFile = this.getClass().getResourceAsStream("/100100newsML.semdoc.xml");
        try {
            SAXParser p = spf.newSAXParser();
            p.parse(semdocFile, target);
        } finally {
            if (semdocFile != null) {
                semdocFile.close();
            }
        }

        assertThat(target.getAnnotatedTextVector().size(), is(243));
    }

}
14 changes: 14 additions & 0 deletions grobid-ner/src/test/resources/log4j-test.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd" >
<log4j:configuration>
<!-- Log4j 1.x configuration for the tests: a single console appender attached to the root logger. -->
<!-- NOTE(review): log4j 1.x looks up log4j.xml / log4j.properties by default; a file named log4j-test.xml is presumably only used when referenced explicitly (e.g. -Dlog4j.configuration). Confirm it is actually loaded. -->
<appender name="CONSOLE" class="org.apache.log4j.ConsoleAppender">
<layout class="org.apache.log4j.PatternLayout">
<!-- Line layout: timestamp, level left-justified to 5 characters, last component of the logger name left-justified to 25 characters, then the message. -->
<!-- NOTE(review): the time part is written HH:mm.ss (dot before the seconds); confirm this is intended rather than HH:mm:ss. -->
<!-- NOTE(review): the trailing \n is presumably unescaped to a newline by the XML configurator; PatternLayout's own platform line separator token is %n. Confirm. -->
<param name="ConversionPattern" value="%d{dd MMM yyyy HH:mm.ss} [%-5p] %-25c{1} - %m\n"/>
</layout>
</appender>

<!-- Root logger: DEBUG threshold, output goes to the CONSOLE appender defined above. -->
<root>
<priority value="DEBUG"></priority>
<appender-ref ref="CONSOLE"/>
</root>
</log4j:configuration>

0 comments on commit e0b74d6

Please sign in to comment.