Update tokenizers

kermitt2 · Feb 20, 2017 · 471f3a4 · 471f3a4
2 parents f8387f4 + 9f26a4b
commit 471f3a4
Show file tree

Hide file tree

Showing 4 changed files with 14 additions and 26 deletions.
diff --git a/Readme.md b/Readme.md
@@ -5,12 +5,12 @@
 
 ## Purpose
 
-GROBID NER is a Named-Entity Recogniser based on the GROBID library ([grobid](https://raw.github.com/kermitt2/grobid)), a text mining tool exploiting CRF. The installation of GROBID is necessary.  
+GROBID NER is a Named-Entity Recogniser based on the GROBID library ([grobid](https://github.com/kermitt2/grobid)), a text mining tool exploiting CRF. The installation of GROBID is necessary.  
 
 Grobid NER has been developed more specifically for the purpose of supporting disambiguation and resolution of the entities against knowledge bases such as Wikipedia. For a description of the NER, installation, usage and other technical features, see the [documentation](http://grobid-ner.readthedocs.io/en/latest/). 
 
 ## License
 
 Grobid and grobid-ner are distributed under [Apache 2.0 license](http://www.apache.org/licenses/LICENSE-2.0). 
 
-Author and contact: Patrice Lopez (<patrice.lopez@science-miner.com>) 
+Author and contact: Patrice Lopez (<patrice.lopez@science-miner.com>) 
diff --git a/grobid-ner/doc/build-and-install.md b/grobid-ner/doc/build-and-install.md
@@ -48,13 +48,12 @@ Or download directly the zip file:
 ```
 
 GROBID NER is actually a sub-project of GROBID. 
-Although GROBID NER might be merged with GROBID in the future, at this point the GROBID NER sub-module simply need to added manually. 
-In the main directory of GROBID NER:
+Although GROBID NER might be merged with GROBID in the future, at this point the GROBID NER sub-module simply needs to be added manually to the main directory of GROBID: 
 
 ```bash
 > cp -r grobid-ner /path/to/grobid/
 
-> cp -r grobid-home/models/* /path/to/grobid/grobid-home/
+> cp -r grobid-home/models/* /path/to/grobid/grobid-home/models/
 ```
 
 Then build the GROBID NER subproject:
@@ -63,4 +62,4 @@ Then build the GROBID NER subproject:
 > cd /path/to/grobid/grobid-ner
 
 > mvn clean install
-```
+```
diff --git a/grobid-ner/src/main/java/org/grobid/core/engines/NERFrParser.java b/grobid-ner/src/main/java/org/grobid/core/engines/NERFrParser.java
@@ -1,26 +1,16 @@
 package org.grobid.core.engines;
 
-import org.apache.commons.io.FileUtils;
 import org.grobid.core.GrobidModels;
+import org.grobid.core.analyzers.GrobidAnalyzer;
 import org.grobid.core.data.Entity;
-import org.grobid.core.data.Sense;
-import org.grobid.core.exceptions.GrobidResourceException;
 import org.grobid.core.engines.tagging.GenericTaggerUtils;
-import org.grobid.core.exceptions.GrobidException;
-import org.grobid.core.features.FeaturesVectorNER;
+import org.grobid.core.lang.Language;
 import org.grobid.core.lexicon.Lexicon;
 import org.grobid.core.lexicon.LexiconPositionsIndexes;
-import org.grobid.core.lang.Language;
-import org.grobid.core.analyzers.GrobidAnalyzer;
 import org.grobid.core.utilities.Pair;
-import org.grobid.core.utilities.LanguageUtilities;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.File;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.util.ArrayList;
 import java.util.List;
 
 import static org.apache.commons.lang3.StringUtils.isEmpty;
@@ -52,7 +42,7 @@ public List<Entity> extractNE(String text) {
         }
         if (tokens == null)
             return null;
-        
+
         LexiconPositionsIndexes positionsIndexes = new LexiconPositionsIndexes(lexicon);
         positionsIndexes.computeIndexes(text);
 

diff --git a/grobid-ner/src/main/java/org/grobid/trainer/stax/INRIALeMondeCorpusStaxHandler.java b/grobid-ner/src/main/java/org/grobid/trainer/stax/INRIALeMondeCorpusStaxHandler.java
@@ -3,27 +3,26 @@
 import com.ctc.wstx.stax.WstxInputFactory;
 import org.apache.commons.lang3.StringUtils;
 import org.codehaus.stax2.XMLStreamReader2;
-import org.grobid.core.lexicon.NERLexicon;
 import org.grobid.core.analyzers.GrobidAnalyzer;
 import org.grobid.core.lang.Language;
-
+import org.grobid.core.lexicon.NERLexicon;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import javax.xml.stream.XMLEventReader;
 import javax.xml.stream.XMLEventWriter;
 import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.events.XMLEvent;
 import java.io.*;
 import java.util.List;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import static org.apache.commons.lang3.StringUtils.*;
 
 /**
  * Created by lfoppiano on 29/08/16.
  */
 public class INRIALeMondeCorpusStaxHandler implements StaxParserContentHandler {
-	private static Logger LOGGER = LoggerFactory.getLogger(INRIALeMondeCorpusStaxHandler.class);
+    private static Logger LOGGER = LoggerFactory.getLogger(INRIALeMondeCorpusStaxHandler.class);
 
     private Writer writer;
     private StringBuilder sb;
@@ -41,7 +40,7 @@ public class INRIALeMondeCorpusStaxHandler implements StaxParserContentHandler {
     private String comment = null;
     private String gender = null;
 
-	private GrobidAnalyzer analyzer = GrobidAnalyzer.getInstance();
+    private GrobidAnalyzer analyzer = GrobidAnalyzer.getInstance();
 
     public INRIALeMondeCorpusStaxHandler() {
         this.sb = new StringBuilder();