Skip to content

Commit

Permalink
Merge bc4c818 into a088439
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Jul 21, 2016
2 parents a088439 + bc4c818 commit dd0d3db
Show file tree
Hide file tree
Showing 137 changed files with 25,746 additions and 25,720 deletions.
6 changes: 3 additions & 3 deletions Readme.md
Expand Up @@ -51,7 +51,7 @@ For testing purposes, a public GROBID demo server is available at the following

The Web services are documented in the [service manual](https://github.com/kermitt2/grobid/blob/master/grobid-service/src/main/doc/grobid-service-manual.pdf).

_Warning_: Some quotas and query limitations apply to the demo server! If you are interested in using such an online GROBID service for your project without limitations (and with support), please contact us (<patrice.lopez@science-miner.com>).
_Warning_: Some quotas and query limitations apply to the demo server! If you are interested in using such an online GROBID service for your project without limitations (and with support), please contact us (<patrice.lopez@science-miner.com>).

## GROBID documentation

Expand All @@ -61,10 +61,10 @@ Visit the [GROBID documentation](http://grobid.readthedocs.org) for more detaile

The latest stable release of GROBID is version ```0.4.0```. Compared to the previous version ```0.3.9```, this version brings:

+ Improvement of the recognition of citations thanks to refinements of CRF features - +4% in f-score for the PubMed Central sample.
+ Improvement of the recognition of citations thanks to refinements of CRF features - +4% in f-score for the PubMed Central sample.
+ Improvement of the full text model, with new features and the introduction of two additional models for figures and tables.
+ More robust synchronization of CRF sequence with PDF areas, resulting in improved bounding box calculations for locating annotations in the PDF documents.
+ Improved general robustness thanks to better token alignments.
+ Improved general robustness thanks to better token alignments.

## License

Expand Down
6 changes: 3 additions & 3 deletions doc/Grobid-service.md
Expand Up @@ -28,19 +28,19 @@ You can also test the RESTFul API with **curl** command lines:

* header extraction of a PDF file in the current directory:
```bash
> curl -v -include --form input=@./thefile.pdf localhost:8080/processHeaderDocument
> curl -v --form input=@./thefile.pdf localhost:8080/processHeaderDocument
```
* fulltext extraction (header, body and citations) of a PDF file in the current directory:
```bash
> curl -v -include --form input=@./thefile.pdf localhost:8080/processFulltextDocument
> curl -v --form input=@./thefile.pdf localhost:8080/processFulltextDocument
```
* parsing of a raw reference string in isolation without consolidation (default value):
```bash
> curl -X POST -d "citations=Graff, Expert. Opin. Ther. Targets (2002) 6(1): 103-113" localhost:8080/processCitation
```
* extraction and parsing of all references in a PDF without consolidation (default value):
```bash
> curl -v -include --form --form input=@./thefile.pdf localhost:8080/processReferences
> curl -v --form input=@./thefile.pdf localhost:8080/processReferences
```

## Full documentation
Expand Down
Expand Up @@ -3263,6 +3263,11 @@ public String toTEIAuthorBlock(int nbTag) {
continue;
}

if ( (author.getFirstName() == null) && (author.getMiddleName() == null) &&
(author.getLastName() == null) ) {
continue;
}

TextUtilities.appendN(tei, '\t', nbTag);
tei.append("<author");

Expand Down
15 changes: 13 additions & 2 deletions grobid-core/src/main/java/org/grobid/core/data/Person.java
Expand Up @@ -165,8 +165,8 @@ public String toString() {
}

public String toTEI() {
if ((title == null) && (firstName == null) && (middleName == null) &&
(lastName == null) && (suffix == null)) {
if ( (firstName == null) && (middleName == null) &&
(lastName == null) ) {
return null;
}
String res = "<persName>";
Expand All @@ -191,5 +191,16 @@ public String toTEI() {
/**
 * Normalizes a name by handing it to {@code TextUtilities.capitalizeFully}
 * with {@code NAME_DELIMITERS} as the second argument.
 *
 * @param inputName the name to normalize
 * @return the value returned by {@code TextUtilities.capitalizeFully}
 */
public static String normalizeName(String inputName) {
    final String normalized = TextUtilities.capitalizeFully(inputName, NAME_DELIMITERS);
    return normalized;
}

/**
 * Tells whether this person structure holds a usable person name, which in
 * our case means that at least a last name or a raw name is present.
 *
 * @return {@code true} if {@code lastName} or {@code rawName} is non-null,
 *         {@code false} when both are null
 */
public boolean isValid() {
    return (lastName != null) || (rawName != null);
}

}
Expand Up @@ -988,7 +988,8 @@ else if ( (i==acknowResultLines.length-1) && extraDiv) {
// we skip the last div
}
else {*/
buffer.append(acknowResultLines[i] + "\n");
buffer.append(TextUtilities.dehyphenize(acknowResultLines[i]) + "\n");
//buffer.append(acknowResultLines[i] + "\n");
//}
}
}
Expand Down
@@ -1,20 +1,25 @@
package org.grobid.core.utilities;

import java.io.*;
import java.util.Enumeration;
import java.util.Properties;

import javax.naming.Context;
import javax.naming.InitialContext;
import javax.naming.NamingException;

import org.apache.commons.lang3.StringUtils;
import org.grobid.core.GrobidModels;
import org.grobid.core.engines.tagging.GrobidCRFEngine;
import org.grobid.core.exceptions.GrobidPropertyException;
import org.grobid.core.exceptions.GrobidResourceException;

import org.grobid.core.utilities.counters.CntManager;
import org.grobid.core.utilities.counters.impl.CntManagerFactory;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.naming.Context;
import javax.naming.InitialContext;
import javax.naming.NamingException;
import java.io.*;
import java.util.Enumeration;
import java.util.Properties;

/**
* This class contains all names of grobid-properties and provides methods
* to load grobid-properties from a property file. Each property will be copied
Expand All @@ -32,6 +37,8 @@ public class GrobidProperties {
*/
protected static Context context;

private CntManager cntManager = CntManagerFactory.getCntManager();

/**
* name of property which determines, if grobid runs in test mode.
*/
Expand Down
86 changes: 43 additions & 43 deletions grobid-core/src/main/java/org/grobid/core/utilities/KeyGen.java
@@ -1,43 +1,43 @@
package org.grobid.core.utilities;

/**
 * Utility that produces short random keys drawn from an unambiguous alphabet.
 *
 * @author Florian Zipser
 */
public class KeyGen {
    /**
     * Minimum length for a decent key; every key returned by
     * {@link #getKey()} has exactly this many characters.
     */
    public static final int MIN_LENGTH = 10;

    /**
     * The random number generator used to pick each character of a key.
     */
    protected static java.util.Random r = new java.util.Random();

    /**
     * The alphabet keys are built from. Characters must be printable,
     * memorable, and must neither break HTML (so no '<', '>', '&', '=', ...)
     * nor shell commands (so no '<', '>', '$', '!', ...). The letters
     * i/I, l/L and o/O are left out, as are the digits zero and one.
     */
    protected static final char[] goodChar =
            "abcdefghjkmnpqrstuvwxyzABCDEFGHJKMNPQRSTUVWXYZ23456789".toCharArray();

    /**
     * Generates a random key of {@link #MIN_LENGTH} characters, each one
     * picked from {@link #goodChar} with the shared generator {@link #r}.
     *
     * @return a generated key
     */
    public static String getKey() {
        final char[] key = new char[MIN_LENGTH];
        int pos = 0;
        while (pos < key.length) {
            key[pos++] = goodChar[r.nextInt(goodChar.length)];
        }
        return new String(key);
    }

}
package org.grobid.core.utilities;

/**
 * Utility that produces short random keys drawn from an unambiguous alphabet.
 *
 * @author Florian Zipser
 */
public class KeyGen {
    /**
     * Minimum length for a decent key; every key returned by
     * {@link #getKey()} has exactly this many characters.
     */
    public static final int MIN_LENGTH = 10;

    /**
     * The random number generator used to pick each character of a key.
     */
    protected static java.util.Random r = new java.util.Random();

    /**
     * The alphabet keys are built from. Characters must be printable,
     * memorable, and must neither break HTML (so no '<', '>', '&', '=', ...)
     * nor shell commands (so no '<', '>', '$', '!', ...). The letters
     * i/I, l/L and o/O are left out, as are the digits zero and one.
     */
    protected static final char[] goodChar =
            "abcdefghjkmnpqrstuvwxyzABCDEFGHJKMNPQRSTUVWXYZ23456789".toCharArray();

    /**
     * Generates a random key of {@link #MIN_LENGTH} characters, each one
     * picked from {@link #goodChar} with the shared generator {@link #r}.
     *
     * @return a generated key
     */
    public static String getKey() {
        final char[] key = new char[MIN_LENGTH];
        int pos = 0;
        while (pos < key.length) {
            key[pos++] = goodChar[r.nextInt(goodChar.length)];
        }
        return new String(key);
    }

}
Expand Up @@ -39,7 +39,10 @@ public static BoundingBox getBoundingBoxForPdf(PDDocument document, String coord
float x = Float.parseFloat(split[1]);
float y = Float.parseFloat(split[2]);
float w = Float.parseFloat(split[3]);
float h = Float.parseFloat(split[4]);
String nextString = split[4];
if (nextString.indexOf(";") != -1)
nextString = nextString.substring(0, nextString.indexOf(";"));
float h = Float.parseFloat(nextString);

float annX = x + lowerX;
float annY = (height - (y + h)) + lowerY;
Expand Down

0 comments on commit dd0d3db

Please sign in to comment.