Skip to content

Commit

Permalink
Merge abc9490 into bb8cf62
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Aug 20, 2019
2 parents bb8cf62 + abc9490 commit fefc754
Show file tree
Hide file tree
Showing 41 changed files with 2,644 additions and 1,026 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.java-version
target
grobid-home/tmp
.DS_Store
Expand Down
1 change: 1 addition & 0 deletions .java-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
openjdk64-11.0.2
12 changes: 11 additions & 1 deletion grobid-core/src/main/java/org/grobid/core/GrobidModels.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,11 @@ public enum GrobidModels implements GrobidModel {
// ENTITIES_BIOTECH("entities/biotech"),
ENTITIES_BIOTECH("bio"),
ASTRO("astro"),
SOFTWARE("software");
SOFTWARE("software"),
DUMMY("none");

// Must be declared after the enum constants: Java requires the constant list to come first in an enum body.
public static final String DUMMY_FOLDER_LABEL = "none";

/**
* Absolute path to the model.
Expand All @@ -55,6 +59,12 @@ public enum GrobidModels implements GrobidModel {
private static final ConcurrentMap<String, GrobidModel> models = new ConcurrentHashMap<>();

GrobidModels(String folderName) {
if(StringUtils.equals(DUMMY_FOLDER_LABEL, folderName)) {
modelPath = DUMMY_FOLDER_LABEL;
this.folderName = DUMMY_FOLDER_LABEL;
return;
}

this.folderName = folderName;
File path = GrobidProperties.getModelPath(this);
if (!path.exists()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
package org.grobid.core.engines.tagging;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import org.apache.commons.lang3.StringUtils;
//import org.grobid.core.utilities.Pair;
import org.apache.commons.lang3.tuple.Pair;
import org.grobid.core.utilities.Triple;
import org.wipo.analyzers.wipokr.utils.StringUtil;

import org.apache.commons.lang3.tuple.Pair;

import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
import java.util.regex.Pattern;

/**
Expand All @@ -22,6 +20,7 @@ public class GenericTaggerUtils {

public static final String START_ENTITY_LABEL_PREFIX = "I-";
public static final String START_ENTITY_LABEL_PREFIX_ALTERNATIVE = "B-";
public static final String START_ENTITY_LABEL_PREFIX_ALTERNATIVE_2 = "E-";
public static final Pattern SEPARATOR_PATTERN = Pattern.compile("[\t ]");

/**
Expand All @@ -30,31 +29,23 @@ public class GenericTaggerUtils {
* Note an empty line in the result will be transformed to a 'null' pointer of a pair
*/
public static List<Pair<String, String>> getTokensAndLabels(String labeledResult) {
    // Each split line is reduced to (first column, last column), i.e. the token and its label.
    // A lambda is used instead of an anonymous Function so that only one return statement
    // remains and the Function type never has to be named explicitly.
    return processLabeledResult(labeledResult,
        splits -> Pair.of(splits.get(0), splits.get(splits.size() - 1)));
}

/**
* @param labeledResult labeled result from a tagger
* @return a list of triples - first element in a pair is a token itself, the second is a label (e.g. <footnote> or I-<footnote>)
* @return a list of triples - the first element of each triple is the token itself, the second is its label (e.g. <footnote> or I-<footnote>)
* and the third element is a string with the features (null when addFeatureString is false)
* Note that an empty line in the result will be transformed to a 'null' entry in place of a triple
*/
public static List<Triple<String, String, String>> getTokensWithLabelsAndFeatures(String labeledResult,
final boolean addFeatureString) {
Function<List<String>, Triple<String, String, String>> fromSplits = new Function<List<String>, Triple<String, String, String>>() {
@Override public Triple<String, String, String> apply(List<String> splits) {
String featureString = addFeatureString ? Joiner.on("\t").join(splits.subList(0, splits.size() - 1)) : null;
return new Triple<>(
splits.get(0),
splits.get(splits.size() - 1),
featureString);
}
Function<List<String>, Triple<String, String, String>> fromSplits = splits -> {
String featureString = addFeatureString ? Joiner.on("\t").join(splits.subList(0, splits.size() - 1)) : null;
return new Triple<>(
splits.get(0),
splits.get(splits.size() - 1),
featureString);
};

return processLabeledResult(labeledResult, fromSplits);
Expand Down Expand Up @@ -82,6 +73,8 @@ public static String getPlainLabel(String label) {
}

/**
 * Tells whether a label marks the beginning of an entity, i.e. whether it starts with
 * one of the start-of-entity prefixes declared on this class
 * ({@code START_ENTITY_LABEL_PREFIX}, {@code START_ENTITY_LABEL_PREFIX_ALTERNATIVE},
 * {@code START_ENTITY_LABEL_PREFIX_ALTERNATIVE_2}).
 *
 * @param label the label produced by the tagger; may be null
 * @return true if the label starts with one of the start-of-entity prefixes,
 *         false otherwise (a null label yields false)
 */
public static boolean isBeginningOfEntity(String label) {
    // All three checks go through commons-lang3 StringUtils (null-safe) rather than
    // mixing in the unrelated org.wipo...StringUtil helper for the alternative prefixes.
    return StringUtils.startsWith(label, START_ENTITY_LABEL_PREFIX)
        || StringUtils.startsWith(label, START_ENTITY_LABEL_PREFIX_ALTERNATIVE)
        || StringUtils.startsWith(label, START_ENTITY_LABEL_PREFIX_ALTERNATIVE_2);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
public enum GrobidCRFEngine {
WAPITI("wapiti"),
CRFPP("crf"),
DELFT("delft");
DELFT("delft"),
DUMMY("dummy");

private final String ext;

Expand Down
Loading

0 comments on commit fefc754

Please sign in to comment.