Skip to content

Commit

Permalink
Sort morphological features in lowercase alphabetical order. Better f…
Browse files Browse the repository at this point in the history
…its with what is expected in UD datasets
  • Loading branch information
AngledLuffa committed Nov 3, 2023
1 parent cb50801 commit f77a9b4
Showing 1 changed file with 21 additions and 3 deletions.
24 changes: 21 additions & 3 deletions src/edu/stanford/nlp/trees/ud/CoNLLUFeatures.java
Expand Up @@ -15,6 +15,23 @@
* which is necessary for the CoNLLU format
*/
public class CoNLLUFeatures extends TreeMap<String, String> {
/**
 * Orders feature names alphabetically while ignoring case.
 * <br>
 * {@code null} sorts before every non-null string, and two {@code null}s
 * compare as equal.  Names which differ only by case compare as equal,
 * so a sorted map using this comparator treats them as the same key.
 * <br>
 * Implements {@code Serializable} because the comparator is stored inside
 * the enclosing {@code TreeMap}, which is itself serializable; a
 * non-serializable comparator would make serializing the map fail.
 */
public static class LowercaseComparator implements Comparator<String>, java.io.Serializable {
  private static final long serialVersionUID = 1L;

  /**
   * Compares two feature names, ignoring case.
   *
   * @param x the first name, may be {@code null}
   * @param y the second name, may be {@code null}
   * @return a negative number, zero, or a positive number as {@code x}
   *         sorts before, the same as, or after {@code y}
   */
  @Override
  public int compare(String x, String y) {
    if (x == null && y == null) {
      return 0;
    }
    // a single null sorts ahead of any non-null name
    if (x == null) {
      return -1;
    }
    if (y == null) {
      return 1;
    }
    return x.compareToIgnoreCase(y);
  }
}

// Single shared comparator instance handed to the TreeMap superclass
// constructor, so that keys are kept in case-insensitive alphabetical order.
static final LowercaseComparator comparator = new LowercaseComparator();

/**
* Parses the value of the feature column in a CoNLL-U file
* and stores them in this sorted map with the feature names as keys
Expand All @@ -24,7 +41,7 @@ public class CoNLLUFeatures extends TreeMap<String, String> {
* @return A {@code HashMap<String,String>} with the feature values.
*/
public CoNLLUFeatures(String featureString) {
super();
super(comparator);

if (!featureString.equals("_")) {
String[] featValPairs = featureString.split("\\|");
Expand All @@ -36,11 +53,12 @@ public CoNLLUFeatures(String featureString) {
}

/**
 * Creates a feature map holding a copy of every entry in {@code features}.
 * <br>
 * The map is built with the shared case-insensitive comparator first and
 * the entries are copied in afterwards, so the keys end up in that order
 * no matter how the source map orders them.
 *
 * @param features the feature name to feature value pairs to copy
 */
public CoNLLUFeatures(Map<String, String> features) {
  super(comparator);
  putAll(features);
}

/**
 * Creates an empty feature map whose keys are ordered by the shared
 * case-insensitive comparator.
 */
public CoNLLUFeatures() {
  super(comparator);
}


Expand Down

0 comments on commit f77a9b4

Please sign in to comment.