From f77a9b414cb28c9429a7f100b76272355bf78bb6 Mon Sep 17 00:00:00 2001
From: John Bauer
Date: Fri, 27 Oct 2023 21:46:47 -0700
Subject: [PATCH] Sort morphological features in lowercase alphabetical order.
 Better fits with what is expected in UD datasets

---
 .../stanford/nlp/trees/ud/CoNLLUFeatures.java | 24 ++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/edu/stanford/nlp/trees/ud/CoNLLUFeatures.java b/src/edu/stanford/nlp/trees/ud/CoNLLUFeatures.java
index 8e769b5dea..7363ce6a6b 100644
--- a/src/edu/stanford/nlp/trees/ud/CoNLLUFeatures.java
+++ b/src/edu/stanford/nlp/trees/ud/CoNLLUFeatures.java
@@ -15,6 +15,23 @@
  * which is necessary for the CoNLLU format
  */
 public class CoNLLUFeatures extends TreeMap {
+  public static class LowercaseComparator implements Comparator {
+    public int compare(String x, String y) {
+      if (x == null && y == null) {
+        return 0;
+      }
+      if (x == null) {
+        return -1;
+      }
+      if (y == null) {
+        return 1;
+      }
+      return x.compareToIgnoreCase(y);
+    }
+  }
+
+  static final LowercaseComparator comparator = new LowercaseComparator();
+
   /**
    * Parses the value of the feature column in a CoNLL-U file
    * and returns them in a HashMap with the feature names as keys
@@ -24,7 +41,7 @@ public class CoNLLUFeatures extends TreeMap {
    * @return A {@code HashMap} with the feature values.
    */
   public CoNLLUFeatures(String featureString) {
-    super();
+    super(comparator);
 
     if (!featureString.equals("_")) {
       String[] featValPairs = featureString.split("\\|");
@@ -36,11 +53,12 @@ public CoNLLUFeatures(String featureString) {
     }
   }
   public CoNLLUFeatures(Map features) {
-    super(features);
+    super(comparator);
+    putAll(features);
   }
 
   public CoNLLUFeatures() {
-    super();
+    super(comparator);
   }