-
Notifications
You must be signed in to change notification settings - Fork 2.7k
/
CoNLLUFeatures.java
107 lines (94 loc) · 2.73 KB
/
CoNLLUFeatures.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
package edu.stanford.nlp.trees.ud;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
/**
 * A subclass of TreeMap whose {@code toString()} renders the contents as the
 * feature column of a CoNLL-U file, with a constructor that parses such a
 * column back into a map.
 * <br>
 * This is a TreeMap so that the features are sorted by their key
 * (ignoring case), which is necessary for the CoNLLU format.
 */
public class CoNLLUFeatures extends TreeMap<String, String> {

  /**
   * Orders strings case-insensitively, with {@code null} sorting before any
   * non-null string.
   * <br>
   * Implements {@link Serializable} because a TreeMap writes its comparator
   * as part of its serialized form; without it, serializing a
   * CoNLLUFeatures fails with a NotSerializableException.
   */
  public static class LowercaseComparator implements Comparator<String>, Serializable {
    private static final long serialVersionUID = 1L;

    @Override
    public int compare(String x, String y) {
      if (x == null && y == null) {
        return 0;
      }
      if (x == null) {
        return -1;
      }
      if (y == null) {
        return 1;
      }
      return x.compareToIgnoreCase(y);
    }
  }

  /** Shared key ordering used by every instance of this map. */
  static final LowercaseComparator comparator = new LowercaseComparator();

  /**
   * Parses the value of the feature column in a CoNLL-U file and stores
   * the features in this map, with the feature names as keys and the
   * feature values as values.
   * <br>
   * {@code null}, the empty string and {@code "_"} all produce an empty map.
   * Empty segments (for instance from a doubled {@code |}) are skipped.
   * Each remaining segment is split at its first {@code =}, so a value
   * may itself contain {@code =} and may be empty.
   *
   * @param featureString the feature column, e.g. {@code "Case=Nom|Number=Sing"}
   * @throws IllegalArgumentException if a non-empty segment contains no {@code =}
   */
  public CoNLLUFeatures(String featureString) {
    super(comparator);
    if (featureString == null || featureString.isEmpty() || featureString.equals("_")) {
      return;
    }
    for (String pair : featureString.split("\\|")) {
      if (pair.isEmpty()) {
        continue;
      }
      // Split on the first '=' only, so the value is kept intact.
      int separator = pair.indexOf('=');
      if (separator < 0) {
        throw new IllegalArgumentException(
            "Malformed CoNLL-U feature '" + pair + "' (expected name=value) in: " + featureString);
      }
      this.put(pair.substring(0, separator), pair.substring(separator + 1));
    }
  }

  /**
   * Creates a feature map holding a copy of the given entries.
   *
   * @param features the entries to copy; {@code null} is treated as empty
   */
  public CoNLLUFeatures(Map<String, String> features) {
    super(comparator);
    if (features != null) {
      putAll(features);
    }
  }

  /** Creates an empty feature map. */
  public CoNLLUFeatures() {
    super(comparator);
  }

  /**
   * Orders feature names by their lower-cased form.
   * <br>
   * Lower-casing uses {@link Locale#ROOT} so that the resulting order does
   * not depend on the default locale of the JVM.
   */
  public static class FeatureNameComparator implements Comparator<String> {
    @Override
    public int compare(String featureName1, String featureName2) {
      return featureName1.toLowerCase(Locale.ROOT)
          .compareTo(featureName2.toLowerCase(Locale.ROOT));
    }
  }

  /** Stateless, so a single instance can be reused for every sort. */
  private static final FeatureNameComparator FEATURE_NAME_COMPARATOR = new FeatureNameComparator();

  /**
   * Converts the features to a feature string to be used
   * in a CoNLL-U file: {@code name=value} pairs sorted by name
   * and joined with {@code |}.
   *
   * @param features the features to render; may be {@code null}
   * @return The feature string, or {@code "_"} if there are no features.
   */
  public static String toFeatureString(Map<String,String> features) {
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    if (features != null) {
      List<String> sortedKeys = new ArrayList<>(features.keySet());
      Collections.sort(sortedKeys, FEATURE_NAME_COMPARATOR);
      for (String key : sortedKeys) {
        if (!first) {
          sb.append("|");
        } else {
          first = false;
        }
        sb.append(key)
          .append("=")
          .append(features.get(key));
      }
    }

    /* Empty feature list. */
    if (first) {
      sb.append("_");
    }

    return sb.toString();
  }

  /**
   * Renders this map as a CoNLL-U feature string.
   *
   * @return the same string as {@link #toFeatureString(Map)} applied to this map
   */
  @Override
  public String toString() {
    return toFeatureString(this);
  }
}