-
Notifications
You must be signed in to change notification settings - Fork 3
/
C7g_from_a_Nov.java
48 lines (37 loc) · 1.55 KB
/
C7g_from_a_Nov.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
package dk.kb.webdanica.core.criteria;
import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import dk.kb.webdanica.core.utils.Constants;
import dk.kb.webdanica.core.utils.TextUtils;
/**
 * Pig evaluation function for criterion C7g: looks for entries of
 * {@code Words.danishMajorCitiesNov} in a text and reports the matches as a
 * single string.
 *
 * <p>NOTE(review): judging by the field name, the word list presumably holds
 * names of major Danish cities; confirm against {@code Words}.
 */
public class C7g_from_a_Nov extends EvalFunc<String> {

    /**
     * Evaluates criterion C7g on the first field of the given tuple.
     *
     * @param input tuple whose first field is cast to {@code String} and used
     *              as the text to search
     * @return the criteria name followed by {@code Constants.NODATA} when the
     *         tuple is null, empty, or its first field is null; otherwise
     *         {@code "C7g: "} followed by the matches joined with {@code "#"},
     *         or {@code "C7g: emptylist"} when nothing matched
     * @throws IOException if reading from the tuple fails
     */
    @Override
    public String exec(Tuple input) throws IOException {
        if (input == null || input.size() == 0 || input.get(0) == null) {
            return Constants.getCriteriaName(this) + ": " + Constants.NODATA;
        }
        String text = (String) input.get(0);
        // FIXME Should be SearchWord, but this does not work
        Set<String> foundMatches = computeC7g(text);
        if (foundMatches.isEmpty()) {
            return "C7g: emptylist";
        }
        return "C7g: " + TextUtils.conjoin("#", foundMatches);
    }

    /**
     * Searches the text via {@code TextUtils.SearchWordRegExp} using the
     * {@code Words.danishMajorCitiesNov} word list.
     *
     * @param text the text to search
     * @return the matches reported by {@code TextUtils.SearchWordRegExp}
     */
    public static Set<String> computeC7g(String text) {
        // NOTE(review): the meaning of the trailing 'false' flag is defined by
        // TextUtils and is not visible here; confirm before changing it.
        return TextUtils.SearchWordRegExp(text, Words.danishMajorCitiesNov, false);
    }

    /**
     * Searches the text via {@code TextUtils.SearchWordPatterns} using
     * {@code Words.patternsdanishMajorCitiesNov}.
     *
     * @param text the text to search
     * @return the matches reported by {@code TextUtils.SearchWordPatterns}
     */
    public static Set<String> computeC7gV2(String text) {
        return TextUtils.SearchWordPatterns(text, Words.patternsdanishMajorCitiesNov, false);
    }

    /**
     * Searches the text via {@code TextUtils.SearchWordPatterns} using
     * {@code Words.patternsdanishMajorCitiesNovNoCase}.
     *
     * <p>NOTE(review): the field name suggests case-insensitive patterns;
     * confirm against {@code Words}.
     *
     * @param text the text to search
     * @return the matches reported by {@code TextUtils.SearchWordPatterns}
     */
    public static Set<String> computeC7gV3(String text) {
        return TextUtils.SearchWordPatterns(text, Words.patternsdanishMajorCitiesNovNoCase, false);
    }

    /**
     * Returns the tokens that also occur in {@code Words.danishMajorCitiesNov}.
     *
     * <p>The intersection is computed on a copy, so the caller's set is left
     * untouched and unmodifiable sets are accepted. The copy keeps the
     * iteration order of {@code tokens}.
     *
     * @param tokens the tokens to filter; must not be null
     * @return a new set holding only the tokens found in the word list
     */
    public static Set<String> computeC7gV5(Set<String> tokens) {
        // Copy first: retainAll on the argument itself would wipe the
        // caller's tokens and fail on unmodifiable sets.
        Set<String> matches = new LinkedHashSet<String>(tokens);
        matches.retainAll(Arrays.asList(Words.danishMajorCitiesNov));
        return matches;
    }
}