Skip to content

Commit

Permalink
[en] override getTokenizingCharacters() instead of using our own member
Browse files Browse the repository at this point in the history
  • Loading branch information
danielnaber committed Apr 28, 2015
1 parent a0d7e69 commit 5b1ed29
Showing 1 changed file with 6 additions and 4 deletions.
Expand Up @@ -30,10 +30,12 @@
*/
public class EnglishWordTokenizer extends WordTokenizer {

private final String enTokenizing;

public EnglishWordTokenizer() {
enTokenizing = super.getTokenizingCharacters() + "–"; // n-dash
}

/**
 * Returns the characters at which text is split into tokens: everything the
 * superclass reports, plus the n-dash.
 *
 * @return the superclass's tokenizing characters followed by the n-dash
 */
@Override
public String getTokenizingCharacters() {
  final String inheritedDelimiters = super.getTokenizingCharacters();
  // English additionally splits on the n-dash
  return inheritedDelimiters + "–";
}

/**
Expand All @@ -52,7 +54,7 @@ public EnglishWordTokenizer() {
public List<String> tokenize(final String text) {
final List<String> l = new ArrayList<>();
final StringTokenizer st = new StringTokenizer(text,
enTokenizing, true);
getTokenizingCharacters(), true);
while (st.hasMoreElements()) {
final String token = st.nextToken();
if (token.length() > 1 && token.endsWith("-")) {
Expand Down

0 comments on commit 5b1ed29

Please sign in to comment.