Skip to content

Commit

Permalink
[uk] performance optimization for Ukrainian tokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
arysin committed Jan 22, 2015
1 parent 924a883 commit aa134d6
Showing 1 changed file with 6 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,11 @@ public List<String> tokenize(String text) {
}

/**
 * Normalizes raw text before it is tokenized.
 *
 * <p>The right single quotation mark (U+2019) and the modifier letter apostrophe (U+02BC)
 * are both mapped to the ASCII apostrophe, and every combining acute accent (U+0301) and
 * soft hyphen (U+00AD) is removed.
 *
 * @param text the text to normalize
 * @return the normalized text
 */
private static String cleanup(String text) {
  // Char-for-char replacement; written as escapes so the result does not depend on the
  // source file encoding: U+2019 (’) and U+02BC (ʼ) -> '\''.
  text = text.replace('\u2019', '\'').replace('\u02BC', '\'');

  // Performance: only pay for the String-based replace calls when one of the characters
  // to strip is actually present in the text.
  if (text.indexOf('\u0301') >= 0 || text.indexOf('\u00AD') >= 0) {
    text = text.replace("\u0301", "").replace("\u00AD", "");
  }
  return text;
}

}

0 comments on commit aa134d6

Please sign in to comment.