Skip to content

Commit

Permalink
Update the number finding pattern to accommodate starting with .6 instead of 0.6 ... addresses #547
Browse files: browse the repository at this point in the history
  • Loading branch information
AngledLuffa committed Feb 12, 2022
1 parent 93e6811 commit 5ee2c39
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/edu/stanford/nlp/ie/NumberNormalizer.java
Expand Up @@ -88,7 +88,7 @@ public static void setVerbose(boolean verbose) {
// Converts numbers in words to numeric form
// works through trillions
private static final Pattern digitsPattern = Pattern.compile("\\d+");
- private static final Pattern digitsPatternExtended = Pattern.compile("(\\d+\\.?\\d*)(dozen|score|hundred|thousand|million|billion|trillion)?"); // this is really just second-guessing the tokenizer
+ private static final Pattern digitsPatternExtended = Pattern.compile("((?:\\d+\\.?\\d*)|(?:\\.\\d+))(dozen|score|hundred|thousand|million|billion|trillion)?"); // this is really just second-guessing the tokenizer
private static final Pattern numPattern = Pattern.compile("[-+]?(?:\\d+(?:,\\d\\d\\d)*(?:\\.\\d*)?|\\.\\d+)");
private static final Pattern numRangePattern = Pattern.compile("(" + numPattern.pattern() + ")-(" + numPattern.pattern() + ")");
// private static final Pattern[] endUnitWordsPattern = new Pattern[endUnitWords.length];
Expand Down Expand Up @@ -372,7 +372,7 @@ public static Number wordToNumber(String str) {
} else {
throw new NumberFormatException("Bad number put into wordToNumber. Word is: \"" + curPart + "\", originally part of \"" + originalString + "\", piece # " + curIndex);
}
- } else if (Character.isDigit(curPart.charAt(0))) {
+ } else if (Character.isDigit(curPart.charAt(0)) || curPart.charAt(0) == '.') {
if (curPart.endsWith("th") || curPart.endsWith("rd") || curPart.endsWith("nd") || curPart.endsWith("st")) {
curPart = curPart.substring(0, curPart.length()-2).trim();
}
Expand Down

0 comments on commit 5ee2c39

Please sign in to comment.