From 11125fefa07ecc7f1394ee0f31524133f80da5c4 Mon Sep 17 00:00:00 2001
From: ymnliu
Date: Tue, 10 Nov 2020 15:35:08 -0600
Subject: [PATCH] add date extractor

---
 .../org/cd2h/nlpsandbox/DateExtractor.java    | 106 +++++++++++++++++++
 .../java/org/openapitools/api/DatesApi.java   |  12 ++-
 2 files changed, 111 insertions(+), 7 deletions(-)
 create mode 100644 server/src/main/java/org/cd2h/nlpsandbox/DateExtractor.java

diff --git a/server/src/main/java/org/cd2h/nlpsandbox/DateExtractor.java b/server/src/main/java/org/cd2h/nlpsandbox/DateExtractor.java
new file mode 100644
index 0000000..e3b9da4
--- /dev/null
+++ b/server/src/main/java/org/cd2h/nlpsandbox/DateExtractor.java
@@ -0,0 +1,106 @@
+package org.cd2h.nlpsandbox;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.openapitools.model.DateAnnotation;
+
+/**
+ * Finds date mentions in free text using a fixed list of regular expressions.
+ *
+ * <p>The pattern list is built once when the class is loaded and is never modified
+ * afterwards, so creating or using several extractors at the same time is safe.
+ */
+public class DateExtractor {
+
+    /** A compiled regular expression together with the format label reported for its matches. */
+    static class NamedPattern {
+        public final String name;
+        public final Pattern pattern;
+
+        public NamedPattern(String name, Pattern pattern) {
+            this.name = name;
+            this.pattern = pattern;
+        }
+    }
+
+    /** A character range with its length ({@code end - begin}); not used by the extractor yet. */
+    static class Span {
+        final int begin;
+        final int end;
+        final int length;
+
+        public Span(int begin, int end) {
+            this.begin = begin;
+            this.end = end;
+            this.length = end - begin;
+        }
+    }
+
+    /**
+     * Patterns applied to every input, in this order. Adapted from
+     * https://github.com/Sage-Bionetworks/nlp-sandbox-date-annotator-example/blob/develop/server/openapi_server/controllers/date_controller.py#L32-L47
+     *
+     * <p>The two slash formats overlap: text such as 10/11/2020 satisfies both and is
+     * reported once under each format name.
+     */
+    static final List<NamedPattern> datePatterns = Collections.unmodifiableList(Arrays.asList(
+            new NamedPattern("DD/MM/YYYY", Pattern.compile(
+                    "\\b([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])(/)([1-9]|0[1-9]|1[0-2])(/)(19[0-9][0-9]|20[0-9][0-9])")),
+            new NamedPattern("MM/DD/YYYY", Pattern.compile(
+                    "\\b([1-9]|0[1-9]|1[0-2])(/)([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])(/)(19[0-9][0-9]|20[0-9][0-9])")),
+            new NamedPattern("MM-DD-YYYY", Pattern.compile(
+                    "\\b([1-9]|0[1-9]|1[0-2])(-)([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])(-)(19[0-9][0-9]|20[0-9][0-9])")),
+            new NamedPattern("MMMM", Pattern.compile(
+                    "\\b(January|February|March|April|May|June|"
+                            + "July|August|September|October|November|"
+                            + "December)"))));
+
+    /** Creates an extractor. All instances share the same pattern list. */
+    public DateExtractor() {
+    }
+
+    /**
+     * Scans {@code sentence} with every pattern and returns one annotation per match.
+     *
+     * <p>Annotations are grouped by pattern, in pattern order, and follow text order
+     * within a pattern. Each one carries the match offset, the match length, the matched
+     * text and the pattern name as its format; {@code noteId} is set to {@code null}.
+     *
+     * @param sentence text to scan; {@code null} yields an empty result
+     * @return the annotations found, possibly empty, never {@code null}
+     */
+    public List<DateAnnotation> findDatesFromString(String sentence) {
+        List<DateAnnotation> annots = new ArrayList<>();
+        if (sentence == null) {
+            // Pattern.matcher(null) would throw a NullPointerException.
+            return annots;
+        }
+        for (NamedPattern np : datePatterns) {
+            Matcher m = np.pattern.matcher(sentence);
+            while (m.find()) {
+                String matched = m.group(0);
+                annots.add(new DateAnnotation()
+                        .start(m.start(0))
+                        .length(matched.length())
+                        .noteId(null)
+                        .text(matched)
+                        .format(np.name));
+            }
+        }
+        return annots;
+    }
+
+    /** Manual smoke check: prints the annotations found in two sample sentences. */
+    public static void main(String[] args) {
+        DateExtractor de = new DateExtractor();
+        String str1 = "Today is 10/26/2020, and yesterday is 10/25/2020. ";
+        System.out.println(de.findDatesFromString(str1));
+
+        String str2 = "Today is 26/11/2020. ";
+        System.out.println(de.findDatesFromString(str2));
+    }
+}
diff --git a/server/src/main/java/org/openapitools/api/DatesApi.java b/server/src/main/java/org/openapitools/api/DatesApi.java
index 74be047..5a92026 100644
--- a/server/src/main/java/org/openapitools/api/DatesApi.java
+++ b/server/src/main/java/org/openapitools/api/DatesApi.java
@@ -5,6 +5,7 @@
  */
 package org.openapitools.api;
 
+import org.cd2h.nlpsandbox.DateExtractor;
 import org.openapitools.model.DateAnnotation;
 import org.openapitools.model.Error;
 import java.util.List;
@@ -58,19 +59,16 @@ default Optional<NativeWebRequest> getRequest() {
         consumes = { "application/json" },
         method = RequestMethod.POST)
     default ResponseEntity<List<DateAnnotation>> datesReadAll(@ApiParam(value = "" ) @Valid @RequestBody(required = false) List<Note> note) {
+        DateExtractor de = new DateExtractor();
+
         List<DateAnnotation> annotations = new ArrayList<DateAnnotation>();
         note.forEach((n) -> {
             // TODO: Extract annotations from the text of the Note object n
             String text = n.getText();
             System.out.print(text);
-
-            annotations.add(new DateAnnotation()
-                .start(123)
-                .length(10)
-                .noteId(12)
-                .text("09-03-1999")
-                .format("MM-DD-YYYY"));
+            annotations.addAll(de.findDatesFromString(text));
         });
+
         return new ResponseEntity<List<DateAnnotation>>(annotations, HttpStatus.OK);
 
     }