Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions server/src/main/java/org/cd2h/nlpsandbox/DateExtractor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package org.cd2h.nlpsandbox;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.openapitools.model.DateAnnotation;

/**
 * Finds date mentions in free text using a fixed set of regular expressions.
 *
 * <p>Four patterns are applied, in this order: {@code DD/MM/YYYY}, {@code MM/DD/YYYY},
 * {@code MM-DD-YYYY} and {@code MMMM} (an English month name). Years are limited to
 * 1900-2099. Every pattern is applied independently, so a string such as
 * {@code 10/11/2020} that is valid under both slash-separated formats yields one
 * annotation per matching format.
 *
 * <p>The patterns are compiled once and held in an unmodifiable list, so creating an
 * extractor is cheap and instances do not interfere with one another.
 */
public class DateExtractor {

    /** A compiled regular expression paired with the format label reported for its matches. */
    static class NamedPattern {
        public final String name;
        public final Pattern pattern;

        public NamedPattern(String name, Pattern pattern) {
            this.name = name;
            this.pattern = pattern;
        }
    }

    /** A character range with its derived length; not referenced elsewhere in this class. */
    static class Span {
        int begin;
        int end;
        int length;

        public Span(int begin, int end) {
            this.begin = begin;
            this.end = end;
            this.length = end - begin;
        }
    }

    // Regex fragments shared by the numeric date patterns. Each is one capturing group,
    // so group numbering is the same as if the expressions were written out in full.
    private static final String DAY = "([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])";
    private static final String MONTH = "([1-9]|0[1-9]|1[0-2])";
    private static final String YEAR = "(19[0-9][0-9]|20[0-9][0-9])";

    /**
     * The patterns applied by {@link #findDatesFromString(String)}, in application order.
     * Built once at class initialisation and unmodifiable afterwards.
     */
    static final List<NamedPattern> datePatterns = buildDatePatterns();

    /** Creates an extractor; the shared patterns are already initialised. */
    public DateExtractor() {
        // Intentionally empty: patterns are static and built at class initialisation.
    }

    /**
     * Builds the list of supported date patterns.
     *
     * <p>Modelled on
     * https://github.com/Sage-Bionetworks/nlp-sandbox-date-annotator-example/blob/develop/server/openapi_server/controllers/date_controller.py#L32-L47
     *
     * @return an unmodifiable list of the supported patterns
     */
    private static List<NamedPattern> buildDatePatterns() {
        List<NamedPattern> patterns = new ArrayList<>();

        // The trailing \b stops a match from ending in the middle of a longer token,
        // e.g. "10/26/2020" inside "10/26/20201" or "May" inside "Mayor".
        patterns.add(new NamedPattern("DD/MM/YYYY",
                Pattern.compile("\\b" + DAY + "(/)" + MONTH + "(/)" + YEAR + "\\b")));

        patterns.add(new NamedPattern("MM/DD/YYYY",
                Pattern.compile("\\b" + MONTH + "(/)" + DAY + "(/)" + YEAR + "\\b")));

        patterns.add(new NamedPattern("MM-DD-YYYY",
                Pattern.compile("\\b" + MONTH + "(-)" + DAY + "(-)" + YEAR + "\\b")));

        patterns.add(new NamedPattern("MMMM",
                Pattern.compile("\\b(January|February|March|April|May|June|"
                        + "July|August|September|October|November|"
                        + "December)\\b")));

        return Collections.unmodifiableList(patterns);
    }

    /**
     * Finds every date mention in the given text.
     *
     * <p>Results are grouped by pattern (in the order the patterns are registered) and,
     * within a pattern, ordered by position in the text. Each annotation carries the
     * match start offset, its length, the matched text and the pattern's format label;
     * the note id is left {@code null} for the caller to fill in.
     *
     * @param sentence the text to scan; may be {@code null}
     * @return the annotations found; empty (never {@code null}) when nothing matches or
     *         {@code sentence} is {@code null}
     */
    public List<DateAnnotation> findDatesFromString(String sentence) {
        List<DateAnnotation> annots = new ArrayList<>();
        if (sentence == null) {
            // Nothing to scan; avoids a NullPointerException from Pattern.matcher.
            return annots;
        }

        for (NamedPattern np : datePatterns) {
            Matcher m = np.pattern.matcher(sentence);
            while (m.find()) {
                String matched = m.group(0);
                // NOTE(review): this writes matched note text to stdout on every hit;
                // consider routing it through the application's logger instead.
                System.out.println(String.format("Found matched pattern \"%s\" in value: %s", np.name, matched));

                annots.add(new DateAnnotation()
                        .start(m.start(0))
                        .length(matched.length())
                        .noteId(null)
                        .text(matched)
                        .format(np.name));
            }
        }
        return annots;
    }

    /**
     * Small manual demo: runs the extractor over two sample sentences. Matches are
     * reported on stdout by {@link #findDatesFromString(String)}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        DateExtractor de = new DateExtractor();
        String str1 = "Today is 10/26/2020, and yesterday is 10/25/2020. ";
        de.findDatesFromString(str1);

        String str2 = "Today is 26/11/2020. ";
        de.findDatesFromString(str2);
    }
}
12 changes: 5 additions & 7 deletions server/src/main/java/org/openapitools/api/DatesApi.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/
package org.openapitools.api;

import org.cd2h.nlpsandbox.DateExtractor;
import org.openapitools.model.DateAnnotation;
import org.openapitools.model.Error;
import java.util.List;
Expand Down Expand Up @@ -58,19 +59,16 @@ default Optional<NativeWebRequest> getRequest() {
consumes = { "application/json" },
method = RequestMethod.POST)
default ResponseEntity<List<DateAnnotation>> datesReadAll(@ApiParam(value = "" ) @Valid @RequestBody(required = false) List<Note> note) {
DateExtractor de = new DateExtractor();

List<DateAnnotation> annotations = new ArrayList<DateAnnotation>();
note.forEach((n) -> {
// TODO: Extract annotations from the text of the Note object n
String text = n.getText();
System.out.print(text);

annotations.add(new DateAnnotation()
.start(123)
.length(10)
.noteId(12)
.text("09-03-1999")
.format("MM-DD-YYYY"));
annotations.addAll(de.findDatesFromString(text));
});

return new ResponseEntity<List<DateAnnotation>>(annotations, HttpStatus.OK);
}

Expand Down