Skip to content

Commit

Permalink
add parameter option to get definition in plain text
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Jan 7, 2023
1 parent bf6833d commit 2557847
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 10 deletions.
1 change: 1 addition & 0 deletions doc/restAPI.rst
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,7 @@ GET /kb/concept/{id}
========== ======= ===================== ===============================================================================================================
required id String ID of the concept to be retrieved (Wikipedia ID, Wikidata ID (starting with `Q`) or property ID (starting with `P`)).
optional lang String (valid only for wikipedia IDs) The language knowledge base where to fetch the concept from. Default: `en`.
optional definitionFormat String The format of the definition text associated with the concept. Possible choices are `MediaWiki` (default) or `PlainText`.
========== ======= ===================== ===============================================================================================================

(2) Request header
Expand Down
32 changes: 24 additions & 8 deletions src/main/java/com/scienceminer/nerd/service/NerdRestKB.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import com.scienceminer.nerd.kb.model.Label;
import com.scienceminer.nerd.kb.model.Page;
import com.scienceminer.nerd.kb.model.Page.PageType;
import com.scienceminer.nerd.utilities.mediaWiki.MediaWikiParser;
import org.apache.commons.lang3.ArrayUtils;
import org.grobid.core.lang.Language;

Expand All @@ -35,6 +36,9 @@ public class NerdRestKB {

private static final Logger LOGGER = LoggerFactory.getLogger(NerdRestKB.class);

public static final String MediaWikiFormat = "MediaWiki";
public static final String PlainTextFormat = "PlainText";

/**
 * No-argument constructor; performs no initialisation of its own.
 * Annotated with {@code @Inject} so that it can be used as an injection point.
 */
@Inject
public NerdRestKB() {
}
Expand All @@ -45,23 +49,23 @@ public NerdRestKB() {
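* @param id identifier of the concept
* @param lang language of the knowledge base to query; only used when the identifier is a Wikipedia page id
* @param definitionFormat format of the definition text, either {@code MediaWiki} or {@code PlainText}
* @return a response object containing the information related to the identified concept.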
*/
public String getConceptInfo(String id, String lang) {
public String getConceptInfo(String id, String lang, String definitionFormat) {
String response = null;
if (id.startsWith("Q")) {
// we have a concept
response = getWikidataConceptInfo(id);
response = getWikidataConceptInfo(id, definitionFormat);
} else if (id.startsWith("P")) {
// we have a property
response = getWikidataConceptInfo(id);
response = getWikidataConceptInfo(id, definitionFormat);
} else {
// we have a wikipedia page id, and the lang field matters
response = getWikipediaConceptInfo(id, lang);
response = getWikipediaConceptInfo(id, lang, definitionFormat);
}

return response;
}

private String getWikipediaConceptInfo(String id, String lang) throws QueryException {
private String getWikipediaConceptInfo(String id, String lang, String definitionFormat) throws QueryException {
Integer identifier = null;
try {
identifier = Integer.parseInt(id);
Expand Down Expand Up @@ -93,7 +97,13 @@ private String getWikipediaConceptInfo(String id, String lang) throws QueryExcep
// definition
Definition definition = new Definition();
try {
definition.setDefinition(article.getFirstParagraphWikiText());
String wikiText = article.getFirstParagraphWikiText();
if (definitionFormat.equals(this.PlainTextFormat)) {
String wikiTextOnly = MediaWikiParser.getInstance().toTextOnly(wikiText, lang);
definition.setDefinition(wikiTextOnly);
}
else
definition.setDefinition(wikiText);
} catch (Exception e) {
LOGGER.debug("Error when getFirstParagraphWikiText for page id " + identifier);
}
Expand Down Expand Up @@ -151,7 +161,7 @@ private void handleCategories(NerdEntity entity, String identifier, com.sciencem
}
}

private String getWikidataConceptInfo(String id) {
private String getWikidataConceptInfo(String id, String definitionFormat) {
NerdEntity entity = new NerdEntity();
entity.setLang(Language.EN);
UpperKnowledgeBase knowledgeBase = UpperKnowledgeBase.getInstance();
Expand Down Expand Up @@ -180,7 +190,13 @@ private String getWikidataConceptInfo(String id) {
// definition
Definition definition = new Definition();
try {
definition.setDefinition(article.getFirstParagraphWikiText());
String wikiText = article.getFirstParagraphWikiText();
if (definitionFormat.equals(this.PlainTextFormat)) {
String wikiTextOnly = MediaWikiParser.getInstance().toTextOnly(wikiText, Language.EN);
definition.setDefinition(wikiTextOnly);
}
else
definition.setDefinition(wikiText);
} catch (Exception e) {
LOGGER.debug("Error when getFirstParagraphWikiTextfor page id " + id);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public class NerdRestService implements NerdPaths {
private static final String SENTENCE = "sentence";
private static final String FORMAT = "format";
private static final String CUSTOMISATION = "customisation";
private static final String DEFINITIONFORMAT = "definitionFormat";

NerdRestProcessQuery nerdProcessQuery;
NerdRestProcessFile nerdProcessFile;
Expand Down Expand Up @@ -330,13 +331,14 @@ private Response handleQueryException(QueryException qe, String query) {
@Produces(MediaType.APPLICATION_JSON)
@GET
public Response getConceptInformation(@PathParam(ID) String identifier,
@DefaultValue(Language.EN) @QueryParam(LANG) String lang) {
@DefaultValue(Language.EN) @QueryParam(LANG) String lang,
@DefaultValue(NerdRestKB.MediaWikiFormat) @QueryParam(DEFINITIONFORMAT) String definitionFormat) {

String output = null;
Response response = null;

try {
output = nerdRestKB.getConceptInfo(identifier, lang);
output = nerdRestKB.getConceptInfo(identifier, lang, definitionFormat);

if (isBlank(output)) {
response = Response.status(Response.Status.NOT_FOUND).build();
Expand Down

0 comments on commit 2557847

Please sign in to comment.