In [1]:
import requests
import json
import pandas as pd
from typing import Any, List, Mapping, Optional
from pandas import DataFrame

In [2]:
def annotate_json(file_path:str, endpoint:str = "http://localhost:9000/cosmos_json_to_mentions", timeout:Optional[float] = None) -> List[Mapping[str, Any]]:
	"""
	Annotates an existing json file on the server with the text reading pipeline.

	Parameters:
		file_path: Path of the Cosmos json file, sent to the service under the
			'pathToCosmosJson' key. The path is not opened locally by this function.
		endpoint: URL that the request is POSTed to.
		timeout: Seconds to wait for the service before giving up. The default,
			None, waits indefinitely (the behavior this function always had).

	Returns:
		The value stored under 'mentions' in the JSON body of the response.

	Raises:
		requests.HTTPError: If the service answers with a 4xx/5xx status.
		KeyError: If the response body carries no 'mentions' entry.
	"""

	response = requests.post(endpoint, json={'pathToCosmosJson': file_path}, timeout=timeout)

	# Fail on an HTTP error status here, rather than later with an unrelated
	# JSON decoding error or a KeyError on 'mentions'.
	response.raise_for_status()

	return response.json()['mentions']


def build_data_frame(mentions:List[Mapping[str, Any]]) -> DataFrame:
	"""
	Builds a data frame from the output of text reading.

	Every mention becomes one row. A mention must provide the keys 'text',
	'type', 'labels' (only its first element is kept), 'sentence' and
	'tokenInterval' (itself holding 'start' and 'end').

	Parameters:
		mentions: The mentions to tabulate; may be empty.

	Returns:
		A frame with the columns text, mention_type, label, grounding_id,
		sentence_ix, start_token and end_token, in that order. The columns are
		present even when `mentions` is empty.
	"""

	columns = [
		'text',
		'mention_type',
		'label',
		'grounding_id',
		'sentence_ix',
		'start_token',
		'end_token',
	]

	rows = [
		{
			'text': mention['text'],
			'mention_type': mention['type'],
			'label': mention['labels'][0],
			# Placeholder value: every row gets the same constant
			'grounding_id': "TBD",
			'sentence_ix': mention['sentence'],
			'start_token': mention['tokenInterval']['start'],
			'end_token': mention['tokenInterval']['end'],
		}
		for mention in mentions
	]

	# Naming the columns explicitly keeps the schema stable for empty input,
	# which would otherwise produce a frame without any columns.
	return DataFrame(rows, columns=columns)

def text_reading_mentions(file_path:str) -> DataFrame:
	""" Annotates the json at file_path and returns its mentions as a data frame """

	# Step 1: obtain the mentions for the file from the annotation service
	mentions = annotate_json(file_path)

	# Step 2: arrange those mentions into tabular form
	mentions_frame = build_data_frame(mentions)

	return mentions_frame

In [3]:
def get_paper_by_doi(doi:str) -> Optional[str]:
	"""
	Stand-in for looking up a paper's Cosmos output by its DOI.

	Returns the path registered for `doi` in the hard-coded table below, or
	None when the DOI is not listed there.
	"""
	known_papers = dict([
		(
			"10.1016/j.chaos.2021.110689",
			"/media/evo870/github/COSMOS-data/output_files/documents_5Febcovid19--COSMOS-data.json",
		),
	])

	if doi in known_papers:
		return known_papers[doi]

	# Unknown DOI
	return None

In [4]:
# Resolve the paper's DOI to the path of its Cosmos output.
paper_path = get_paper_by_doi("10.1016/j.chaos.2021.110689")

# get_paper_by_doi returns None for a DOI it does not know; stop here with a
# clear message instead of handing a missing path on to text reading.
if paper_path is None:
	raise LookupError("No Cosmos output is registered for the requested DOI")

# Last expression of the cell, so the resulting frame is displayed.
text_reading_mentions(paper_path)

Unnamed: 0,text,mention_type,label,grounding_id,sentence_ix,start_token,end_token
0,fractional dynamics,TextBoundMention,Phrase,TBD,0,2,4
1,Projections,TextBoundMention,Phrase,TBD,0,0,1
2,b,TextBoundMention,Phrase,TBD,0,20,21
3,optimal control strategies,TextBoundMention,Phrase,TBD,0,7,10
4,COVID-19,TextBoundMention,Phrase,TBD,0,5,6
...,...,...,...,...,...,...,...
4069,2021,TextBoundMention,Phrase,TBD,0,7,8
4070,Fractals 145,TextBoundMention,Phrase,TBD,0,4,6
4071,110689,TextBoundMention,Value,TBD,0,9,10
4072,145,TextBoundMention,Value,TBD,0,5,6
