# Text Reading Integration Notebook

### Running a PDF through COSMOS

In [5]:
import requests

# Endpoint configuration. Set the host according to where the service is running.
# ENDPOINT = "https://api.askem.lum.ai"  # Uncomment this line to use SKEMA's web service
ENDPOINT = "http://127.0.0.1:8000"  # Local docker compose service (currently active)

# Open the PDF inside a context manager so the file handle is closed as soon as
# the upload finishes, instead of staying open for the lifetime of the kernel.
with open("../../data/pdf/ijerph-18-09027.pdf", "rb") as pdf_file:
    response = requests.post(
        f"{ENDPOINT}/text-reading/cosmos_to_json",
        # Multipart upload: form field "pdf", sent under the file name "ijerp.pdf".
        files=[("pdf", ("ijerp.pdf", pdf_file))],
    )

# Print the parsed JSON on success; otherwise report the HTTP status and raw body.
if response.status_code == 200:
    print(response.json())
else:
    print(f"Error {response.status_code} - {response.text}")

[{'pdf_name': 'ijerp.pdf', 'dataset_id': 'ijerp', 'page_num': 1, 'img_pth': '/tmp/tmpciodhkjm/ijerp.pdf_1_pad', 'pdf_dims': [0.0, 0.0, 595.276, 841.89], 'bounding_box': [82.0, 107.0, 1275.0, 195.0], 'classes': ['Equation label', 'Section Header', 'Page Header', 'Body Text', 'Figure', 'Other', 'Equation', 'Reference text', 'Figure Caption', 'Table', 'Page Footer', 'Table Note', 'Table Caption', 'Abstract'], 'scores': [-5.158577919, -8.3714532852, -10.0183992386, -10.2071590424, -11.1120090485, -11.1143541336, -11.2161874771, -13.0983781815, -15.0558624268, -15.1471719742, -15.9366912842, -18.405462265, -18.7139511108, -18.9916629791], 'content': '', 'postprocess_cls': 'Page Header', 'postprocess_score': 0.9985458851, 'detect_cls': 'Equation label', 'detect_score': -5.158577919}, {'pdf_name': 'ijerp.pdf', 'dataset_id': 'ijerp', 'page_num': 1, 'img_pth': '/tmp/tmpciodhkjm/ijerp.pdf_1_pad', 'pdf_dims': [0.0, 0.0, 595.276, 841.89], 'bounding_box': [80.0, 234.0, 1197.0, 305.0], 'classes': ['

### Plain Text Annotation Example

In [9]:
import requests

# Endpoint configuration. Point this at the host where the service is running.
ENDPOINT = "https://api.askem.lum.ai"

# Query-string flags sent with the request.
# NOTE(review): presumably these toggle the SKEMA and MIT annotators - confirm against the API docs.
params = {"annotate_skema": True, "annotate_mit": False}

# Plain-text snippets to annotate, sent as the JSON request body.
payload = {"texts": ["x = 0", "y = 1", "I: Infected population"]}

response = requests.post(
    f"{ENDPOINT}/text-reading/integrated-text-extractions",
    params=params,
    json=payload,
)

# Anything other than HTTP 200 is reported as an error; a 200 prints the parsed JSON.
if response.status_code != 200:
    print(f"Error {response.status_code} - {response.text}")
else:
    print(response.json())

{'outputs': [{'data': {'attributes': [{'type': 'anchored_extraction', 'amr_element_id': None, 'payload': {'id': {'id': 'E:-589301722'}, 'names': [{'id': {'id': 'T:-1411656337'}, 'name': 'x', 'extraction_source': {'page': 0, 'block': 0, 'char_start': 1, 'char_end': 2, 'document_reference': {'id': 'N/A'}}, 'provenance': {'method': 'Skema TR Pipeline rules', 'timestamp': '2023-07-19T14:58:29.863578'}}], 'descriptions': [], 'value_specs': [{'id': {'id': 'T:432832120'}, 'value': {'source': '0', 'grounding': [], 'extraction_source': {'page': 0, 'block': 0, 'char_start': 5, 'char_end': 6, 'document_reference': {'id': 'N/A'}}}, 'units': None, 'type': None, 'bounds': None, 'provenance': {'method': 'Skema TR Pipeline rules', 'timestamp': '2023-07-19T14:58:29.863578'}}], 'groundings': [], 'data_columns': None}}, {'type': 'document_collection', 'amr_element_id': None, 'payload': {'documents': [{'id': {'id': 'N/A'}, 'source_file': 'N/A', 'doi': ''}]}}]}, 'errors': None}, {'data': {'attributes': [{'

### PDF Annotation Example

In [13]:
import requests

# Endpoint configuration. Point this at the host where the service is running.
ENDPOINT = "https://api.askem.lum.ai"

# Query-string flags sent with the request.
# NOTE(review): presumably these toggle the SKEMA and MIT annotators - confirm against the API docs.
params = {
    "annotate_skema": True,
    "annotate_mit": False,
}

# Open the PDF inside a context manager so the file handle is closed once the
# upload completes, rather than leaking for the lifetime of the kernel.
with open("../../data/pdf/ijerph-18-09027.pdf", "rb") as pdf_file:
    # Multipart upload: form field "pdfs", sent under the file name "ijerp.pdf".
    files = [("pdfs", ("ijerp.pdf", pdf_file))]
    response = requests.post(
        f"{ENDPOINT}/text-reading/integrated-pdf-extractions",
        params=params,
        files=files,
    )

# Print the parsed JSON on success; otherwise report the HTTP status and raw body.
if response.status_code == 200:
    print(response.json())
else:
    print(f"Error {response.status_code} - {response.text}")

{'outputs': [{'data': {'attributes': [{'type': 'anchored_extraction', 'amr_element_id': None, 'payload': {'id': {'id': 'E:-1621000196'}, 'names': [{'id': {'id': 'T:-2105346093'}, 'name': 'potential prevention of up', 'extraction_source': {'page': 0, 'block': 0, 'char_start': 524, 'char_end': 550, 'document_reference': {'id': 'ijerp.pdf'}}, 'provenance': {'method': 'Skema TR Pipeline rules', 'timestamp': '2023-07-19T15:18:53.674423'}}], 'descriptions': [], 'value_specs': [{'id': {'id': 'T:-202544802'}, 'value': {'source': '%', 'grounding': [], 'extraction_source': {'page': 0, 'block': 0, 'char_start': 556, 'char_end': 557, 'document_reference': {'id': 'ijerp.pdf'}}}, 'units': None, 'type': None, 'bounds': None, 'provenance': {'method': 'Skema TR Pipeline rules', 'timestamp': '2023-07-19T15:18:53.674423'}}], 'groundings': [{'grounding_text': 'Centers for Disease Control and Prevention', 'grounding_id': 'ncit:C16408', 'source': [], 'score': 0.7546381950378418, 'provenance': {'method': 'SK

### Ground Entities to MIRA concepts using embedding similarity

In [16]:
import requests

# Endpoint configuration. Point this at the host where the service is running.
ENDPOINT = "https://api.askem.lum.ai"

# Entity strings to ground, sent as the JSON request body.
# ("susceptible" was previously misspelled as "suceptible".)
queries = {"queries": ["infected", "susceptible"]}
# NOTE(review): "k" presumably caps the number of candidates returned per query - confirm against the API docs.
params = {"k": 5}
response = requests.post(f"{ENDPOINT}/text-reading/ground_to_mira", params=params, json=queries)

if response.status_code == 200:
    results = response.json()
    # Show the results so a successful run produces visible output, like the other cells.
    print(results)
else:
    print(f"Error {response.status_code} - {response.text}")

Error 500 - Internal Server Error
