# Text Reading Integration Notebook

### Running a PDF through COSMOS

In [1]:
import requests

# Endpoint configuration. Set the host according to where this snippet is running.
# ENDPOINT = "https://api.askem.lum.ai"  # Uncomment this line to use SKEMA's web service
ENDPOINT = "http://127.0.0.1:8000"  # Local docker compose service (comment out when using the web service)

# Open the PDF inside a context manager so the file handle is closed as soon as
# the upload completes, even if the request raises.
with open("../../data/pdf/ijerph-18-09027.pdf", "rb") as pdf_file:
    response = requests.post(
        f"{ENDPOINT}/text-reading/cosmos_to_json",
        # Multipart upload: form field "pdf", sent under the file name "ijerp.pdf".
        files=[("pdf", ("ijerp.pdf", pdf_file))],
    )

# Print the decoded JSON on success; otherwise show the status code and raw body.
if response.status_code == 200:
    print(response.json())
else:
    print(f"Error {response.status_code} - {response.text}")

[{'pdf_name': 'ijerp.pdf', 'dataset_id': 'ijerp', 'page_num': 1, 'img_pth': '/tmp/tmpaegyk2uw/ijerp.pdf_1_pad', 'pdf_dims': [0.0, 0.0, 595.276, 841.89], 'bounding_box': [82.0, 107.0, 1275.0, 195.0], 'classes': ['Equation label', 'Section Header', 'Page Header', 'Body Text', 'Figure', 'Other', 'Equation', 'Reference text', 'Figure Caption', 'Table', 'Page Footer', 'Table Note', 'Table Caption', 'Abstract'], 'scores': [-5.1587600708, -8.3740234375, -10.0186519623, -10.2147502899, -11.1155567169, -11.1168165207, -11.2150306702, -13.100736618, -15.0583581924, -15.1461448669, -15.9341535568, -18.4104003906, -18.7148742676, -18.9958457947], 'content': '', 'postprocess_cls': 'Page Header', 'postprocess_score': 0.9985129237, 'detect_cls': 'Equation label', 'detect_score': -5.1587600708}, {'pdf_name': 'ijerp.pdf', 'dataset_id': 'ijerp', 'page_num': 1, 'img_pth': '/tmp/tmpaegyk2uw/ijerp.pdf_1_pad', 'pdf_dims': [0.0, 0.0, 595.276, 841.89], 'bounding_box': [1176.5424804688, 121.4962387085, 1278.48

### Plain Text Annotation Example

In [2]:
import requests

# Endpoint configuration: pick the host that matches where this snippet runs.
# ENDPOINT = "https://api.askem.lum.ai"  # SKEMA's web service
ENDPOINT = "http://127.0.0.1:8000"  # Local docker compose service

# JSON body: the plain-text passages to annotate; the "amrs" list is left empty.
payload = {"texts": ["x = 0", "y = 1", "I: Infected population"], "amrs": []}

# Query parameters: annotate_skema is switched on, annotate_mit is switched off.
params = {"annotate_skema": True, "annotate_mit": False}

response = requests.post(
    f"{ENDPOINT}/text-reading/integrated-text-extractions",
    params=params,
    json=payload,
)

# Anything other than HTTP 200 is reported together with the raw response body.
if response.status_code != 200:
    print(f"Error {response.status_code} - {response.text}")
else:
    print(response.json())

{'outputs': [{'data': {'attributes': [{'type': 'anchored_entity', 'amr_element_id': None, 'payload': {'id': {'id': 'E:-589301722'}, 'mentions': [{'id': {'id': 'T:-1411656337'}, 'name': 'x', 'extraction_source': {'page': 0, 'block': 0, 'surrounding_passage': "`` x = 0 ''", 'char_start': 1, 'char_end': 2, 'document_reference': {'id': 'N/A'}}, 'provenance': {'method': 'Skema TR Pipeline rules', 'timestamp': '2024-03-20T23:57:29.664501'}}], 'text_descriptions': [], 'value_descriptions': [{'id': {'id': 'T:432832120'}, 'value': {'amount': '0', 'grounding': [], 'extraction_source': {'page': 0, 'block': 0, 'surrounding_passage': None, 'char_start': 5, 'char_end': 6, 'document_reference': {'id': 'N/A'}}}, 'units': None, 'type': None, 'bounds': None, 'provenance': {'method': 'Skema TR Pipeline rules', 'timestamp': '2024-03-20T23:57:29.664501'}}], 'groundings': [], 'data_columns': None}}, {'type': 'document_collection', 'amr_element_id': None, 'payload': {'documents': [{'id': {'id': 'N/A'}, 'sour

### PDF Annotation Example

In [3]:
import requests

# Endpoint configuration. Set the host according to where this snippet is running.
ENDPOINT = "https://api.askem.lum.ai"

# Query parameters: annotate_skema is switched on, annotate_mit is switched off.
params = {
    "annotate_skema": True,
    "annotate_mit": False,
}

# Keep the PDF open only for the duration of the upload; the context manager
# closes the handle afterwards, even if the request raises.
with open("../../data/pdf/ijerph-18-09027.pdf", "rb") as pdf_file:
    # Multipart upload: form field "pdfs", sent under the file name "ijerp.pdf".
    files = [("pdfs", ("ijerp.pdf", pdf_file))]
    response = requests.post(
        f"{ENDPOINT}/text-reading/integrated-pdf-extractions",
        params=params,
        files=files,
    )

# Print the decoded JSON on success; otherwise show the status code and raw body.
if response.status_code == 200:
    print(response.json())
else:
    print(f"Error {response.status_code} - {response.text}")

{'outputs': [{'data': {'attributes': [{'type': 'anchored_entity', 'amr_element_id': None, 'payload': {'id': {'id': 'E:-1621000196'}, 'mentions': [{'id': {'id': 'T:-2105346093'}, 'name': 'potential prevention of up', 'extraction_source': {'page': 1, 'block': 9, 'surrounding_passage': "It has shown success in limiting community spread of SARS 2003 [ 1 ] , and more recently , in Taiwan 's management of COVID-19 [ 2 ] .\nRecent hypothetical studies on masking by the states of New York and Washington suggests a potential prevention of up to 45 % of their projected death rates [ 3 ] .\nIn this study of potential face-mask usage for the general public , the authors investigated how public masking can control the infection , in the context of the USA .", 'char_start': 524, 'char_end': 550, 'document_reference': {'id': 'ijerp.pdf'}}, 'provenance': {'method': 'Skema TR Pipeline rules', 'timestamp': '2024-03-20T23:58:20.733738'}}], 'text_descriptions': [], 'value_descriptions': [{'id': {'id': 'T:

### Ground Entities to MIRA concepts using embedding similarity

In [5]:
import requests

# Endpoint configuration. Set the host according to where this snippet is running.
# ENDPOINT = "https://api.askem.lum.ai"  # Uncomment this line to use SKEMA's web service
ENDPOINT = "http://127.0.0.1:8000"  # Local docker compose service (comment out when using the web service)

# Terms to ground, sent as the JSON body.
queries = {"queries": ["infected", "susceptible"]}
# "k" is sent as a query parameter; presumably the number of candidate concepts
# returned per query -- confirm against the service documentation.
params = {"k": 5}
response = requests.post(f"{ENDPOINT}/text-reading/ground_to_mira", params=params, json=queries)

# Print the decoded JSON on success; otherwise show the status code and raw body.
if response.status_code == 200:
    print(response.json())
else:
    print(f"Error {response.status_code} - {response.text}")

[[{'score': 0.8168461918830872, 'groundingConcept': {'id': 'apollosv:00000209', 'name': 'infected organism', 'description': 'An infectious agent host who is NOT able to transmit the infection to another organism of the same species.', 'synonyms': [], 'embedding': [-0.04194965958595276, -0.01585334725677967, -0.062012385576963425, 0.05832172557711601, -0.04215122014284134, 0.03349621966481209, -0.032431844621896744, 0.042882293462753296, -0.14511610567569733, -0.09979952871799469, 0.09288360178470612, 0.011159561574459076, -0.1610398143529892, 0.07069626450538635, 0.14190736413002014, -0.045939892530441284, -0.00435914471745491, -0.07486970722675323, -0.01833431050181389, 0.06107548624277115, -0.0005337423062883317, 0.03158421069383621, -0.003600242780521512, 0.03896883502602577, 0.0932014063000679, -0.07409533858299255, -0.03148200362920761, 0.048488423228263855, -0.04102925583720207, 0.078282929956913, -0.023712240159511566, 0.08119269460439682, -0.09483719617128372, 0.001924233278259