In [1]:
# Import main library
import os
import sys
PROJ_DIR = os.path.realpath(os.path.dirname(os.path.abspath('')))
sys.path.append(os.path.join(PROJ_DIR,'src'))
import ataxai

In [2]:
FILE_PATH = '../data/informe_ejemplo_en.txt'
print(f'Extracting entities from {FILE_PATH}...')
entities = ataxai.extract_entities_from_file(FILE_PATH)

print('Las entidades extraídas por en_ner_bc5cdr_md con negaciones son:')
for element in entities:
    negated = "NEGATED" if element._.negex else ""
    print(f'{element.text:32s} {element.label_} {negated}')

Extracting entities from ../data/informe_ejemplo_en.txt...
Las entidades extraídas por en_ner_bc5cdr_md con negaciones son:
headache                         DISEASE NEGATED
right homonymous hemianopsia     DISEASE 
bleeding                         DISEASE 
appetite                         DISEASE 
gait disturbances hearing deficits DISEASE 
disorientation                   DISEASE 
sensorineural hearing and visual deficits meningism DISEASE 
fever                            DISEASE NEGATED
ataxic gait                      DISEASE NEGATED
dementia forgetfulness           DISEASE NEGATED
apathy                           DISEASE NEGATED
right occipital bleeding         DISEASE 
haemosiderin                     DISEASE 
siderosis                        DISEASE 
lactate                          CHEMICAL 
glucose                          CHEMICAL 
vasculitis                       DISEASE 
antinuclear                      DISEASE 
ANA                              CHEMICAL 
desferrioxamine    

In [5]:
processed = {} # Keep track of processed entities to avoid duplicates
all_matches:list[ataxai.HPOMatch] = []
unmatched:list[str] = []
for entity in entities:
    if entity in processed: # Skip entities that have already been processed
        continue
    processed[element.text] = True
    print(f'Buscando "{entity.text}" en HPO...')
    matches = ataxai.get_HPO_matches(entity)

    if len(matches) > 0:
        for m in matches:
            print(f'\t{m.HPO.id} - {m.HPO.name} ({m.query} vs. {m.matching_HPO_term} - dist={m.distance})')
    else:
        print(f'\tNo se han detectado términos HPO para "{entity.text}"')



Buscando "headache" en HPO...
	HP:0030907 - Thunderclap headache (headache vs. Thunderclap headache - dist=1)
Buscando "right homonymous hemianopsia" en HPO...
	HP:0030516 - Homonymous hemianopia (homonymous hemianopsia vs. Homonymous hemianopsia - dist=0)
Buscando "bleeding" en HPO...
	HP:0100608 - Metrorrhagia (bleeding vs. Intermenstrual bleeding - dist=1)
	HP:0100608 - Metrorrhagia (bleeding vs. Metrorrhagia - dist=1)
Buscando "appetite" en HPO...
	HP:0004396 - Poor appetite (appetite vs. Decreased appetite - dist=1)
	HP:0004396 - Poor appetite (appetite vs. No appetite - dist=1)
	HP:0004396 - Poor appetite (appetite vs. Poor appetite - dist=1)
	HP:0004396 - Poor appetite (appetite vs. Poor appetite - dist=1)
Buscando "gait disturbances hearing deficits" en HPO...
	HP:0100543 - Cognitive impairment (deficits vs. Cognitive deficits - dist=1)
Buscando "disorientation" en HPO...
	HP:0001289 - Confusion (disorientation vs. Disorientation - dist=0)
Buscando "sensorineural hearing and vi



	HP:0100543 - Cognitive impairment (deficits vs. Cognitive deficits - dist=1)
Buscando "fever" en HPO...
	HP:0033399 - Persistent fever (fever vs. Persistent fever - dist=1)
Buscando "ataxic gait" en HPO...
	HP:0002066 - Gait ataxia (ataxic gait vs. Ataxic gait - dist=0)
	HP:0002066 - Gait ataxia (ataxic gait vs. Gait ataxia - dist=0)
Buscando "dementia forgetfulness" en HPO...
	HP:0030219 - Semantic dementia (dementia vs. Semantic dementia - dist=1)
Buscando "apathy" en HPO...
	HP:0000741 - Apathy (apathy vs. Apathy - dist=0)
Buscando "right occipital bleeding" en HPO...
	HP:0100608 - Metrorrhagia (bleeding vs. Intermenstrual bleeding - dist=1)
	HP:0100608 - Metrorrhagia (bleeding vs. Metrorrhagia - dist=1)
Buscando "haemosiderin" en HPO...
	No se han detectado términos HPO para "haemosiderin"
Buscando "siderosis" en HPO...
	No se han detectado términos HPO para "siderosis"
Buscando "lactate" en HPO...
	No se han detectado términos HPO para "lactate"
Buscando "glucose" en HPO...
	No s



	HP:0100608 - Metrorrhagia (bleeding vs. Intermenstrual bleeding - dist=1)
	HP:0100608 - Metrorrhagia (bleeding vs. Metrorrhagia - dist=1)
Buscando "death" en HPO...
	HP:0100758 - Gangrene (death vs. Gangrene - dist=1)
Buscando "subarachnoid haemorrhage" en HPO...
	HP:0002138 - Subarachnoid hemorrhage (subarachnoid haemorrhage vs. Subarachnoid haemorrhage - dist=0)
Buscando "herniation" en HPO...
	HP:0008441 - Herniation of intervertebral nuclei (herniation vs. Herniated disc - dist=1)
	HP:0008441 - Herniation of intervertebral nuclei (herniation vs. Herniated disk - dist=1)
Buscando "undifferentiated carcinoma left occipital and right parietooccipital but no primary tumour" en HPO...




	HP:0030447 - Merkel cell skin cancer (undifferentiated carcinoma vs. Cutaneous APUDoma - dist=2)
Buscando "siderosis" en HPO...
	No se han detectado términos HPO para "siderosis"
Buscando "bleeding" en HPO...
	HP:0100608 - Metrorrhagia (bleeding vs. Intermenstrual bleeding - dist=1)
	HP:0100608 - Metrorrhagia (bleeding vs. Metrorrhagia - dist=1)
Buscando "ataxia hypoacusis" en HPO...
	HP:0000365 - Hearing impairment (hypoacusis vs. Hypoacusis - dist=0)
Buscando "dementia" en HPO...
	HP:0030219 - Semantic dementia (dementia vs. Semantic dementia - dist=1)
Buscando "trauma" en HPO...
	HP:0500260 - Triggered by head trauma (trauma vs. Head trauma triggered symptoms - dist=3)
	HP:0500260 - Triggered by head trauma (trauma vs. Triggered by head trauma - dist=3)
Buscando "siderosis" en HPO...
	No se han detectado términos HPO para "siderosis"
