In [1]:
import pathlib
import sys

# Put the parent of the current working directory on the import path
# (presumably the repository root that holds the CogniScan package — confirm
# the notebook is launched from a direct subdirectory of it).
PACKAGE_PATH = pathlib.Path.cwd().parent.resolve()
assert PACKAGE_PATH.exists(), f"Package path does not exist: {PACKAGE_PATH}"

# Guard the append so that re-running this cell does not accumulate duplicate
# entries in sys.path.
if str(PACKAGE_PATH) not in sys.path:
    sys.path.append(str(PACKAGE_PATH))

In [2]:
import json
import numpy as np

In [3]:
from CogniScan.encoder import Encoder
from CogniScan.index import RamIndex
from CogniScan.wrapper import DRPSNPTO_Wrapper

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# The "data" folder is expected next to the notebook's working directory,
# i.e. inside the parent of the current working directory.
DATADIR = pathlib.Path.cwd().parent.resolve().joinpath("data")
assert DATADIR.exists()

In [5]:
# Read the demo JSON file from the data directory and parse it in one step.
demo_data_path = DATADIR.joinpath("abstracts_en_1400.json")
demo_data = json.loads(demo_data_path.read_text(encoding="utf-8"))

# Cell output: number of entries that were loaded.
len(demo_data)

1458

In [6]:
# Pull the "abstract" field out of every demo entry.
texts = [entry["abstract"] for entry in demo_data]

# No per-text metadata is used here: one None placeholder per text.
meta = [None for _ in texts]

In [7]:
# Build the text encoder. The argument is presumably a Hugging Face model
# identifier — TODO confirm against CogniScan.encoder.Encoder.
encoder = Encoder("emilyalsentzer/Bio_ClinicalBERT")

In [8]:
# Load the ontology wrapper from the OWL file in the data directory; the
# wrapper is given the path as a plain string.
onto = DRPSNPTO_Wrapper(str(DATADIR.joinpath("DRPSNPTO v1.0.owl")))

Ontology loaded successfully!
Ontology name: DRPSNPTO v1.0
Ontology base IRI: http://www.semanticweb.org/zhenyuzhang/ontologies/2020/DRPSNPTO/
Number of classes: 610
Number of object properties: 60
Number of data properties: 0
Number of individuals: 0


In [9]:
# Instantiate the index with default arguments; build_index is called on it
# in a later cell.
index = RamIndex()

In [10]:
# Build the index from the abstract texts, their (placeholder) metadata and
# the encoder.
index.build_index(texts, meta, encoder)

In [11]:
def get_top_k_respect_description(
    query: str,
    encoder: Encoder,
    index: RamIndex,
    onto: DRPSNPTO_Wrapper,
    delimiter = ",",
):
    """Search the index using the query plus descriptions of related diseases.

    The query is split on ``delimiter`` into symptom names. Every non-blank,
    whitespace-stripped symptom is passed to ``onto.get_diseases_for_symptom``,
    and the ``"disease_description"`` value of each returned entry is collected.
    The raw query followed by those descriptions is handed to ``index.search``
    together with ``encoder``.

    Args:
        query: Symptom names separated by ``delimiter``.
        encoder: Encoder forwarded unchanged to ``index.search``.
        index: Index whose ``search`` method performs the lookup.
        onto: Ontology wrapper used to map symptoms to disease entries.
        delimiter: Separator between symptom names in ``query``.

    Returns:
        Whatever ``index.search`` returns for the assembled list of texts.
    """
    # Resolve all symptoms first, skipping fragments that are blank after
    # stripping (e.g. from a trailing delimiter).
    matched_diseases = [
        disease
        for symptom in map(str.strip, query.split(delimiter))
        if symptom
        for disease in onto.get_diseases_for_symptom(symptom)
    ]

    # The original query always comes first, followed by the descriptions.
    search_inputs = [query]
    search_inputs.extend(
        disease["disease_description"] for disease in matched_diseases
    )
    return index.search(search_inputs, encoder)

In [12]:
# Demo query: two comma-separated symptoms; the search result is the cell output.
get_top_k_respect_description(
    query="bizarre delusion, visual hallucination",
    encoder=encoder,
    index=index,
    onto=onto,
)

(1, 768)


[{'score': np.float32(0.8999916),
  'text': "In the present paper, human startle reaction (SR) characteristics were estimated by the amplitude of eye-lid reflex and extent of monosynaptic H-reflexes increase. SR depended not only on the parameters of sound stimulation (strength, unexpectedness) but also on the subject's functional state (attention, emotional background). Differences are given of the SR parameters from the orienting reaction. It is supposed that SR is an independent form of emotional-motor reaction of an adaptive character.",
  'meta': None},
 {'score': np.float32(0.89696985),
  'text': "In-depth clinical psychological investigation of airline pilots with neurocirculatory dystonia (n=194, mean age 38.57 +/- 0.85) and essentially healthy control pilots (n=183, mean age 38.4+/-0.92) revealed distinctive features in NCD pilots' mentality and behavior including personality, interpersonal communication, type of thinking, stress reaction, protection tactics, and mental dysfun