In [1]:
#!pip install ./medkit_lib-0.13a0-py3-none-any.whl[all]

Processing ./medkit_lib-0.13a0-py3-none-any.whl
medkit-lib is already installed with the same version as the provided wheel. Use --force-reinstall to force an installation of the wheel.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
from medkit.core import DocPipeline, PipelineStep
from medkit.text.segmentation import SentenceTokenizer, SyntagmaTokenizer
from medkit.text.context import FamilyDetector, NegationDetector
from medkit.text.ner import UMLSMatcher, EDSNLPDateMatcher

# Document-level pipeline: each step reads the annotations stored under its
# `input` key and, when an `output` key is given, stores its results under it.
pipeline = DocPipeline(
    # entry point: the raw text of the document
    input="full_text",
    steps=[
        # 1. raw text -> sentences
        PipelineStep(operation=SentenceTokenizer(), input="full_text", output="sentences"),
        # 2. detect sentences about family history (no output key is declared)
        PipelineStep(operation=FamilyDetector(), input="sentences"),
        # 3. sentences -> syntagmas
        PipelineStep(operation=SyntagmaTokenizer(), input="sentences", output="syntagmas"),
        # 4. detect negated syntagmas (no output key is declared)
        PipelineStep(operation=NegationDetector(), input="syntagmas"),
        # 5. find medical entities using the UMLS knowledge base
        #    (French terms, UMLS files read from the "umls/" directory)
        PipelineStep(
            operation=UMLSMatcher(umls_dir="umls/", language="FRE"),
            input="syntagmas",
            output="medical_entities",
        ),
        # 6. find dates in the same syntagmas
        PipelineStep(operation=EDSNLPDateMatcher(), input="syntagmas", output="dates"),
    ],
    # results kept by the pipeline: the medical entities and the dates
    output=["medical_entities", "dates"],
)

In [3]:
from medkit.core.text import TextDocument

# Load the medical report stored in "report.txt" as a TextDocument.
doc = TextDocument.from_file("report.txt")
# Apply the pipeline to a one-element list holding that document.
# NOTE(review): presumably run() attaches the resulting annotations to `doc`
# itself, since the following cell reads the results from `doc` — confirm.
pipeline.run([doc])

In [4]:
from spacy import displacy
from medkit.text.spacy.displacy_utils import medkit_doc_to_displacy

# enrich entity labels with negation [n] or family [f] indicators
def format(entity):
    """Return the display label of ``entity``, suffixed with a context flag.

    The label is suffixed with ``"[n]"`` when the first "negation" attribute
    of the entity has a truthy value, otherwise with ``"[f]"`` when its first
    "family" attribute has a truthy value. Negation takes precedence over
    family. An entity that carries neither flag, or that has no such
    attribute at all, keeps its plain label.

    Parameters
    ----------
    entity
        Object exposing ``label`` and ``attrs.get(label=...)``, the latter
        returning a sequence of objects with a ``value`` field.

    Returns
    -------
    str
        ``entity.label``, optionally followed by ``"[n]"`` or ``"[f]"``.

    Notes
    -----
    The name shadows the builtin ``format``; it is kept because the rendering
    cell passes this function by name as ``entity_formatter``.
    """

    def _is_flagged(attr_label):
        # An entity may lack the attribute entirely; indexing [0] on the
        # empty result would raise IndexError, so treat that case as
        # "not flagged".
        found = entity.attrs.get(label=attr_label)
        return bool(found) and bool(found[0].value)

    label = entity.label
    if _is_flagged("negation"):
        return label + "[n]"
    if _is_flagged("family"):
        return label + "[f]"
    return label

# Custom displacy colors, keyed by the labels produced by the entity formatter:
# every plain entity label shares one color, while negated ("[n]") and
# family-related ("[f]") disorders each get their own.
COLORS = {
    **dict.fromkeys(("chemical", "anatomy", "procedure", "disorder"), "#0eab81"),
    "disorder[n]": "#f5b0b0",
    "disorder[f]": "#dce9fc",
}

# Convert the annotated document to displacy's input format, relabelling each
# entity through `format`, then render the document and its entities with
# displacy using the custom color map.
displacy_data = medkit_doc_to_displacy(doc, entity_formatter=format)
displacy.render(
    docs=displacy_data,
    style="ent",
    manual=True,
    options={"colors": COLORS},
)