In [1]:
import spacy
import json
import os
import re


# from negspacy.negation import Negex

from scispacy.linking import EntityLinker
from scispacy.abbreviation import AbbreviationDetector

In [2]:
# setting up nlp pipeline
# nlp = medspacy.load()
nlp = spacy.load("en_core_sci_sm")
nlp.add_pipe("abbreviation_detector")
nlp.add_pipe("scispacy_linker", config={"resolve_abbreviations": True, "linker_name": "umls"})


test_string_1 = """Myocardial infarction (MI), colloquially known as "heart attack,"
is caused by decreased or complete cessation of blood flow to a portion of the myocardium. 
Myocardial infarction may be "silent," and go undetected, or it could be a catastrophic event
leading to hemodynamic deterioration and sudden death. Most myocardial infarctions are due to 
underlying coronary artery disease, the leading cause of death in the United States. 
With coronary artery occlusion, the myocardium is deprived of oxygen. 
Prolonged deprivation of oxygen supply to the myocardium can lead to myocardial cell death and necrosis. 
Patients can present with chest discomfort or pressure that can radiate to the neck, 
jaw, shoulder, or arm. In addition to the history and physical exam, myocardial ischemia 
may be associated with ECG changes and elevated biochemical markers such as cardiac troponins. 
This activity describes the pathophysiology, evaluation, and management of myocardial infarction 
and highlights the role of the interprofessional team in improving care for affected patients. Hypertension"""

test_string_2 = """Hypertension"""

test_string_3 = ""


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [8]:
# call to set the case of the file, which will fill cuis list with relevant information
def set_case(case, section):
    with open("config.json") as json_file:
        cases = json.load(json_file)
    rules_fail = cases[case.lower()][section.lower()]["fail"]
    rules_review = cases[case.lower()][section.lower()]["review"]
    return (rules_fail, rules_review)

In [4]:
# search by cuis
def cui_search(nlp_doc, rule):
    term_name = rule["name"]
    search_cuis = rule["cuis"]
    for entity in nlp_doc.ents:
        for umls_ent in entity._.kb_ents:
            for cui in search_cuis:
                if umls_ent[0] == cui[0]:
                    continue
                else:
                    return [False, term_name]
    
    return [True, term_name]

In [10]:
# search by term using regex
def term_search(text, rule):
    print(rule["name"])
    print(rule["term"])
    term_name = rule["name"]
    search_term = rule["term"]
    if re.search(search_term, text, re.IGNORECASE):
        print(f"Search successful, {term_name}")
        return [True, term_name]
    else :
        print(f"Search unsuccessful, {term_name}")
        return [False, term_name]

In [9]:
def check(case, section, text):
    rules_fail, rules_review = set_case(case, section)
    # do nlp on text
    doc = nlp(text)
    pass_conditions = []
    # check terms needed to review first
    # for each rule needed to not fail
    for rule in rules_fail:
        # if searching by cui
        if 'cuis' in rule:
            flag = cui_search(doc, rule)
            if flag[0] == False: 
                flag.append("Fail")
                pass_conditions.append(flag)
                return pass_conditions
            else:
                flag.append("Needs review")

        # if searching by regex
        if 'term' in rule:
            flag = term_search(text, rule)
            if flag[0] == False: 
                flag.append("Fail")
                pass_conditions.append(flag)
                return pass_conditions
            else: 
                flag.append("Needs review")
            
    
    # for each rule needed to pass without review
    for rule in rules_review:
        # collecting the status of each term we're searching for
        
        # if searching by cui
        if 'cuis' in rule:
            flag = cui_search(doc, rule)
            if flag[0] == True:
                flag.append("Pass")
            else: 
                flag.append("Needs review")
            pass_conditions.append(flag)
            
        # if searching by regex
        if 'term' in rule:
            flag = term_search(text, rule)
            if flag[0] == True:
                flag.append("Pass")
            else: 
                flag.append("Needs review")
            pass_conditions.append(flag)
        
    # if all terms found, flag to pass without review
    if all(flag == True for (flag, _, _,) in pass_conditions):
        return {True, "All terms found", "Pass"}
    
    # otherwise, return missing terms
    else:
        missing = []
        for term in pass_conditions:
            if term[0] == False:
                missing.append(term)
        return missing


In [11]:
flag_1 = check("cage", "subjective", test_string_1)
flag_2 = check("cage", "subjective", test_string_2)
flag_3 = check("cage", "subjective", test_string_3)
print(flag_1)
print(flag_2)
print(flag_3)

hypertension
hypertension
Search successful, hypertension
myocardial infarction
(myocardial)\s+(infarction)
Search successful, myocardial infarction
hypertension
hypertension
Search successful, hypertension
myocardial infarction
(myocardial)\s+(infarction)
Search unsuccessful, myocardial infarction
hypertension
hypertension
Search unsuccessful, hypertension
{True, 'Pass', 'All terms found'}
[[False, 'myocardial infarction', 'Needs review']]
[[False, 'hypertension', 'Fail']]
