In [None]:
from pathlib import Path

from spacy.lang.en import English
from tqdm.notebook import tqdm

from discharge_summaries.preprocessing.preprocess_snomed import Snomed

In [None]:
# Root folder holding the raw SNOMED releases, resolved relative to the
# directory the notebook is run from (its parent / data / snomed).
SNOMED_DIR = Path.cwd().parent.joinpath("data", "snomed")

# Release stamps interpolated into the release folder and file names below.
# NOTE: despite its name, UK_EXT_DIR is a release date stamp used exactly like
# the two *_DATE constants; the name is kept because later cells reference it.
INT_DATE = "20230131"
UK_EXT_DIR = "20230802"
UK_DRUG_DATE = "20230927"

In [None]:
# Each release keeps its tables under <release folder>/Snapshot/Terminology.
# International edition.
int_terminology_dir = SNOMED_DIR.joinpath(
    f"SnomedCT_InternationalRF2_PRODUCTION_{INT_DATE}T120000Z",
    "Snapshot",
    "Terminology",
)
# UK clinical extension.
uk_ext_terminology_dir = SNOMED_DIR.joinpath(
    f"SnomedCT_UKClinicalRF2_PRODUCTION_{UK_EXT_DIR}T000001Z",
    "Snapshot",
    "Terminology",
)
# UK drug extension.
uk_drug_ext_terminology_dir = SNOMED_DIR.joinpath(
    f"SnomedCT_UKDrugRF2_PRODUCTION_{UK_DRUG_DATE}T000001Z",
    "Snapshot",
    "Terminology",
)

In [None]:
# Concept snapshot file of each release (international, UK clinical, UK drug).
int_concepts_fpath = int_terminology_dir.joinpath(
    f"sct2_Concept_Snapshot_INT_{INT_DATE}.txt"
)
uk_ext_concepts_fpath = uk_ext_terminology_dir.joinpath(
    f"sct2_Concept_UKCLSnapshot_GB1000000_{UK_EXT_DIR}.txt"
)
uk_drug_ext_concepts_fpath = uk_drug_ext_terminology_dir.joinpath(
    f"sct2_Concept_UKDGSnapshot_GB1000001_{UK_DRUG_DATE}.txt"
)

In [None]:
# Description snapshot file of each release (international, UK clinical,
# UK drug).
int_description_fpath = int_terminology_dir.joinpath(
    f"sct2_Description_Snapshot-en_INT_{INT_DATE}.txt"
)
uk_ext_description_fpath = uk_ext_terminology_dir.joinpath(
    f"sct2_Description_UKCLSnapshot-en_GB1000000_{UK_EXT_DIR}.txt"
)
uk_drug_ext_description_fpath = uk_drug_ext_terminology_dir.joinpath(
    f"sct2_Description_UKDGSnapshot-en_GB1000001_{UK_DRUG_DATE}.txt"
)

In [None]:
# Relationship snapshot file of each release (international, UK clinical,
# UK drug).
int_relations_fpath = int_terminology_dir.joinpath(
    f"sct2_Relationship_Snapshot_INT_{INT_DATE}.txt"
)
uk_ext_relations_fpath = uk_ext_terminology_dir.joinpath(
    f"sct2_Relationship_UKCLSnapshot_GB1000000_{UK_EXT_DIR}.txt"
)
uk_drug_ext_relations_fpath = uk_drug_ext_terminology_dir.joinpath(
    f"sct2_Relationship_UKDGSnapshot_GB1000001_{UK_DRUG_DATE}.txt"
)

In [None]:
# Build the Snomed object from the nine raw snapshot files. Arguments are
# positional and grouped by table (concepts, descriptions, relationships),
# each group ordered international / UK clinical / UK drug -- keep that order,
# since the parameter names are not visible from this notebook.
snomed = Snomed.load_from_raw_snomed_files(
    int_concepts_fpath,
    uk_ext_concepts_fpath,
    uk_drug_ext_concepts_fpath,
    int_description_fpath,
    uk_ext_description_fpath,
    uk_drug_ext_description_fpath,
    int_relations_fpath,
    uk_ext_relations_fpath,
    uk_drug_ext_relations_fpath,
)

In [None]:
# Persist the freshly built Snomed object under SNOMED_DIR (the on-disk format
# is decided by Snomed.save and is not visible here).
snomed.save(SNOMED_DIR)

In [None]:
# Load the object back from the same directory; the lookup cells that follow
# query this reloaded copy, which exercises the save/load round trip.
reload = Snomed.load(SNOMED_DIR)

In [None]:
# Display the entry stored for concept id 127362006 (integer key) in the
# cui_to_preferred_term mapping.
reload.cui_to_preferred_term[127362006]

In [None]:
# Display the cui_to_synonyms entry for the same concept id (127362006).
reload.cui_to_synonyms[127362006]

In [None]:
# Display what get_cuis returns for the text "flu" -- presumably the concept
# ids whose terms match it; confirm the matching rules in Snomed.get_cuis.
reload.get_cuis("flu")

In [None]:
# Display the child concept ids reported for concept id 34887511000001102.
# NOTE(review): whether this is direct children only or all descendants is
# decided inside Snomed.get_child_cuis -- confirm.
reload.get_child_cuis(34887511000001102)

In [None]:
# Keep the get_cuis result for "heart attack" in `cuis` and display it.
cuis = reload.get_cuis("heart attack")
cuis

In [None]:
# Tokenizer of a blank spaCy English pipeline; used here to build the matcher
# and in the next cell to tokenize the text being searched.
tokenizer = English().tokenizer
# Matcher built from the single query phrase "heart attack".
# NOTE(review): the next cell runs it on "myocardial infarction" text, so
# get_phrase_matcher presumably expands the phrase to related SNOMED terms --
# confirm against its implementation.
heart_attack_matcher = reload.get_phrase_matcher({"heart attack"}, tokenizer)

In [None]:
# Run the matcher over a tokenized example sentence and display the matches.
# as_spans=True is passed straight to the matcher; assuming it is a spaCy
# matcher, this requests Span objects instead of match tuples -- confirm.
heart_attack_matcher(
    tokenizer("The patient had old myocardial infarction"), as_spans=True
)

In [None]:
# Same kind of lookup, but on the originally built `snomed` object rather than
# the reloaded copy.
snomed.get_cuis("morphine")

In [None]:
# Display the cui_to_preferred_term entry for concept id 773372004.
snomed.cui_to_preferred_term[773372004]

In [None]:
# NOTE(review): exact repeat of the earlier `snomed.get_cuis("morphine")` cell;
# nothing in between reassigns `snomed`, so this cell is likely a leftover.
snomed.get_cuis("morphine")

In [None]:
# Parse the UK drug extension *description* snapshot. It is kept under its own
# name so it is neither mistaken for nor overwritten by `drug_relations_df`,
# which is built from the relationship file.
drug_descriptions_df = Snomed._parse_snomed_file(uk_drug_ext_description_fpath)

In [None]:
# Parse the UK drug extension relationship snapshot and reduce it to an edge
# list: keep rows flagged active ("1") whose typeId equals
# Snomed.IS_A_RELATIONSHIP_ID, drop exact duplicate rows, then rename the
# endpoints so sourceId reads as the child and destinationId as the parent.
drug_relations_df = (
    Snomed._parse_snomed_file(uk_drug_ext_relations_fpath)
    .loc[lambda relations: relations.active == "1"]
    .loc[lambda relations: relations.typeId == Snomed.IS_A_RELATIONSHIP_ID]
    .drop_duplicates()
    .rename(columns={"sourceId": "child_cui", "destinationId": "parent_cui"})
)

In [None]:
# Collapse the edge list to one row per child: the index is `child_cui` and
# the single `parent_cui` column holds the set of that child's parents.
drug_relations_df_grouped = (
    drug_relations_df.groupby("child_cui")["parent_cui"].apply(set).to_frame()
)

In [None]:
def recurse(child_cui, drug_relations_df_grouped):
    """Return the top-most ancestors reachable from ``child_cui``.

    Follows parent links upwards and collects every concept that has no row
    of its own in ``drug_relations_df_grouped`` (i.e. no recorded parents).

    The walk is iterative and remembers visited concepts, so shared ancestors
    are expanded once instead of once per path, deep hierarchies cannot hit
    Python's recursion limit, and a cyclic table terminates.

    Args:
        child_cui: Concept id to start from. Must use the same type as the
            labels in the table's index.
        drug_relations_df_grouped: DataFrame indexed by child concept id with
            a ``parent_cui`` column holding an iterable (a set in this
            notebook) of that child's parent ids. The index is expected to be
            unique.

    Returns:
        Set of concept ids. ``{child_cui}`` when ``child_cui`` itself is not
        in the index.
    """
    children_with_parents = drug_relations_df_grouped.index
    roots = set()
    visited = set()
    pending = [child_cui]
    while pending:
        cui = pending.pop()
        if cui in visited:
            # Already expanded via another path (or a cycle); skip.
            continue
        visited.add(cui)
        if cui not in children_with_parents:
            # No recorded parents: this is an end point of the walk.
            roots.add(cui)
        else:
            pending.extend(drug_relations_df_grouped.loc[cui, "parent_cui"])
    return roots


# Spot-check on a single concept. The id is passed as a string here, unlike
# the integer keys used with the Snomed lookups earlier -- presumably because
# the parsed relationship table keeps ids as strings (its `active` column is
# compared against "1"); confirm.
recurse("317320001", drug_relations_df_grouped)

In [None]:
# Union of the recurse() results over every child concept in the grouped
# table; tqdm reports progress as the index is consumed.
answer = set().union(
    *(
        recurse(child_cui, drug_relations_df_grouped)
        for child_cui in tqdm(drug_relations_df_grouped.index)
    )
)
answer