In [205]:
# /Users/teliov/TUD/Thesis/Medvice/Notebooks/data/04_06_new_data/data/split
# So we can use the *thesislib* package
import sys
import os

# Resolve the parent directory of the working directory and add it to the
# import path (only once), so packages located one level up can be imported.
module_path = os.path.abspath("..")

if module_path not in sys.path:
    sys.path.append(module_path)

In [206]:
from thesislib.utils import pathutils
import json

In [207]:
# Directory (resolved through pathutils) into which the JSON maps built by
# this notebook are written further down.
op_dir = pathutils.get_data_file("05_27_nlice/nlice/data")

In [208]:
# Load the AI-Med definitions. The loaded object is iterated below as a
# mapping whose values each carry a "symptoms" mapping.
ai_med_data_file = pathutils.get_data_file('definitions/ai-med-data.json')
with open(ai_med_data_file) as fp:
    ai_med_data = json.load(fp)

In [209]:
# Collect, per symptom slug, every NLICE option seen across all entries of
# ai_med_data. Each of the four standard dimensions starts out with the
# "0-None" placeholder, so "not specified" is always a valid choice.
# nlice_key_set is a plain list: it records every NLICE key encountered,
# duplicates included.
symptom_combinations = {}
nlice_key_set = []
for condition in ai_med_data.values():
    for symptom in condition.get("symptoms").values():
        slug = symptom.get("slug")
        options = symptom_combinations.setdefault(slug, {
            "nature": ["0-None"],
            "vas": ["0-None"],
            "duration": ["0-None"],
            "location": ["0-None"]
        })
        for nlice_key, nlice_values in symptom.get("nlice", {}).items():
            seen_values = nlice_values.keys()
            nlice_key_set.append(nlice_key)
            # Union with what is already known, kept sorted for a stable order.
            merged = set(options.get(nlice_key, []))
            merged.update(seen_values)
            options[nlice_key] = sorted(merged)

In [210]:
# Manual overrides: replace the collected headache "nature" options and the
# abdominal-pain "location" options with a smaller, hand-picked set.
# Values outside these sets are folded onto them via reduction_map.
symptom_combinations["headache"]["nature"] = ["0-None", "aching", "stabbing"]
symptom_combinations["abdominal-pain"]["location"] = ["0-None", "llq", "luq", "rlq", "ruq"]

In [211]:
# Per-symptom substitution table: maps a fine-grained NLICE value (inner key)
# to the coarser value that replaces it (inner value). It is applied by
# tranform_symptoms before the combination is looked up in the encoding map.
reduction_map = {
    "headache": {
        "thunderclap": "stabbing",
        "throbbing": "aching"
    },
    "abdominal-pain": {
        "epigastric": "ruq",
        "left-abdomen": "llq",
        "right-abdomen": "rlq",
        "umbilical": "rlq",
        "upper-abdomen": "luq"
    }
}

In [212]:
# For every symptom, enumerate all "nature;vas;duration;location" combinations
# and give each one a 1-based integer code.
# Exactly one option per dimension is combined: a symptom has a single nature
# and a single location (multi-location selections are not encoded).
import itertools

symptom_combination_encoding_map = {}
for slug, options in symptom_combinations.items():
    dimensions = [options.get(name) for name in ("nature", "vas", "duration", "location")]

    labels = (";".join(choice).strip() for choice in itertools.product(*dimensions))
    labels = [label for label in labels if len(label) > 0]

    symptom_combination_encoding_map[slug] = {
        label: position for position, label in enumerate(labels, start=1)
    }

In [213]:
# Sanity check: number of encoded combinations for abdominal-pain.
len(symptom_combination_encoding_map["abdominal-pain"])

240

In [214]:
# Symptoms that have more than one encoded combination, i.e. the ones for
# which at least one NLICE dimension offers a choice beyond "0-None".
nlice_symptoms = [
    slug
    for slug, encoding in symptom_combination_encoding_map.items()
    if len(encoding) > 1
]

In [215]:
from glob import glob
import re
import hashlib
def slugify_condition(condition_name):
    """Convert a display name into a lowercase slug.

    Each run of whitespace becomes a single hyphen, every apostrophe becomes
    a hyphen, and parentheses are removed. Leading/trailing whitespace is not
    trimmed (it turns into hyphens as well).

    :param condition_name: the display name to convert (str)
    :return: the slug (str)
    """
    slug = re.sub(r"\s+", "-", condition_name.lower())
    slug = slug.replace("'", "-")
    return slug.translate({ord("("): None, ord(")"): None})

def get_symptom_condition_map(module_dir):
    """Scan the ``*.json`` module files in a directory for symptoms and conditions.

    Every file is expected to hold a JSON object with a "states" mapping.
    States of type "ConditionOnset" contribute their first code entry to the
    condition map; states of type "Symptom" contribute their symptom display
    name to the symptom map. All other state types are ignored.

    :param module_dir: directory containing the module JSON files
    :return: tuple ``(symptom_map, condition_map)`` where ``symptom_map`` maps
        the sha224 hex digest of a symptom slug to that slug, and
        ``condition_map`` maps a condition code to the slugified display name
    """
    symptom_map = {}
    condition_map = {}
    for json_path in glob(os.path.join(module_dir, "*.json")):
        with open(json_path) as fp:
            states = json.load(fp).get("states")
        for state in states.values():
            state_type = state.get("type")
            if state_type == "ConditionOnset":
                # Only the first code of the onset state is used.
                first_code = state.get("codes")[0]
                condition_map[first_code["code"]] = slugify_condition(first_code.get("display"))
            elif state_type == "Symptom":
                symptom_slug = slugify_condition(state.get("symptom_code").get("display"))
                digest = hashlib.sha224(symptom_slug.encode("utf-8")).hexdigest()
                symptom_map[digest] = symptom_slug
    return symptom_map, condition_map

In [216]:
# Directory of module JSON files scanned by get_symptom_condition_map.
# The location is machine-specific, so it can be overridden through the
# NLICE_MODULE_DIR environment variable; the original path stays the default.
nlice_module_dir = os.environ.get(
    "NLICE_MODULE_DIR",
    "/Users/teliov/TUD/symcat-to-synthea/output/module_ai_med_adv",
)
nlice_symptom_map, nlice_condition_map = get_symptom_condition_map(nlice_module_dir)

In [219]:
# Base symptoms only: drop every entry whose slug contains the "nlice" marker.
actual_symptom_map = dict(
    (symptom_hash, slug)
    for symptom_hash, slug in nlice_symptom_map.items()
    if "nlice" not in slug
)

In [220]:
# Inverse lookup for base symptoms: slug -> hash.
reverse_map = dict(
    (slug, symptom_hash) for symptom_hash, slug in actual_symptom_map.items()
)

In [221]:
# Slugs of NLICE-qualified symptoms have the form
#   "<symptom>-nlice-<dimension>-<value>"
# The pattern is a raw string (no invalid "\-" escape) and the dimension group
# is non-greedy, so hyphenated values such as "left-abdomen" are kept whole.
# A greedy dimension group would capture "location-left" / "abdomen" instead,
# and the hyphenated keys of reduction_map could never match.
nlice_regex = re.compile(r"(.*)-nlice-(.*?)-(.*)")

# Maps every symptom hash to its base symptom slug plus, when the slug is
# NLICE-qualified, the dimension ("nlice") and its value; both are None for
# plain symptoms.
nlice_transformation_map = {}
for key, value in nlice_symptom_map.items():
    match = nlice_regex.match(value)
    if match is None:
        xform = {
            "symptom": value,
            "nlice":  None,
            "value": None
        }
    else:
        xform = {
            "symptom": match.group(1),
            "nlice": match.group(2),
            "value": match.group(3)
        }
    nlice_transformation_map[key] = xform

In [222]:
def tranform_symptoms(symptom_str, transformation_map, symptom_combination_encoding_map, reduction_map):
    """Re-encode a symptom string as ``<symptom hash>|<combination code>`` pairs.

    The input lists symptom hashes separated by ";". Hashes that refer to the
    same base symptom (the plain symptom and its NLICE-qualified variants) are
    merged into one entry whose nature/vas/duration/location combination is
    looked up in the encoding map.

    :param symptom_str: ";"-separated symptom hashes; empty tokens are skipped,
        so an empty string yields an empty result
    :param transformation_map: hash -> {"symptom", "nlice", "value"}
    :param symptom_combination_encoding_map: symptom slug ->
        {"nature;vas;duration;location": integer code}
    :param reduction_map: symptom slug -> {fine value: coarse value}, applied
        to NLICE values before the lookup
    :return: ";"-separated ``<sha224 of symptom slug>|<code>`` entries, in
        order of first appearance of each base symptom
    :raises KeyError: if a hash is missing from ``transformation_map`` or the
        resulting combination is missing from the encoding map
    """
    nlice_dimensions = ("nature", "vas", "duration", "location")

    symptoms = {}
    for item in symptom_str.split(";"):
        transformed = transformation_map.get(item)
        if transformed is None:
            if not item:
                # Empty input or a stray separator: nothing to encode.
                continue
            raise KeyError("unknown symptom hash: {!r}".format(item))

        name = transformed.get("symptom")
        if name not in symptoms:
            # Until a qualifier says otherwise, every dimension is unspecified.
            symptoms[name] = {dimension: "0-None" for dimension in nlice_dimensions}

        nlice = transformed.get("nlice")
        nlice_value = transformed.get("value")
        if nlice is not None and nlice_value is not None:
            # Collapse fine-grained values onto the ones present in the encoding.
            if name in reduction_map and nlice_value in reduction_map[name]:
                nlice_value = reduction_map[name][nlice_value]
            symptoms[name][nlice] = nlice_value

    transformed_symptoms = []
    for key, value in symptoms.items():
        ordered = ";".join(value.get(dimension) for dimension in nlice_dimensions)
        encoding = symptom_combination_encoding_map[key][ordered]
        symptom_hash = hashlib.sha224(key.encode("utf-8")).hexdigest()
        transformed_symptoms.append("|".join([symptom_hash, str(encoding)]))
    return ";".join(transformed_symptoms)

In [223]:
# Smoke test: run the transformation on a hand-picked input made of two
# ";"-separated symptom hashes.
m = "b1feb680c25ab9e5d88d282638d14ab9cf597c264dad9d6b6558e8dc;ba54bddaf72dfbdff9fc5aeee4364a993978f5c0d395442a33395302"
v = tranform_symptoms(m, nlice_transformation_map, symptom_combination_encoding_map, reduction_map)

In [224]:
# Show the encoded result of the sample input.
v

'8f2c2f06a10a80de2d46ff07627b5d6cd6a19cb4ebc28c4c7c124f82|16;67fe1b0607dced2d78d47eb7b8f2b599c0823043d54f0d875d9e5505|17'

In [226]:
# Persist the three lookup tables as indented JSON files in the output directory.
transformation_map_file = os.path.join(op_dir, "transformation_map.json")
encoding_map_file = os.path.join(op_dir, "encoding_map.json")
reduction_map_file = os.path.join(op_dir, "reduction_map.json")

for target_file, payload in (
    (transformation_map_file, nlice_transformation_map),
    (encoding_map_file, symptom_combination_encoding_map),
    (reduction_map_file, reduction_map),
):
    with open(target_file, "w") as fp:
        json.dump(payload, fp, indent=4)

In [227]:
# Show which symptoms have more than one encoded combination.
nlice_symptoms

['headache', 'limb-weakness', 'abdominal-pain', 'fever']

In [228]:
# Repeated display of the same list (identical to the previous cell).
nlice_symptoms

['headache', 'limb-weakness', 'abdominal-pain', 'fever']

In [230]:
# Slug -> hash for the symptoms listed in nlice_symptoms.
exc = dict((slug, reverse_map[slug]) for slug in nlice_symptoms)

In [231]:
# Show the slug -> hash mapping built above.
exc

{'headache': '67fe1b0607dced2d78d47eb7b8f2b599c0823043d54f0d875d9e5505',
 'limb-weakness': '5a541f8150a6de2483d6c9b24d71e90eb1af2b96e7f4db49979f2e2a',
 'abdominal-pain': '8f2c2f06a10a80de2d46ff07627b5d6cd6a19cb4ebc28c4c7c124f82',
 'fever': 'dd52980213ed3f58007375b494cf13182420dd104acf39cb84c683ab'}

In [232]:
# Inspect the encoding table for "headache".
# Key layout: nature, vas, duration, location (joined with ";").
# Scratch example: throbbing, mild, short, None
#   i.e. "throbbing-mild-short-none"; reduction_map turns "throbbing" into
#   "aching", so the key looked up is "aching;mild;short;0-None".
# The notes are comments so that the cell runs on a fresh kernel and the
# table remains the displayed value.
symptom_combination_encoding_map["headache"]

{'0-None;0-None;0-None;0-None': 1,
 '0-None;0-None;long;0-None': 2,
 '0-None;0-None;medium;0-None': 3,
 '0-None;0-None;short;0-None': 4,
 '0-None;mild;0-None;0-None': 5,
 '0-None;mild;long;0-None': 6,
 '0-None;mild;medium;0-None': 7,
 '0-None;mild;short;0-None': 8,
 '0-None;moderate;0-None;0-None': 9,
 '0-None;moderate;long;0-None': 10,
 '0-None;moderate;medium;0-None': 11,
 '0-None;moderate;short;0-None': 12,
 '0-None;severe;0-None;0-None': 13,
 '0-None;severe;long;0-None': 14,
 '0-None;severe;medium;0-None': 15,
 '0-None;severe;short;0-None': 16,
 'aching;0-None;0-None;0-None': 17,
 'aching;0-None;long;0-None': 18,
 'aching;0-None;medium;0-None': 19,
 'aching;0-None;short;0-None': 20,
 'aching;mild;0-None;0-None': 21,
 'aching;mild;long;0-None': 22,
 'aching;mild;medium;0-None': 23,
 'aching;mild;short;0-None': 24,
 'aching;moderate;0-None;0-None': 25,
 'aching;moderate;long;0-None': 26,
 'aching;moderate;medium;0-None': 27,
 'aching;moderate;short;0-None': 28,
 'aching;severe;0-None