In [1]:
# /Users/teliov/TUD/Thesis/Medvice/Notebooks/data/04_06_new_data/data/split
# Put the notebook's parent directory on sys.path so the *thesislib* package can be imported
import sys
import os

# Absolute path of the directory one level above the current working directory.
module_path = os.path.abspath("..")

# Register it on the import path only once, so re-running the cell is harmless.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
%matplotlib inline

In [3]:
from thesislib.utils import pathutils
from thesislib.utils.stringutils import slugify

In [4]:
import json
import pathlib

In [5]:
# Load the AI-MED+ definitions file into memory.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's locale default (which is not UTF-8 everywhere).
ai_med_data_file = pathutils.get_data_file('definitions/ai-med-plus.json')
with open(ai_med_data_file, encoding="utf-8") as fp:
    ai_med_data = json.load(fp)

In [6]:
from glob import glob
import hashlib
import re

In [7]:
def get_symptom_condition_map(module_dir):
    """Build symptom and condition lookup tables from a directory of module JSON files.

    Every ``*.json`` file directly inside ``module_dir`` is read and its
    ``states`` mapping is scanned:

    * a ``ConditionOnset`` state contributes ``code -> slugified display``
      to the condition map, using the first entry of its ``codes`` list;
    * a ``Symptom`` state contributes ``sha224(slug) -> slug`` to the symptom
      map, where ``slug`` is the slugified ``symptom_code`` display name.

    Files are visited in sorted order so that, when two modules define the
    same key, the winning value (and the key order of the result) is the same
    on every run. Files without a ``states`` mapping, and states that lack
    the ``codes`` / ``symptom_code`` entry they need, are skipped instead of
    raising.

    Args:
        module_dir: directory containing the module ``*.json`` files.

    Returns:
        tuple: ``(symptom_map, condition_map)`` dictionaries as described
        above. Both are empty when the directory holds no usable modules.
    """
    symptom_map = {}
    condition_map = {}
    # glob() returns files in filesystem order; sort for reproducible output.
    for module_file in sorted(glob(os.path.join(module_dir, "*.json"))):
        with open(module_file, encoding="utf-8") as fp:
            module = json.load(fp)
        # A file without "states" has nothing to contribute.
        states = module.get("states") or {}
        for state in states.values():
            state_type = state.get("type")
            if state_type == "ConditionOnset":
                codes = state.get("codes")
                if not codes:
                    continue
                # Only the first listed code identifies the condition.
                code = codes[0]
                condition_map[code["code"]] = slugify(code.get("display"))
            elif state_type == "Symptom":
                symptom_code = state.get("symptom_code")
                if not symptom_code:
                    continue
                slug = slugify(symptom_code.get("display"))
                # Symptoms are keyed by the SHA-224 hex digest of their slug.
                slug_hash = hashlib.sha224(slug.encode("utf-8")).hexdigest()
                symptom_map[slug_hash] = slug
    return symptom_map, condition_map

In [8]:
# Directory holding the generated module JSON files for the "basic" run.
# Set the AI_MED_BASIC_MODULE_DIR environment variable to point somewhere
# else; the fallback is the original author's local path.
basic_module_dir = os.environ.get(
    "AI_MED_BASIC_MODULE_DIR",
    "/Users/teliov/TUD/symcat-to-synthea/output/module_ai_med_plus_basic",
)

In [9]:
basic_symptom_map, basic_condition_map = get_symptom_condition_map(basic_module_dir)

# A missing or empty module directory yields empty maps without any error;
# stop here instead of writing empty lookup files and training on them.
assert basic_symptom_map and basic_condition_map, (
    "No symptoms/conditions found in %r - check that the module directory exists "
    "and contains module JSON files" % (basic_module_dir,)
)

In [10]:
# Root data directory for this experiment and its "basic" sub-directory.
data_dir = pathutils.get_data_file("06_18_nlice_plus")
basic_data_dir = os.path.join(data_dir, "basic")

# Create the directory (and any missing parents); a no-op if it already exists.
os.makedirs(basic_data_dir, exist_ok=True)

In [11]:
# Persist the symptom lookup (hash -> slug) as indented JSON.
basic_symptom_map_file = os.path.join(basic_data_dir, "symptom_db.json")
with open(basic_symptom_map_file, "w") as fp:
    fp.write(json.dumps(basic_symptom_map, indent=4))

In [12]:
# Persist the condition lookup (code -> slug) as indented JSON.
basic_condition_map_file = os.path.join(basic_data_dir, "condition_db.json")
with open(basic_condition_map_file, "w") as fp:
    fp.write(json.dumps(basic_condition_map, indent=4))

In [13]:
from thesislib.utils.ml import process
from thesislib.utils.ml import runners, models

In [14]:
# Re-import the project helper modules in place so that edits made to them on
# disk are picked up without restarting the kernel. The results are bound to
# `_` so the cell does not display the returned module objects.
# NOTE(review): `models` (imported alongside `runners`) is not reloaded here -
# confirm that is intentional.
import importlib
_ = importlib.reload(process)
_ = importlib.reload(runners)

In [15]:
# Symptoms CSV for the "basic" run, resolved through pathutils.get_data_file;
# this is the file handed to process.split_data for the train/test split.
basic_data_csv = pathutils.get_data_file("06_18_nlice_plus/ai/output_med_ai_plus_basic/symptoms/csv/symptoms.csv")

In [16]:
# Working directories: "data" is passed to the split step, "data/parsed" to
# the parsing step that follows.
basic_op_data_dir = os.path.join(basic_data_dir, "data")
basic_parsed_data_dir = os.path.join(basic_op_data_dir, "parsed")

# Split the CSV into a train file and a test file.
basic_train_file, basic_test_file = process.split_data(
    basic_data_csv,
    basic_op_data_dir,
)

In [17]:
import pandas as pd
import numpy as np

In [18]:
# Parse the training split using the condition and symptom lookup files written
# earlier. The returned value is what the RF and NB training runners consume.
# NOTE(review): basic_parsed_data_dir is presumably the output location for the
# parsed data - confirm against process.parse_data.
basic_parsed_train = process.parse_data(
    basic_train_file,
    basic_condition_map_file,
    basic_symptom_map_file,
    basic_parsed_data_dir
)

In [19]:
# Random-forest run (the naive Bayes run follows separately).
# Hyper-parameters: 200 estimators, no depth limit.
rfparams = models.RFParams()
rfparams.n_estimators = 200
rfparams.max_depth = None

# Output directory for this run; created together with any missing parents.
basic_rf_dir = os.path.join(basic_op_data_dir, "output/rf")
os.makedirs(basic_rf_dir, exist_ok=True)

# NOTE(review): run_ok is never inspected afterwards - confirm what the runner
# returns and whether a failed run should stop the notebook.
run_ok = runners.train_ai_med_rf(
    basic_parsed_train, basic_symptom_map_file, basic_rf_dir,
    rfparams, "Basic AI-MED Run", "local-pc",
)

In [20]:
# Naive Bayes run on the same parsed training data.
# Output directory for this run; created together with any missing parents.
basic_nb_dir = os.path.join(basic_op_data_dir, "output/nb")
os.makedirs(basic_nb_dir, exist_ok=True)

# NOTE(review): run_ok is never inspected afterwards - confirm what the runner
# returns and whether a failed run should stop the notebook.
run_ok = runners.train_ai_med_nb(
    basic_parsed_train, basic_symptom_map_file, basic_nb_dir,
    "Basic AI-MED Run", "local-pc",
)