In [1]:
import ast
import json
import random
import os
import copy
import sys
from collections import namedtuple, defaultdict, OrderedDict

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import hashlib

In [2]:
class AttrDict(dict):
    """Dictionary whose items can also be read and written as attributes.

    ``d.key`` and ``d["key"]`` refer to the same storage because the instance
    ``__dict__`` is aliased to the mapping itself.
    """

    def __init__(self, *args, **kwargs):
        """Accept exactly the same arguments as ``dict``."""
        dict.__init__(self, *args, **kwargs)
        # Alias attribute storage to the mapping so both access styles agree.
        self.__dict__ = self

    def copy(self):
        """Return a new instance of the same type, recursively copying nested AttrDicts.

        Values that are not ``AttrDict`` instances are shared with the
        original (shallow copy for everything else).
        """
        # Pass the rebuilt mapping positionally: expanding it with ``**``
        # would raise TypeError as soon as a key is not a string.
        return type(self)({k: v.copy() if isinstance(v, AttrDict) else v
            for k, v in self.items()})

In [3]:
def clean(data):
    """Return `data` with line breaks and commas each replaced by one space.

    Substitutions are applied in sequence; "\r\n" is handled first so that a
    Windows line ending collapses to a single space rather than two.
    """
    cleaned = data
    for separator in ("\r\n", "\r", "\n", ","):
        cleaned = cleaned.replace(separator, " ")
    return cleaned

def load_json_data(data_filepath, key_name):
    """Load a JSON object of records and index it by a cleaned name field.

    Parameters
    ----------
    data_filepath : path-like
        JSON file whose top level is an object mapping record keys to
        dictionaries that each contain `key_name`.
    key_name : str
        Field of each record holding its name; the value is normalised in
        place with `clean`.

    Returns
    -------
    index_2_key : list
        Record keys in sorted order.
    name_2_index : dict
        Cleaned name -> position of the record in `index_2_key`. If two
        records share a cleaned name, the later position wins.
    data : dict
        The loaded mapping, with `key_name` values cleaned.
    """
    # JSON text is UTF-8 by specification; relying on the platform default
    # encoding breaks non-ASCII keys/values on non-UTF-8 locales.
    with open(data_filepath, encoding="utf-8") as fp:
        data = json.load(fp)
    index_2_key = sorted(data.keys())
    for k in index_2_key:
        data[k][key_name] = clean(data[k][key_name])
    name_2_index = {data[k][key_name]: i for i, k in enumerate(index_2_key)}
    return index_2_key, name_2_index, data

In [4]:
# Lookup table from evidence code (keys look like French-derived identifiers
# and ICD-style codes) to the English first-person statement that
# pprintPatient prints for that evidence. Entries that stop mid-sentence
# (e.g. "The pain is", "I have recently travelled to") are prefixes that the
# printer completes with the evidence value.
# NOTE(review): several statements contain apparent typos ("eatin",
# "vomitted", "whoooping", "live in in") and the "obstipation" entry reads
# "able" where "unable" may be intended. They are runtime strings, so they are
# left untouched here -- confirm against the dataset before changing them.
fr_evidence_2_statements = {
    "fievre": "I have fever",
    "douleurxx_endroitducorps": "The pain locations are",
    "douleurxx": "I feel pain",
    "douleurxx_irrad": "The pain radiates to these locations",
    "douleurxx_carac": "The pain is",
    "douleurxx_soudain": "On a scale of 0-10, the pace at which the pain appear is",
    "douleurxx_intens": "On a scale of 0-10, the pain intensity is",
    "douleurxx_precis": "On a scale of 0-10, the pain's location precision is",
    "pdc": "I lost consciousness",
    "lesions_peau_endroitducorps": "The regions affected by the rash are",
    "lesions_peau": "I have rashes",
    "lesions_peau_couleur": "The color of the rash is",
    "lesions_peau_intens": "On a scale of 0-10, the pain intensity caused by the rash is",
    "lesions_peau_elevee": "On a scale of 0-10, the rash swelling is",
    "lesions_peau_prurit": "On a scale of 0-10, the severity of the rash itching is",
    "lesions_peau_plusqu1cm": "The rash lesions are larger than 1cm",
    "lesions_peau_desquame": "The rash lesions peel off",
    "pale": "My skin is much paler than usual",
    "palpit": "I feel palpitations",
    "vo_sg": "I have recently thrown up blood or something resembling coffee beans",
    "melena": "I have recently had stools that were black (like coal)",
    "diarrhee": "I have diarrhea or an increase in stool frequency",
    "etouff": "I feel like choking or suffocating",
    "fatig_mod": "I constantly feel fatigued ",
    "irritable": "I feel more irritable and  my mood has been very unstable recently",
    "faible": "I feel like I am about to faint",
    "nausee": "I have nausea",
    "friss": "I have shivers",
    "ww_respi": "I have pain which increases when I breathe in deeply",
    "perte_appet": "I have a loss of appetite",
    "rectorragie": "I noticed light red blood or blood clots in my stool",
    "perte_poids": "I had an involuntary weight loss over the last 3 months",
    "pyrosis": "I have a burning sensation that starts in my stomach then goes up into my throat, and can be associated with a bitter taste in my mouth?",
    "bw_bending": "I have pain that improves when I lean forward",
    "ww_effort": "I have symptoms that increase with physical exertion but alleviate with rest",
    "footnumb": "I have numbness, loss of sensation or tingling in the feet",
    "dyspn": "I am experiencing shortness of breath or difficulty breathing in a significant way",
    "pertes_vag": "I have had vaginal discharge",
    "ballon_abdo": "I feel my abdomen is bloated or distended",
    "larmes": "I feel that my eyes produce excessive tears",
    "rhino_clair": "I have nasal congestion",
    "fatig_ext": "I feel so tired that I am unable to do my usual activities",
    "convulsion": "I had an absence episode and lost consciousness",
    "paralysie_visage": "I have had weakness or paralysis on one side of the face",
    "msk_dlr": "I have diffuse muscle pain",
    "ww_bouger": "I have pain that is increased with movement",
    "toux": "I am coughing",
    "ww_dd": "My symptoms worse when lying down and alleviated while sitting up",
    "ww_bouffe": "I have symptoms that get worse after eatin",
    "dysp_effort": "I feel out of breath with minimal physical effort",
    "gain_poids": "I have gained weight recently",
    "anxiete_s": "I feel anxious",
    "diaph": "I have had significantly increased sweating",
    "gorge_dlr": "I have a sore throat",
    "adp_dlr": "I have swollen and painful lymph nodes",
    "etourdissement": "I feel slightly dizzy or lightheaded",
    "hta": "I have high blood pressure",
    "dysphagie": "I have difficulty swallowing",
    "erytheme_occ": "I have noticed a widespread redness in one or both eyes",
    "trismus": "I suddenly have difficulty to open my mouth or have jaw pain when opening it",
    "dysarthrie": "I have difficulty articulating words",
    "laryngospasme": "I have felt like I was suffocating for a very short time with an inability to breathe or speak",
    "sialorhee": "I have noticed that I produce more saliva than usual",
    "confusion": "I have felt confused or disorientated lately",
    "volume_parole": "I have noticed that the tone of my voice has become deeper, softer or hoarse",
    "ulc\u00e8res_bouche": "I have painful mouth ulcers",
    "diplopie": "I have the perception of seeing two images of a single object seen overlapping or adjacent to each other",
    "toux_sev": "I have intense coughing fits",
    "claud_m\u00e2choire": "I have pain in my jaw",
    "ptose": "I have a hard time opening/raising one or both eyelids",
    "faiblesse_msmi": "I feel weakness in both arms and both legs",
    "fatigabilit\u00e9_msk": "My symptoms of muscle weakness increase with fatigue and stress",
    "vo_violent": "I have vomited several times",
    "posttus_emesis": "I vomitted after coughing",
    "insp_siffla": "My breathing is noisy after coughing spells",
    "rds_sg": "I have noticed unusual bleeding or bruising",
    "oedeme_endroitducorps": "The swelling locations are",
    "oedeme": "I have swelling in one or more areas of your body",
    "crach_sg": "I have been coughing up blood",
    "stridor": "I have noticed a high pitched sound when breathing in",
    "faiblesse faciale": "I have noticed weakness in my facial muscles and eyes",
    "paresthesies_bilat": "I have recently had numbness, loss of sensation or tingling, in both arms and legs and around my mouth",
    "rds_paralys_gen": "I have had paralysis in my limbs or in my face",
    "contact_allergie": "I have eaten something that I have an allergy to",
    "wheez": "I have noticed a wheezing sound when I exhale",
    "obstipation": "I have been able to pass stools or gas since my symptoms increased",
    "boire_ped": "I have a decrease in appetite",
    "ww_valsalva": "My symptoms increase with coughing or an effort like lifting a weight",
    "toux_Aboy": "I have a whooping cough",
    "ww_nuit": "My symptoms are more prominent at night",
    "surg1": "I have had surgery within the last month",
    "prurit_occ": "I have severe itching in one or both eyes",
    "expecto": "My cough produces colored or more abundant sputum than usual",
    "angor_accelere": "My symptoms have worsened over the last 2 weeks and progressively less effort is required to cause the symptoms",
    "angor_repos": "I have chest pain even at rest",
    "prurit_nasal": "My nose or the back of my throat is itchy",
    "rds_paresthesie_gen": "I have recently had numbness, loss of sensation or tingling on my body",
    "rds_deg": "I have noticed a generalized and vague discomfort with diffuse  muscle aches",
    "rds_anorexie": "I have been unintentionally losing weight",
    "m\u00e9norr": "I have very long menstruation periods",
    "flushing": "My cheeks suddenly turned red",
    "spasme_trapeze": "I feel that muscle spasms or soreness in my neck are keeping me from turning my head to one side",
    "selles_pale": "I have pale stools and dark urine",
    "pls_irreg": "I feel my heart is beating very irregularly in a disorganized pattern",
    "spasmes_msk": "I have annoying muscle spasms in my face/neck",
    "protu_langue": "I have trouble keeping my tongue in my mouth",
    "regard_d\u00e9vi\u00e9": "I am unable to control the direction of my eyes",
    "dyspn_noct": "I have bouts of choking or shortness of breath that wake me up at night",
    "f10.129": "I drink alcohol excessively",
    "naco": "I am taking a new oral anticoagulants",
    "psy_depers": "I feel like I am detached from my own body or my surroundings",
    "impression_mort": "I feel like I am dying",
    "rhino_pur": "I have greenish/yellowish nasal discharge",
    "hyponos": "I have lost my sense of smell",
    "apnee": "I temporarily stop breathing while I am asleep",
    "i25.1": "I have had a heart attack",
    "i73.9": "I have a problem with poor blood circulation",
    "i10": "I take medications to treat high blood pressure",
    "f17.210": "I smoke cigarettes",
    "e78.5": "I am taking medications to treat high cholesterol",
    "contact": "I have been in contact with a person with similar symptoms in the past 2 weeks",
    "horm1": "I currently take hormones",
    "e10_e11": "I have diabetes",
    "preg1": "I am pregnant",
    "itss_risque": "I have had unprotected sex with more than one partner in the last 6 months",
    "e66": "I am significantly overweight compared to people of the same height as me",
    "ains": "I have recently taken anti-inflammatory drugs",
    "pneumothorax": "Some family members have had a pneumothorax",
    "Z99.2": "I am currently undergo dialysis",
    "atcd_cluster": "Some family members have been diagnosed with cluster headaches",
    "rx_vasodil": "I take medications that dilates your blood vessels",
    "s09.90": "I have had a head trauma",
    "histfammigraine": "Some family members are known to have migraines",
    "i60-i69": "I have had a stroke",
    "z84.89": "I am more likely to develop common allergies than the general population",
    "drogues_stimul": "I regularly take stimulant drugs",
    "dayc": "I work in a daycare",
    "g20": "I have the Parkinson's disease",
    "cancer_m\u00e9ta": "I have a metastatic cancer",
    "osteoporose": "I am being treated for osteoporosis",
    "j45": "I have had to use a bronchodilator in the past",
    "hernie_hiatale": "I have a hiatal hernia",
    "j06.9": "I have had a cold in the last 2 weeks",
    "crowd": "I live with 4 or more people",
    "HIV": "I am infected with the human immunodeficiency virus (HIV)",
    "z92.25": "I am immunosuppressed",
    "drogues_IV": "I am currently using intravenous drugs",
    "crohn_cu": "I suffer from Crohn's disease or ulcerative colitis",
    "atcd_its": "I have had a sexually transmitted infection",
    "move": "I exercise regularly, 4 times per week or more",
    "smokingpast": "I am a former smoker",
    "v85.0": "I am underweight",
    "k74": "I have liver cirrhosis",
    "sex_vih": "I have had sexual intercourse with an HIV-positive partner in the past 12 months",
    "Mauv_aliment": "I have a poor diet",
    "atcd_anem": "I have had a diagnosis of anemia",
    "atcd_fam_anem": "Some family members have been diagnosed with anemia",
    "irc": "I have chronic kidney failure",
    "perinatality": "I was born prematurely",
    "atcdfam_mg": "Some family members have been diagnosed with myasthenia gravis",
    "ap_par": "I have Rheumatoid Arthritis",
    "f32": "I have been diagnosed with depression",
    "cont_coq": "I have been in contact with someone who has had whoooping cough",
    "B34.9": "I have recently had a viral infection",
    "H6690": "I have recently been treated with an oral antibiotic for an ear infection",
    "c00-d48": "I have an active cancer",
    "vaccination": "my vaccinations are up to date",
    "malf_cardiaque": "I have a known heart defect",
    "ap_hypert4": "I have been diagnosed with hyperthyroidism",
    "bode": "I have severe Chronic Obstructive Pulmonary Disease",
    "ap_valve": "I have a known issue with one of my heart valves",
    "drink_energie": "I regularly consume energy drinks",
    "cafe": "I regularly drink coffee or tea",
    "wakeup": "I have recently taken decongestants or substances that may have stimulant effects",
    "allergie_sev": "I have a known severe food allergy",
    "cortico": "I take corticosteroids",
    "J05.0": "Some family members have had croup",
    "f41.9": "I suffer from chronic anxiety",
    "i80": "I have had deep vein thrombosis",
    "immob1": "I have been unable to move or get up for more than 3 consecutive days within the last 4 weeks",
    "ap_asian": "I am of Asian descent",
    "ap_fk": "I have cystic fibrosis",
    "j17_j18": "I have had pneumonia",
    "j44_j42": "I have a chronic obstructive pulmonary disease",
    "urban1": "I live in in a big city",
    "fam_atopie": "Some family members suffer from allergies, hay fever or eczema",
    "fam_j45": "Some family members have asthma",
    "menarche_hat": "I have my first menstrual period before the age of 12",
    "allait_prol": "I have breastfed one of my children for more than 9 months",
    "sahs": "I have been diagnosed with obstructive sleep apnea",
    "i50": "I have heart failure",
    "patho_endo": "I was diagnosed with a hormonal dysfunction",
    "scombroide": "I ate tuna and Swiss cheese before the reaction occurred",
    "K86.1": "I have chronic pancreatitis",
    "k21": "I have been diagnosed with gastroesophageal reflux",
    "ap_pneumothorax": "I have had a spontaneous pneumothorax",
    "z77.22": "I am exposed to secondhand cigarette smoke on a daily basis",
    "antipsy_r\u00e9cent": "I have taken some antipsychotic medications within the last 7 days",
    "nau_psy_recent": "I have been treated in hospital recently for nausea, agitation, intoxication or aggressive behavior and received medication via an intravenous or intramuscular route",
    "lymphoed\u00e8me": "I have had surgery to remove lymph nodes",
    "norvasc": "I take a calcium channel blockers as medications",
    "synd_nephro": "I have a known kidney problem resulting in an inability to retain proteins",
    "I30": "I have had a pericarditis",
    "atcdpsyfam": "Some members of my immediate family have a psychiatric illness",
    "z82.49": "I have close family members who had a cardiovascular disease problem before the age of 50",
    "ebolacase": "I have been in contact with anyone infected with the Ebola virus in the last month",
    "m79.7": "I suffer from fibromyalgia",
    "hosptisasm": "I have been hospitalized for an asthma attack in the past year",
    "criseasthm": "I have had more than one asthma attack in the past year",
    "j32": "I have been diagnosed with chronic sinusitis",
    "eampoc1": "I have had one or several flare ups of chronic obstructive pulmonary disease",
    "tagri": "I work in agriculture",
    "tmine": "I work in the mining sector",
    "tconst": "I work in construction",
    "j34.2": "I have a deviated nasal septum",
    "j33": "I have polyps in my nose",
    "momasthma": "My mother suffers from asthma",
    "suburb": "I live in the suburbs",
    "rural": "I live in a rural area",
    "z80.1": "Some family members have had lung cancer",
    "z80.0": "Some family members have been diagnosed with pancreatic cancer",
    "J81": "I have had fluid in your lungs",
    "trav1": "I have recently travelled to",
}

In [5]:
# Lookup table from "E_<n>" evidence code to the English first-person
# statement that pprintPatient prints for that evidence. The statements appear
# in the same order as in fr_evidence_2_statements, so the two tables look like
# the same content keyed by a different code scheme (presumably one per dataset
# release -- TODO confirm). Entries that stop mid-sentence (e.g. "The pain is",
# "I have recently travelled to") are prefixes completed with the evidence value.
# NOTE(review): several statements contain apparent typos ("eatin",
# "vomitted", "whoooping", "live in in") and the 'E_150' entry reads "able"
# where "unable" may be intended. They are runtime strings, so they are left
# untouched here -- confirm against the dataset before changing them.
en_evidence_2_statements = {
     'E_91': 'I have fever',
     'E_55': 'The pain locations are',
     'E_53': 'I feel pain',
     'E_57': 'The pain radiates to these locations',
     'E_54': 'The pain is',
     'E_59': 'On a scale of 0-10, the pace at which the pain appear is',
     'E_56': 'On a scale of 0-10, the pain intensity is',
     'E_58': "On a scale of 0-10, the pain's location precision is",
     'E_159': 'I lost consciousness',
     'E_133': 'The regions affected by the rash are',
     'E_129': 'I have rashes',
     'E_130': 'The color of the rash is',
     'E_134': 'On a scale of 0-10, the pain intensity caused by the rash is',
     'E_132': 'On a scale of 0-10, the rash swelling is',
     'E_136': 'On a scale of 0-10, the severity of the rash itching is',
     'E_135': 'The rash lesions are larger than 1cm',
     'E_131': 'The rash lesions peel off',
     'E_154': 'My skin is much paler than usual',
     'E_155': 'I feel palpitations',
     'E_210': 'I have recently thrown up blood or something resembling coffee beans',
     'E_140': 'I have recently had stools that were black (like coal)',
     'E_51': 'I have diarrhea or an increase in stool frequency',
     'E_75': 'I feel like choking or suffocating',
     'E_89': 'I constantly feel fatigued ',
     'E_114': 'I feel more irritable and  my mood has been very unstable recently',
     'E_82': 'I feel like I am about to faint',
     'E_148': 'I have nausea',
     'E_94': 'I have shivers',
     'E_220': 'I have pain which increases when I breathe in deeply',
     'E_161': 'I have a loss of appetite',
     'E_179': 'I noticed light red blood or blood clots in my stool',
     'E_162': 'I had an involuntary weight loss over the last 3 months',
     'E_173': 'I have a burning sensation that starts in my stomach then goes up into my throat, and can be associated with a bitter taste in my mouth?',
     'E_33': 'I have pain that improves when I lean forward',
     'E_218': 'I have symptoms that increase with physical exertion but alleviate with rest',
     'E_93': 'I have numbness, loss of sensation or tingling in the feet',
     'E_66': 'I am experiencing shortness of breath or difficulty breathing in a significant way',
     'E_163': 'I have had vaginal discharge',
     'E_30': 'I feel my abdomen is bloated or distended',
     'E_127': 'I feel that my eyes produce excessive tears',
     'E_181': 'I have nasal congestion',
     'E_88': 'I feel so tired that I am unable to do my usual activities',
     'E_43': 'I had an absence episode and lost consciousness',
     'E_156': 'I have had weakness or paralysis on one side of the face',
     'E_144': 'I have diffuse muscle pain',
     'E_216': 'I have pain that is increased with movement',
     'E_201': 'I am coughing',
     'E_217': 'My symptoms worse when lying down and alleviated while sitting up',
     'E_215': 'I have symptoms that get worse after eatin',
     'E_64': 'I feel out of breath with minimal physical effort',
     'E_96': 'I have gained weight recently',
     'E_16': 'I feel anxious',
     'E_50': 'I have had significantly increased sweating',
     'E_97': 'I have a sore throat',
     'E_9': 'I have swollen and painful lymph nodes',
     'E_76': 'I feel slightly dizzy or lightheaded',
     'E_102': 'I have high blood pressure',
     'E_65': 'I have difficulty swallowing',
     'E_74': 'I have noticed a widespread redness in one or both eyes',
     'E_205': 'I suddenly have difficulty to open my mouth or have jaw pain when opening it',
     'E_63': 'I have difficulty articulating words',
     'E_128': 'I have felt like I was suffocating for a very short time with an inability to breathe or speak',
     'E_190': 'I have noticed that I produce more saliva than usual',
     'E_39': 'I have felt confused or disorientated lately',
     'E_212': 'I have noticed that the tone of my voice has become deeper, softer or hoarse',
     'E_206': 'I have painful mouth ulcers',
     'E_52': 'I have the perception of seeing two images of a single object seen overlapping or adjacent to each other',
     'E_203': 'I have intense coughing fits',
     'E_38': 'I have pain in my jaw',
     'E_172': 'I have a hard time opening/raising one or both eyelids',
     'E_84': 'I feel weakness in both arms and both legs',
     'E_90': 'My symptoms of muscle weakness increase with fatigue and stress',
     'E_211': 'I have vomited several times',
     'E_166': 'I vomitted after coughing',
     'E_112': 'My breathing is noisy after coughing spells',
     'E_178': 'I have noticed unusual bleeding or bruising',
     'E_152': 'The swelling locations are',
     'E_151': 'I have swelling in one or more areas of your body',
     'E_45': 'I have been coughing up blood',
     'E_194': 'I have noticed a high pitched sound when breathing in',
     'E_83': 'I have noticed weakness in my facial muscles and eyes',
     'E_157': 'I have recently had numbness, loss of sensation or tingling, in both arms and legs and around my mouth',
     'E_176': 'I have had paralysis in my limbs or in my face',
     'E_42': 'I have eaten something that I have an allergy to',
     'E_214': 'I have noticed a wheezing sound when I exhale',
     'E_150': 'I have been able to pass stools or gas since my symptoms increased',
     'E_32': 'I have a decrease in appetite',
     'E_221': 'My symptoms increase with coughing or an effort like lifting a weight',
     'E_202': 'I have a whooping cough',
     'E_219': 'My symptoms are more prominent at night',
     'E_196': 'I have had surgery within the last month',
     'E_170': 'I have severe itching in one or both eyes',
     'E_77': 'My cough produces colored or more abundant sputum than usual',
     'E_13': 'My symptoms have worsened over the last 2 weeks and progressively less effort is required to cause the symptoms',
     'E_14': 'I have chest pain even at rest',
     'E_169': 'My nose or the back of my throat is itchy',
     'E_177': 'I have recently had numbness, loss of sensation or tingling on my body',
     'E_175': 'I have noticed a generalized and vague discomfort with diffuse  muscle aches',
     'E_174': 'I have been unintentionally losing weight',
     'E_145': 'I have very long menstruation periods',
     'E_92': 'My cheeks suddenly turned red',
     'E_192': 'I feel that muscle spasms or soreness in my neck are keeping me from turning my head to one side',
     'E_188': 'I have pale stools and dark urine',
     'E_164': 'I feel my heart is beating very irregularly in a disorganized pattern',
     'E_193': 'I have annoying muscle spasms in my face/neck',
     'E_168': 'I have trouble keeping my tongue in my mouth',
     'E_180': 'I am unable to control the direction of my eyes',
     'E_67': 'I have bouts of choking or shortness of breath that wake me up at night',
     'E_78': 'I drink alcohol excessively',
     'E_146': 'I am taking a new oral anticoagulants',
     'E_171': 'I feel like I am detached from my own body or my surroundings',
     'E_111': 'I feel like I am dying',
     'E_182': 'I have greenish/yellowish nasal discharge',
     'E_103': 'I have lost my sense of smell',
     'E_23': 'I temporarily stop breathing while I am asleep',
     'E_105': 'I have had a heart attack',
     'E_108': 'I have a problem with poor blood circulation',
     'E_104': 'I take medications to treat high blood pressure',
     'E_79': 'I smoke cigarettes',
     'E_71': 'I am taking medications to treat high cholesterol',
     'E_41': 'I have been in contact with a person with similar symptoms in the past 2 weeks',
     'E_100': 'I currently take hormones',
     'E_69': 'I have diabetes',
     'E_167': 'I am pregnant',
     'E_115': 'I have had unprotected sex with more than one partner in the last 6 months',
     'E_70': 'I am significantly overweight compared to people of the same height as me',
     'E_10': 'I have recently taken anti-inflammatory drugs',
     'E_165': 'Some family members have had a pneumothorax',
     'E_8': 'I am currently undergo dialysis',
     'E_25': 'Some family members have been diagnosed with cluster headaches',
     'E_184': 'I take medications that dilates your blood vessels',
     'E_185': 'I have had a head trauma',
     'E_99': 'Some family members are known to have migraines',
     'E_107': 'I have had a stroke',
     'E_226': 'I am more likely to develop common allergies than the general population',
     'E_62': 'I regularly take stimulant drugs',
     'E_49': 'I work in a daycare',
     'E_95': "I have the Parkinson's disease",
     'E_37': 'I have a metastatic cancer',
     'E_153': 'I am being treated for osteoporosis',
     'E_124': 'I have had to use a bronchodilator in the past',
     'E_98': 'I have a hiatal hernia',
     'E_116': 'I have had a cold in the last 2 weeks',
     'E_48': 'I live with 4 or more people',
     'E_2': 'I am infected with the human immunodeficiency virus (HIV)',
     'E_227': 'I am immunosuppressed',
     'E_61': 'I am currently using intravenous drugs',
     'E_47': "I suffer from Crohn's disease or ulcerative colitis",
     'E_27': 'I have had a sexually transmitted infection',
     'E_143': 'I exercise regularly, 4 times per week or more',
     'E_191': 'I am a former smoker',
     'E_208': 'I am underweight',
     'E_126': 'I have liver cirrhosis',
     'E_189': 'I have had sexual intercourse with an HIV-positive partner in the past 12 months',
     'E_7': 'I have a poor diet',
     'E_24': 'I have had a diagnosis of anemia',
     'E_26': 'Some family members have been diagnosed with anemia',
     'E_113': 'I have chronic kidney failure',
     'E_160': 'I was born prematurely',
     'E_28': 'Some family members have been diagnosed with myasthenia gravis',
     'E_20': 'I have Rheumatoid Arthritis',
     'E_80': 'I have been diagnosed with depression',
     'E_40': 'I have been in contact with someone who has had whoooping cough',
     'E_0': 'I have recently had a viral infection',
     'E_1': 'I have recently been treated with an oral antibiotic for an ear infection',
     'E_34': 'I have an active cancer',
     'E_209': 'my vaccinations are up to date',
     'E_139': 'I have a known heart defect',
     'E_19': 'I have been diagnosed with hyperthyroidism',
     'E_31': 'I have severe Chronic Obstructive Pulmonary Disease',
     'E_22': 'I have a known issue with one of my heart valves',
     'E_60': 'I regularly consume energy drinks',
     'E_35': 'I regularly drink coffee or tea',
     'E_213': 'I have recently taken decongestants or substances that may have stimulant effects',
     'E_12': 'I have a known severe food allergy',
     'E_44': 'I take corticosteroids',
     'E_4': 'Some family members have had croup',
     'E_81': 'I suffer from chronic anxiety',
     'E_109': 'I have had deep vein thrombosis',
     'E_110': 'I have been unable to move or get up for more than 3 consecutive days within the last 4 weeks',
     'E_17': 'I am of Asian descent',
     'E_18': 'I have cystic fibrosis',
     'E_118': 'I have had pneumonia',
     'E_123': 'I have a chronic obstructive pulmonary disease',
     'E_207': 'I live in in a big city',
     'E_86': 'Some family members suffer from allergies, hay fever or eczema',
     'E_87': 'Some family members have asthma',
     'E_141': 'I have my first menstrual period before the age of 12',
     'E_11': 'I have breastfed one of my children for more than 9 months',
     'E_186': 'I have been diagnosed with obstructive sleep apnea',
     'E_106': 'I have heart failure',
     'E_158': 'I was diagnosed with a hormonal dysfunction',
     'E_187': 'I ate tuna and Swiss cheese before the reaction occurred',
     'E_6': 'I have chronic pancreatitis',
     'E_125': 'I have been diagnosed with gastroesophageal reflux',
     'E_21': 'I have had a spontaneous pneumothorax',
     'E_222': 'I am exposed to secondhand cigarette smoke on a daily basis',
     'E_15': 'I have taken some antipsychotic medications within the last 7 days',
     'E_147': 'I have been treated in hospital recently for nausea, agitation, intoxication or aggressive behavior and received medication via an intravenous or intramuscular route',
     'E_137': 'I have had surgery to remove lymph nodes',
     'E_149': 'I take a calcium channel blockers as medications',
     'E_197': 'I have a known kidney problem resulting in an inability to retain proteins',
     'E_3': 'I have had a pericarditis',
     'E_29': 'Some members of my immediate family have a psychiatric illness',
     'E_225': 'I have close family members who had a cardiovascular disease problem before the age of 50',
     'E_73': 'I have been in contact with anyone infected with the Ebola virus in the last month',
     'E_138': 'I suffer from fibromyalgia',
     'E_101': 'I have been hospitalized for an asthma attack in the past year',
     'E_46': 'I have had more than one asthma attack in the past year',
     'E_119': 'I have been diagnosed with chronic sinusitis',
     'E_72': 'I have had one or several flare ups of chronic obstructive pulmonary disease',
     'E_198': 'I work in agriculture',
     'E_200': 'I work in the mining sector',
     'E_199': 'I work in construction',
     'E_121': 'I have a deviated nasal septum',
     'E_120': 'I have polyps in my nose',
     'E_142': 'My mother suffers from asthma',
     'E_195': 'I live in the suburbs',
     'E_183': 'I live in a rural area',
     'E_224': 'Some family members have had lung cancer',
     'E_223': 'Some family members have been diagnosed with pancreatic cancer',
     'E_5': 'I have had fluid in your lungs',
     'E_204': 'I have recently travelled to'
}

In [6]:
# Per-variant settings, keyed by "fr" / "en". Each value is a 4-item list:
#   [statement lookup table, travel evidence code, <3rd item>, <4th item>]
# NOTE(review): the last two items look like the travel_negative_answer and
# default_location arguments of the pretty-printers -- confirm at the call site.
meta_data = dict(
    fr=[fr_evidence_2_statements, "trav1", "N", "AmerN"],
    en=[en_evidence_2_statements, "E_204", "V_10", "V_4"],
)

In [7]:
def pprintPatient(
    df, idx, cond_data, symp_data, evidence_2_statements,
    travel_evidence, travel_negative_answer, default_location, filename=None
):
    """Write a human-readable summary of one patient row.

    The report lists sex/age, geographical region, pathology, symptoms,
    antecedents and the differential diagnosis, followed by a separator line.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'AGE', 'SEX', 'PATHOLOGY', 'EVIDENCES' and
        'DIFFERENTIAL_DIAGNOSIS'. The last two hold Python literals stored as
        strings: a list of evidence strings, and a list of
        ``[pathology_key, probability]`` pairs.
    idx : int
        Positional index of the row to print.
    cond_data : dict
        Pathology key -> record containing 'cond-name-eng'.
    symp_data : dict
        Evidence code -> record. The keys read here are 'is_antecedent',
        'default_value', 'data_type' and 'value_meaning' (value -> {'en': text}).
    evidence_2_statements : dict
        Evidence code -> statement text to print.
    travel_evidence : str
        Evidence code carrying the travel location; it is reported as the
        geographical region and left out of the symptom list.
    travel_negative_answer : str
        Travel value that is replaced by `default_location`.
    default_location : str
        Location value used when there is no travel evidence or when it equals
        `travel_negative_answer`.
    filename : str, optional
        If given, the report is appended to this file; otherwise it is written
        to ``sys.stdout``.

    Returns
    -------
    None
    """
    row = df.iloc[idx]
    age = row['AGE']
    sex = row['SEX']
    patho = row['PATHOLOGY']
    evidences = ast.literal_eval(row['EVIDENCES'])
    ddx = ast.literal_eval(row['DIFFERENTIAL_DIAGNOSIS'])

    # Evidences carrying a value are encoded as "<code>_@_<value>".
    travel_prefix = f"{travel_evidence}_@_"
    travel_values = [e[len(travel_prefix):] for e in evidences if e.startswith(travel_prefix)]
    geo = travel_values[0] if travel_values else default_location
    if geo == travel_negative_answer:
        geo = default_location
    geo = symp_data[travel_evidence]["value_meaning"][geo]["en"]

    str_ddx = [
        f"{cond_data[entry[0]]['cond-name-eng']}: {entry[1]:.3f}" for entry in ddx
    ]

    # Value-less evidences are stored as True; evidences with values accumulate
    # the list of their values (only under symptoms, as in the original logic).
    symp, atcd = {}, {}
    for evi in evidences:
        if "_@_" not in evi:
            if symp_data[evi]["is_antecedent"]:
                atcd[evi] = True
            else:
                symp[evi] = True
        else:
            evi, val = evi.split("_@_")
            if evi == travel_evidence:
                continue
            symp.setdefault(evi, []).append(val)

    f = open(filename, 'a') if filename is not None else sys.stdout
    try:
        print(f"Sex: {sex}, Age: {age}", file=f)
        print(f"Geographical region: {geo}", file=f)
        print(f"Pathology: {cond_data[patho]['cond-name-eng']}", file=f)
        print("Symptoms:", file=f)
        print("---------", file=f)
        for evi in sorted(symp.keys()):
            statement_values = symp[evi]
            if isinstance(statement_values, bool):
                print(f"\t - {evidence_2_statements[evi]}.", file=f)
            elif len(statement_values) > 1:
                print(f"\t - {evidence_2_statements[evi]}:", file=f)
                for val in statement_values:
                    vtext = symp_data[evi]["value_meaning"][val]["en"]
                    print(f"\t\t » {vtext}", file=f)
            else:
                val = statement_values[0]
                default = symp_data[evi]["default_value"]
                if isinstance(default, (int, float)):
                    # Numeric scale: print the number unless it is the default.
                    val = int(val)
                    if val != default:
                        print(f"\t - {evidence_2_statements[evi]} {val}", file=f)
                elif "N" == default and evi != travel_evidence:
                    # Default "N": a non-default answer prints the bare statement.
                    if val != default:
                        print(f"\t - {evidence_2_statements[evi]}", file=f)
                elif val != default:
                    vtext = symp_data[evi]["value_meaning"][val]["en"]
                    if symp_data[evi]["data_type"] == "M":
                        sep = ":\n\t\t » "
                    else:
                        sep = " "
                    print(f"\t - {evidence_2_statements[evi]}{sep}{vtext}", file=f)

        print("Antecedents:", file=f)
        print("-----------", file=f)
        for evi in sorted(atcd.keys()):
            print(f"\t - {evidence_2_statements[evi]}.", file=f)

        print("Differential diagnosis:", file=f)
        print("----------------------", file=f)
        print(", ".join(str_ddx), file=f)

        print("\n##########################################################\n", file=f)
    finally:
        # Close only the handle opened here; sys.stdout must stay open.
        if filename is not None:
            f.close()

In [8]:
def _interaction_answer_lines(a, symp_data, symp, atcd):
    """Return the output lines answering the question about evidence `a`.

    `symp` and `atcd` map the evidences the patient actually has to either
    `True` (binary evidence) or a list of value codes.
    """
    info = symp_data[a]
    if a not in symp and a not in atcd:
        # The patient does not have this evidence: report the negative/default answer.
        if info["data_type"] == "B":
            answer = "N"
        elif isinstance(info["default_value"], (int, float)):
            answer = info["default_value"]
        else:
            answer = info["value_meaning"][info["default_value"]]["en"]
        return [f"  A: {answer}"]

    if a in symp:
        if isinstance(symp[a], bool):
            return ["  A: Y"]
        # Numeric evidences are shown as-is, categorical ones through their English label.
        numeric = isinstance(info["default_value"], (int, float))
        lines = ["  A:"]
        for o in symp[a]:
            v = o if numeric else info["value_meaning"][o]["en"]
            lines.append(f"\t * {v}")
        return lines

    if isinstance(atcd[a], bool):
        return ["  A: Y"]
    return [f"  A: {info['value_meaning'][o]['en']}" for o in atcd[a]]


def pprintInteraction(
    df, cond_data, symp_data, evidence_2_statements,
    travel_evidence, travel_negative_answer, default_location,
    action, diff, idx=0, showDiff=False, filename=None
):
    """Print the agent/patient interaction recorded for the patient at row `idx`.

    Parameters
    ----------
    df : pandas.DataFrame
        Patients; the columns 'EVIDENCES' and 'DIFFERENTIAL_DIAGNOSIS' (Python
        literals stored as strings), 'AGE' and 'SEX' are read.
    cond_data : dict
        Maps a condition code to a dict holding its 'cond-name-eng' name.
    symp_data : dict
        Maps an evidence code to its description ('is_antecedent',
        'question_en', 'data_type', 'default_value', 'value_meaning').
    evidence_2_statements : dict
        Maps an evidence code to the statement printed for the initial evidence.
    travel_evidence : str
        Code of the travel evidence; its value is always stored as an antecedent.
    travel_negative_answer, default_location
        Not used by this function; accepted so the same unpacked argument
        tuple can be passed here and to pprintPatient.
    action : str or path-like
        CSV file whose row `idx` lists the evidences asked by the agent. The
        first one is the initial evidence; unused trailing cells hold 'None'.
    diff : str or path-like
        CSV file whose row `idx` holds, for every step, the predicted
        differential as the string form of a list of condition codes.
    idx : int
        Positional row of the patient in `df` and in both CSV files.
    showDiff : bool
        If True, print the predicted differential after every step; otherwise
        print only the final predicted differential.
    filename : str or None
        If given, the text is appended to this file; otherwise it is printed
        to standard output.
    """
    patient = df.iloc[idx]
    evidences = ast.literal_eval(patient['EVIDENCES'])
    gt_ddx = ast.literal_eval(patient['DIFFERENTIAL_DIAGNOSIS'])
    age = patient['AGE']
    sex = patient['SEX']

    # Ground-truth differential rendered as "<condition name>: <probability>".
    str_ddx = [
        f"{cond_data[entry[0]]['cond-name-eng']}: {entry[1]:.3f}"
        for entry in gt_ddx
    ]

    # Split the patient's evidences into symptoms and antecedents. Binary
    # evidences map to True, valued ones ("<name>_@_<value>") to a list of values.
    symp, atcd = {}, {}
    for evidence in evidences:
        if "_@_" not in evidence:
            if symp_data[evidence]["is_antecedent"]:
                atcd[evidence] = True
            else:
                symp[evidence] = True
            continue
        name, value = evidence.split("_@_", 1)
        if name == travel_evidence:
            atcd[name] = [value]
        else:
            symp.setdefault(name, []).append(value)

    actions = pd.read_csv(action).iloc[idx].tolist()
    step_ddx = pd.read_csv(diff).iloc[idx].tolist()

    # Count the steps actually taken. Padding cells are the string 'None', or
    # NaN when pandas parses the literal "None" as a missing value.
    n_steps = sum(1 for a in actions if not (pd.isna(a) or a == 'None'))
    all_ddx = [
        [cond_data[code]['cond-name-eng'] for code in ast.literal_eval(step_ddx[i])]
        for i in range(n_steps)
    ]
    predicted_ddx = all_ddx[-1] if all_ddx else []

    f = open(filename, 'a') if filename is not None else sys.stdout
    try:
        print(f"Sex: {sex}, Age: {age}", file=f)
        for i in range(n_steps):
            if i == 0:
                print(f"Initial evidence: \n\t {evidence_2_statements[actions[i]]}", file=f)
            else:
                a = actions[i]
                print(f"*Turn {i}:", file=f)
                print(f"  Q: {symp_data[a]['question_en']}", file=f)
                for line in _interaction_answer_lines(a, symp_data, symp, atcd):
                    print(line, file=f)

            if showDiff:
                ddxstr = ", ".join(all_ddx[i])
                if i == 0:
                    print(f"Differential diagnosis: \n\t {ddxstr}\n", file=f)
                else:
                    print(f"  Differential diagnosis: \n\t{ddxstr}\n", file=f)

            if i == 0:
                print("Agent inquiries:", file=f)
                print("---------------", file=f)

        if not showDiff:
            print("Predicted differential:", file=f)
            print("----------------------", file=f)
            print(", ".join(predicted_ddx), file=f)
        print("Ground-truth differential:", file=f)
        print("-------------------------", file=f)
        print(", ".join(str_ddx), file=f)

        print("\n##########################################################\n", file=f)
    finally:
        # Only close what this function opened; never close sys.stdout.
        if filename is not None:
            f.close()

# Printing Patients

In [10]:
# load_json_data returns (index_2_key, name_2_index, data); only `data`, the
# dict loaded from the JSON file with its name field cleaned, is needed here.
_, _, cond_data = load_json_data("../data/release_conditions.json", "condition_name")
_, _, symp_data = load_json_data("../data/release_evidences.json", "name")

In [11]:
# read_csv infers the zip compression from the file extension, so the archive
# is read directly without extracting it first.
filename = "../data/release_test_patients.zip"
df = pd.read_csv(filename)

In [12]:
# Number of entries in each patient's differential diagnosis (the column
# stores the list as its string representation, hence the literal_eval).
df["DDX_LEN"] = df["DIFFERENTIAL_DIAGNOSIS"].map(ast.literal_eval).map(len)

In [13]:
# Row labels of the patients, bucketed by the length of their differential:
# at most 10 entries, 11 to 20 entries, and more than 20 entries.
ddx_lt10_indices = df.query("DDX_LEN <= 10").index.tolist()
ddx_in10_20_indices = df.query("DDX_LEN > 10 and DDX_LEN <= 20").index.tolist()
ddx_gt20_indices = df.query("DDX_LEN > 20").index.tolist()

In [14]:
# Sample 20 patients, stratified by the length of their differential diagnosis:
# - 10 whose differential has at most 10 entries
# - 6 whose differential has between 11 and 20 entries
# - 4 whose differential has more than 20 entries

# seed format: ddmmYYYY
seed = 22082022

# Seeds NumPy's global generator. The three draws below consume it in sequence,
# so neither the seed nor the order of the draws can change without changing
# the selected patients.
np.random.seed(seed)

sub1 = np.random.choice(ddx_lt10_indices, size=10, replace=False).tolist()
sub2 = np.random.choice(ddx_in10_20_indices, size=6, replace=False).tolist()
sub3 = np.random.choice(ddx_gt20_indices, size=4, replace=False).tolist()

all_20_patient_indices = sub1 + sub2 + sub3

In [15]:
# Tag inserted into the names of the files written by the following cells
# (all_20_patients{suffix}.txt, patientSample20{suffix}.csv).
suffix = "Test"

In [16]:
# Key selecting which entry of meta_data is unpacked into the pprintPatient /
# pprintInteraction calls (presumably the dataset language — confirm against
# the definition of meta_data).
ds_version="en"

In [17]:
# Write the description of every sampled patient to one text file.
# NOTE(review): if pprintPatient opens the file in append mode (as
# pprintInteraction does), re-running this cell duplicates the entries — confirm.
for idx in all_20_patient_indices:
    pprintPatient(df, idx, cond_data, symp_data, *meta_data[ds_version], f"all_20_patients{suffix}.txt")

In [18]:
# Keep the sampled rows in sampling order; reset_index moves their original
# row labels into an "index" column.
df2 = df.loc[all_20_patient_indices].reset_index()

In [19]:
# Persist the sample as a comma-separated file (the default separator),
# without the row index.
df2.to_csv(f"patientSample20{suffix}.csv", index=False)

In [20]:
# Reload the sample from disk so the following cells work from the saved file.
df2 = pd.read_csv(f"patientSample20{suffix}.csv")

In [21]:
# Print every sampled patient to the console (no output file is given).
for idx in range(df2.shape[0]):
    pprintPatient(df2, idx, cond_data, symp_data, *meta_data[ds_version])

Sex: F, Age: 55
Geographical region: North America
Pathology: URTI
Symptoms:
---------
	 - I have nasal congestion.
	 - I am coughing.
	 - I have had significantly increased sweating.
	 - I feel pain.
	 - The pain is:
		 » sensitive
		 » heavy
	 - The pain locations are:
		 » top of the head
		 » forehead
		 » temple(R)
	 - On a scale of 0-10, the pain intensity is 7
	 - On a scale of 0-10, the pain's location precision is 6
	 - On a scale of 0-10, the pace at which the pain appear is 3
	 - My cough produces colored or more abundant sputum than usual.
	 - I have fever.
	 - I have a sore throat.
Antecedents:
-----------
	 - I have been in contact with a person with similar symptoms in the past 2 weeks.
	 - I live with 4 or more people.
	 - I work in a daycare.
Differential diagnosis:
----------------------
URTI: 0.171, Bronchitis: 0.155, Influenza: 0.139, Pneumonia: 0.139, Bronchiectasis: 0.104, Tuberculosis: 0.095, HIV (initial infection): 0.061, Chronic rhinosinusitis: 0.057, Chagas: 

# Printing Interactions

Let us assume that you ran a model that you trained on the patients in df2 using the provided run_eval.sh script. In addition to the metric file, the script will generate two other files:
  * a csv file containing the differential predicted for each patient at each turn
  * a csv file containing the actions (i.e., questions) asked by the agent at each turn.

Let **diff_filepath** and **action_filepath** be the file paths to these two files; then the command below will print the interaction of the first patient in df2 (i.e., the patient at index 0):

In [23]:
# action_filepath = "[path to the generated csv action file]"
# diff_filepath = "[path to the generated csv differential file]"
# Show the first patient's interaction, with the predicted differential after each turn.
pprintInteraction(
    df2, cond_data, symp_data, *meta_data[ds_version],
    action_filepath, diff_filepath, idx=0, showDiff=True,
)

Sex: F, Age: 55
Initial evidence: 
	 I have nasal congestion
Differential diagnosis: 
	 Bronchitis, Allergic sinusitis, Viral pharyngitis, Pneumonia, URTI, Bronchiectasis, Acute COPD exacerbation / infection, Chronic rhinosinusitis, Cluster headache, Acute rhinosinusitis, Tuberculosis, Acute otitis media, Unstable angina, Anemia, Bronchospasm / acute asthma exacerbation, Croup, Possible NSTEMI / STEMI, Pancreatic neoplasm, Pulmonary neoplasm, Spontaneous rib fracture, Influenza, Chagas, Pulmonary embolism, Stable angina, Atrial fibrillation, Epiglottitis, Anaphylaxis, Acute dystonic reactions, Boerhaave, Guillain-Barré syndrome, Sarcoidosis

Agent inquiries:
---------------
*Turn 1:
  Q: Do you have any lesions, redness or problems on your skin that you believe are related to the condition you are consulting for?
  A: N
  Differential diagnosis: 
	Allergic sinusitis, URTI, Pulmonary embolism, Chagas, Influenza, Anemia, Unstable angina, Cluster headache, Larygospasm, Pulmonary neoplasm,

To generate the interactions of all the patients in df2 and save them in an output file, simply iterate over them as follows:

In [10]:
# action_filepath = "[path to the generated csv action file]"
# diff_filepath = "[path to the generated csv differential file]"
# output_filepath = "[path output file]"
# Append the interaction of every patient to the output file.
for idx in range(df2.shape[0]):
    pprintInteraction(
        df2, cond_data, symp_data, *meta_data[ds_version],
        action_filepath, diff_filepath,
        idx=idx, showDiff=False, filename=output_filepath,
    )

Or, to print them on the console:

In [11]:
# action_filepath = "[path to the generated csv action file]"
# diff_filepath = "[path to the generated csv differential file]"
# Print the interaction of every patient to the console. The file paths are the
# two variables described above (this cell previously passed the undefined
# names `action` and `diff`).
for idx in range(len(df2)):
    pprintInteraction(df2, cond_data, symp_data, *meta_data[ds_version], action_filepath, diff_filepath, idx, False)

Sex: F, Age: 55
Initial evidence: 
	 I have nasal congestion
Agent inquiries:
---------------
*Turn 1:
  Q: Do you have any lesions, redness or problems on your skin that you believe are related to the condition you are consulting for?
  A: N
*Turn 2:
  Q: Do you have pain somewhere, related to your reason for consulting?
  A: Y
*Turn 3:
  Q: Does the pain radiate to another location?
  A:
	 * nowhere
*Turn 4:
  Q: Do you feel pain somewhere?
  A:
	 * top of the head
	 * forehead
	 * temple(R)
*Turn 5:
  Q: Characterize your pain:
  A:
	 * sensitive
	 * heavy
*Turn 6:
  Q: How precisely is the pain located?
  A:
	 * 6
*Turn 7:
  Q: How intense is the pain?
  A:
	 * 7
*Turn 8:
  Q: How fast did the pain appear?
  A:
	 * 3
*Turn 9:
  Q: Do you live with 4 or more people?
  A: Y
*Turn 10:
  Q: Do you have a sore throat?
  A: Y
*Turn 11:
  Q: Do you have a cough that produces colored or more abundant sputum than usual?
  A: Y
*Turn 12:
  Q: Are you exposed to secondhand cigarette smoke on 

Sex: F, Age: 39
Initial evidence: 
	 I have fever
Agent inquiries:
---------------
*Turn 1:
  Q: Do you have pain somewhere, related to your reason for consulting?
  A: Y
*Turn 2:
  Q: Does the pain radiate to another location?
  A:
	 * nowhere
*Turn 3:
  Q: Do you have any lesions, redness or problems on your skin that you believe are related to the condition you are consulting for?
  A: N
*Turn 4:
  Q: Do you feel pain somewhere?
  A:
	 * lower chest
	 * breast(L)
	 * posterior chest wall(L)
*Turn 5:
  Q: Characterize your pain:
  A:
	 * burning
*Turn 6:
  Q: How precisely is the pain located?
  A:
	 * 3
*Turn 7:
  Q: How intense is the pain?
  A:
	 * 2
*Turn 8:
  Q: Are you experiencing shortness of breath or difficulty breathing in a significant way?
  A: N
*Turn 9:
  Q: How fast did the pain appear?
  A:
	 * 5
*Turn 10:
  Q: Do you have a cough?
  A: Y
*Turn 11:
  Q: Do you have a chronic obstructive pulmonary disease (COPD)?
  A: N
*Turn 12:
  Q: Do you have a cough that produces