In [1]:
import pandas as pd
import json
import numpy as np
from sklearn.neighbors import NearestNeighbors
import pickle
from constants import n_questions, base_path
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the evidence catalogue. UTF-8 is requested explicitly so decoding does not
# depend on the platform's default encoding, and forward slashes keep the path
# valid on both Windows and POSIX systems.
with open(f"{base_path}/input/release_evidences.json", encoding="utf-8") as f:
    evidences = json.load(f)

# Keep only binary symptoms: entries whose "possible-values" field is empty and
# that are not flagged as antecedents.
evidences_list_code = []  # evidence codes (the JSON keys), in file order
evidences_list = []       # English question text, aligned with evidences_list_code
evidences_dict = {}       # evidence code -> English question text
for evidence_code, evidence_info in evidences.items():
    if evidence_info["possible-values"] or evidence_info["is_antecedent"]:
        continue
    question_text = evidence_info["question_en"]
    evidences_list_code.append(evidence_code)
    evidences_list.append(question_text)
    evidences_dict[evidence_code] = question_text

In [3]:
# Number of evidence questions that passed the binary / non-antecedent filter.
len(evidences_list)

96

In [4]:
# Load the condition catalogue. UTF-8 is requested explicitly so decoding does not
# depend on the platform's default encoding, and forward slashes keep the path
# valid on both Windows and POSIX systems.
with open(f"{base_path}/input/release_conditions.json", encoding="utf-8") as f:
    disease_dict = json.load(f)

# Top-level keys of the conditions file, in file order.
disease_list = list(disease_dict)

In [5]:
def get_next_question(evidences, nbrs, feature_importance_df):
    """Suggest the next question to ask, given the questions already answered.

    The rows of ``feature_importance_df`` for the answered questions are averaged
    into a centroid, ``nbrs`` is queried for the rows nearest to that centroid, and
    the closest row that has not been answered yet is returned.

    Parameters
    ----------
    evidences : iterable of str
        Question texts already answered; each must be a row label of
        ``feature_importance_df`` (a missing label raises ``KeyError``).
    nbrs : object with a ``kneighbors`` method
        Neighbour index whose row positions match the rows of
        ``feature_importance_df`` (in this notebook, a ``NearestNeighbors``
        model fitted on that same frame).
    feature_importance_df : pandas.DataFrame
        One row per question, indexed by question text.

    Returns
    -------
    str
        The nearest unanswered question, or ``""`` when there is nothing to
        suggest: either no evidence was supplied or every returned neighbour
        has already been answered.
    """
    answered = list(evidences)
    if not answered:
        # Without evidence the centroid would be NaN and the neighbour query
        # would fail with an unrelated-looking error; report "nothing to ask".
        return ""

    centroid = feature_importance_df.loc[answered].to_numpy().mean(axis=0)
    _, indices = nbrs.kneighbors([centroid])

    # Map neighbour positions back through the frame's own index rather than a
    # notebook-level list, so the function only depends on its arguments.
    questions = feature_importance_df.index
    already_asked = set(answered)
    for position in indices[0]:
        candidate = questions[position]
        if candidate not in already_asked:
            return candidate
    return ""

In [6]:
def get_ftr_importance_df(feature_importance_dict, evidence_questions=None):
    """Build a question-by-disease importance matrix.

    Parameters
    ----------
    feature_importance_dict : dict
        Maps each disease name to a dict whose ``"top10_relevant_symptoms"``
        entry maps question text to an importance value.
    evidence_questions : iterable of str, optional
        Question texts to use as rows. Defaults to the notebook-level
        ``evidences_list``.

    Returns
    -------
    pandas.DataFrame
        One row per question (index named ``"evidence"``) and one column per
        disease, in the dict's order. Questions missing from a disease's
        ``"top10_relevant_symptoms"`` get 0.
    """
    if evidence_questions is None:
        # Fall back to the question list built when the evidence file was loaded.
        evidence_questions = evidences_list
    evidence_questions = list(evidence_questions)

    # Assemble every column first and construct the frame once, instead of
    # inserting columns one at a time and re-indexing in place.
    importance_by_disease = {
        disease: [
            disease_info["top10_relevant_symptoms"].get(question, 0)
            for question in evidence_questions
        ]
        for disease, disease_info in feature_importance_dict.items()
    }
    return pd.DataFrame(
        importance_by_disease,
        index=pd.Index(evidence_questions, name="evidence"),
    )

## Random Forest

In [7]:
# Load the per-disease feature importances used in this section. UTF-8 is
# requested explicitly so decoding does not depend on the platform default, and
# forward slashes keep the path valid on both Windows and POSIX systems.
with open(f"{base_path}/output/feature_importance.json", encoding="utf-8") as f:
    feature_importance_dict = json.load(f)

In [8]:
# Build the question x disease importance matrix from the loaded dict
# (rows: question text, columns: diseases).
feature_importance_df = get_ftr_importance_df(feature_importance_dict)
# Bare last expression so the notebook renders the frame.
feature_importance_df

Unnamed: 0_level_0,Spontaneous pneumothorax,Cluster headache,Boerhaave,Spontaneous rib fracture,GERD,HIV (initial infection),Anemia,Viral pharyngitis,Inguinal hernia,Myasthenia gravis,...,Pneumonia,Acute rhinosinusitis,Chronic rhinosinusitis,Bronchiolitis,Pulmonary neoplasm,Possible NSTEMI / STEMI,Sarcoidosis,Pancreatic neoplasm,Acute pulmonary edema,Pericarditis
evidence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Do you have a fever (either felt or measured with a thermometer)?,0.025764,0.066418,0.027025,0.023705,0.027436,0.094198,0.000000,0.036879,0.027490,0.000000,...,0.025668,0.000000,0.058277,0.175665,0.031826,0.034001,0.033759,0.028436,0.000000,0.027511
"Do you have pain somewhere, related to your reason for consulting?",0.048024,0.075092,0.049367,0.031275,0.028815,0.000000,0.023572,0.086531,0.042174,0.129557,...,0.000000,0.045449,0.043964,0.137135,0.000000,0.045028,0.049111,0.020412,0.018093,0.050291
Did you lose consciousness?,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
"Do you have any lesions, redness or problems on your skin that you believe are related to the condition you are consulting for?",0.000000,0.029475,0.017294,0.014129,0.000000,0.156626,0.000000,0.034248,0.184359,0.000000,...,0.147003,0.000000,0.015698,0.013473,0.016532,0.033064,0.018412,0.093787,0.000000,0.000000
Is your skin much paler than usual?,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.135016,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Do you feel like you are detached from your own body or your surroundings?,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Do you feel like you are dying or were you afraid that you were about do die?,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Do you have greenish or yellowish nasal discharge?,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.307416,0.159449,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Have you lost your sense of smell?,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.230766,0.324863,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [9]:
# Index the question rows by cosine distance between their per-disease
# importance vectors. n_neighbors is capped at the number of rows because a
# neighbour query cannot return more neighbours than there are fitted samples.
nbrs = NearestNeighbors(
    n_neighbors=min(n_questions, len(feature_importance_df)),
    metric='cosine',
).fit(feature_importance_df)

# Save the fitted model. Forward slashes keep the path valid on Windows and POSIX.
# NOTE(review): "questionairre" is misspelled (the logistic-regression section
# writes "questionnaire_logreg.pkl"); the name is kept unchanged in case other
# code loads this file under its current spelling.
with open(f'{base_path}/output/questionnaire/questionairre.pkl', 'wb') as f:
    pickle.dump(nbrs, f)

In [10]:
# Seed evidence for the demo: the questions treated as already answered.
# Three alternative scenarios are listed; only the uncommented one is active.
# NOTE(review): this rebinds `evidences`, which earlier held the parsed
# release_evidences.json content, so that dict is no longer reachable under this name.

# URTI symptoms
# evidences = [
#     "Do you have a sore throat?",
#     "Do you have a cough?",
#     "Do you have a fever (either felt or measured with a thermometer)?"
# ]

# Anemia symptoms
evidences = [
    "Is your skin much paler than usual?",
    "Have you recently had stools that were black (like coal)?",
    "Do you constantly feel fatigued or do you have non-restful sleep?"
]

# URTI and Anemia - just to test mixed initial symptoms
# evidences = [
#     "Do you have a sore throat?",
#     "Do you have a cough?",
#     "Do you have a fever (either felt or measured with a thermometer)?",
#     "Is your skin much paler than usual?",
#     "Have you recently had stools that were black (like coal)?",
#     "Do you constantly feel fatigued or do you have non-restful sleep?"
# ]

# Number of questions supplied up front; the follow-up loop starts counting from here.
initial_evidence_count = len(evidences)

# Preview the single next question suggested for this evidence set
# (the call does not modify `evidences`).
get_next_question(evidences, nbrs, feature_importance_df)

'Have you noticed light red blood or blood clots in your stool?'

In [12]:
# sequentially, gets next immediate evidence, assuming you answered yes to all the previous questions
for i in range(initial_evidence_count, n_questions):
    next_question = get_next_question(evidences, nbrs, feature_importance_df)
    if next_question:
        print(next_question)
        evidences.append(next_question)
    else:
        break

Have you noticed light red blood or blood clots in your stool?
Do you have very abundant or very long menstruation periods?
Do you feel lightheaded and dizzy or do you feel like you are about to faint?
Do you feel slightly dizzy or lightheaded?
Do you feel so tired that you are unable to do your usual activities or are you stuck in your bed all day long?
Do you feel your heart is beating fast (racing), irregularly (missing a beat) or do you feel palpitations?
Do you have a cough?
Do you have pain somewhere, related to your reason for consulting?
Do you have a fever (either felt or measured with a thermometer)?
Are you experiencing shortness of breath or difficulty breathing in a significant way?
Do you have nasal congestion or a clear runny nose?
Have you had significantly increased sweating?
Have you been coughing up blood?
Have you noticed a wheezing sound when you exhale?
Do you have a cough that produces colored or more abundant sputum than usual?
Do you have any lesions, redness o

## Logistic Regression

In [13]:
# Load the per-disease feature importances used in this section. UTF-8 is
# requested explicitly so decoding does not depend on the platform default, and
# forward slashes keep the path valid on both Windows and POSIX systems.
with open(f"{base_path}/output/feature_importance_logreg.json", encoding="utf-8") as f:
    feature_importance_dict = json.load(f)

In [14]:
# Rebuild the question x disease importance matrix from the dict loaded for this
# section; this replaces the frame built in the previous section.
feature_importance_df = get_ftr_importance_df(feature_importance_dict)
# Bare last expression so the notebook renders the frame.
feature_importance_df

Unnamed: 0_level_0,Spontaneous pneumothorax,Cluster headache,Boerhaave,Spontaneous rib fracture,GERD,HIV (initial infection),Anemia,Viral pharyngitis,Inguinal hernia,Myasthenia gravis,...,Pneumonia,Acute rhinosinusitis,Chronic rhinosinusitis,Bronchiolitis,Pulmonary neoplasm,Possible NSTEMI / STEMI,Sarcoidosis,Pancreatic neoplasm,Acute pulmonary edema,Pericarditis
evidence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Do you have a fever (either felt or measured with a thermometer)?,0.000000,0.000000,0.00000,0.000000,0.000000,1.585639,0.000000,0.142418,0.000000,0.0,...,0.000000,0.242576,0.000000,0.777401,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
"Do you have pain somewhere, related to your reason for consulting?",1.650935,3.568194,1.36364,0.881306,1.371733,0.862920,2.121008,5.186258,1.910237,0.0,...,1.026723,2.024601,2.033464,0.000000,1.195002,2.634012,1.386526,1.375371,0.217331,3.17952
Did you lose consciousness?,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
"Do you have any lesions, redness or problems on your skin that you believe are related to the condition you are consulting for?",0.000000,0.000000,0.00000,0.000000,0.000000,4.273725,0.000000,0.000000,3.133899,0.0,...,3.567464,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.973815,0.000000,0.00000
Is your skin much paler than usual?,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,3.052768,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Do you feel like you are detached from your own body or your surroundings?,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
Do you feel like you are dying or were you afraid that you were about do die?,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
Do you have greenish or yellowish nasal discharge?,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,1.856938,1.590656,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
Have you lost your sense of smell?,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.956432,2.054624,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000


In [15]:
# Index the question rows by cosine distance between their per-disease
# importance vectors. n_neighbors is capped at the number of rows because a
# neighbour query cannot return more neighbours than there are fitted samples.
nbrs = NearestNeighbors(
    n_neighbors=min(n_questions, len(feature_importance_df)),
    metric='cosine',
).fit(feature_importance_df)

# Save the fitted model. Forward slashes keep the path valid on Windows and POSIX.
with open(f'{base_path}/output/questionnaire/questionnaire_logreg.pkl', 'wb') as f:
    pickle.dump(nbrs, f)

In [16]:
# Reset the seed evidence so this section starts from the same three questions
# rather than from the list extended by the earlier follow-up loop.
# Three alternative scenarios are listed; only the uncommented one is active.

# URTI symptoms
# evidences = [
#     "Do you have a sore throat?",
#     "Do you have a cough?",
#     "Do you have a fever (either felt or measured with a thermometer)?"
# ]

# Anemia symptoms
evidences = [
    "Is your skin much paler than usual?",
    "Have you recently had stools that were black (like coal)?",
    "Do you constantly feel fatigued or do you have non-restful sleep?"
]

# URTI and Anemia - just to test mixed initial symptoms
# evidences = [
#     "Do you have a sore throat?",
#     "Do you have a cough?",
#     "Do you have a fever (either felt or measured with a thermometer)?",
#     "Is your skin much paler than usual?",
#     "Have you recently had stools that were black (like coal)?",
#     "Do you constantly feel fatigued or do you have non-restful sleep?"
# ]

# Number of questions supplied up front; the follow-up loop starts counting from here.
initial_evidence_count = len(evidences)

# Preview the single next question suggested for this evidence set
# (the call does not modify `evidences`).
get_next_question(evidences, nbrs, feature_importance_df)

'Have you noticed light red blood or blood clots in your stool?'

In [17]:
# sequentially, gets next immediate evidence, assuming you answered yes to all the previous questions
for i in range(initial_evidence_count, n_questions):
    next_question = get_next_question(evidences, nbrs, feature_importance_df)
    if next_question:
        print(next_question)
        evidences.append(next_question)
    else:
        break

Have you noticed light red blood or blood clots in your stool?
Do you have very abundant or very long menstruation periods?
Do you feel slightly dizzy or lightheaded?
Do you feel so tired that you are unable to do your usual activities or are you stuck in your bed all day long?
Do you feel lightheaded and dizzy or do you feel like you are about to faint?
Do you have pain somewhere, related to your reason for consulting?
Are you experiencing shortness of breath or difficulty breathing in a significant way?
Have you been coughing up blood?
Do you have a cough?
Do you have a fever (either felt or measured with a thermometer)?
Have you had an involuntary weight loss over the last 3 months?
Have you recently had a loss of appetite or do you get full more quickly then usually?
Do you have any lesions, redness or problems on your skin that you believe are related to the condition you are consulting for?
Have you had chills or shivers?
Are you feeling nauseous or do you feel like vomiting?
Hav