In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import json
from collections import Counter
import ast
import networkx as nx
import re
from constants import base_path

In [2]:
with open(f"{base_path}\\input\\release_conditions.json") as f:
  disease_dict = json.load(f)
disease_list = list(disease_dict.keys())

In [3]:
def get_pred_miss_dict(error_df):
    pred_miss_dict = {}
    for disease in disease_list:
        miss_list = []
        for idx, row in error_df[error_df["PATHOLOGY"]==disease].iterrows():
            miss_list.extend(ast.literal_eval(row["predicted_diagnosis"]))
        miss_list = [i for i in miss_list if i!=disease]
        if miss_list:
            count = dict(Counter(miss_list))
            pred_miss_dict[disease]={i: round(count[i] / len(miss_list), 3) for i in count}
            # pred_miss_dict[disease]=dict(Counter(miss_list))
    return pred_miss_dict

In [4]:
def get_pred_miss_df(pred_miss_dict):
    pred_miss_df = pd.DataFrame({"disease": pred_miss_dict.keys()})
    for disease in disease_list:
        pred_miss_df[disease]=[pred_miss_dict[i].get(disease, 0) for i in pred_miss_dict.keys()]
    pred_miss_df.set_index('disease', inplace=True)
    return pred_miss_df

In [5]:
def get_pred_miss_graph(pred_miss_df):
    pred_miss_graph = pred_miss_df.stack()
    pred_miss_graph = pred_miss_graph.rename_axis(('Actual', 'Prediction Miss')).reset_index(name='weight')
    pred_miss_graph = pred_miss_graph[pred_miss_graph["weight"]>0]
    pred_miss_graph = pred_miss_graph.sort_values(['Actual','weight'], ascending=False)
    return pred_miss_graph

## Random Forest

In [6]:
error_df = pd.read_csv(f"{base_path}\\output\\error_analysis_questionnaire\\validation_df_all_patients_questionnaire.csv")
error_df = error_df[error_df["is_matched"]==False]

In [7]:
error_df = error_df[["PATHOLOGY", "predicted_diagnosis"]]
error_df

Unnamed: 0,PATHOLOGY,predicted_diagnosis
5,Bronchospasm / acute asthma exacerbation,"['Bronchiectasis', 'Tuberculosis', 'Bronchospa..."
7,Acute otitis media,['GERD']
12,SLE,"['Inguinal hernia', 'SLE']"
22,Acute rhinosinusitis,['Chronic rhinosinusitis']
23,Acute otitis media,['Croup']
...,...,...
132411,Viral pharyngitis,['Chronic rhinosinusitis']
132417,Chronic rhinosinusitis,"['Acute rhinosinusitis', 'Chronic rhinosinusit..."
132427,Viral pharyngitis,['Acute laryngitis']
132443,Viral pharyngitis,['Acute otitis media']


In [8]:
error_df["PATHOLOGY"].value_counts().sort_values().plot.barh(figsize=(6, 8))
plt.title("Prediction Error Frequency")
plt.xlabel("Count")
plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_freq_questionnaire.jpg', bbox_inches='tight')
plt.clf()

<Figure size 600x800 with 0 Axes>

In [9]:
pred_miss_dict = get_pred_miss_dict(error_df)
pred_miss_dict

{'Spontaneous pneumothorax': {'Unstable angina': 0.075,
  'Pericarditis': 0.776,
  'Stable angina': 0.092,
  'Pulmonary embolism': 0.057},
 'Cluster headache': {'Acute otitis media': 0.688,
  'Viral pharyngitis': 0.263,
  'Possible NSTEMI / STEMI': 0.049},
 'Boerhaave': {'Possible NSTEMI / STEMI': 0.281,
  'Stable angina': 0.305,
  'Pericarditis': 0.293,
  'Cluster headache': 0.03,
  'Acute otitis media': 0.01,
  'Myocarditis': 0.06,
  'Viral pharyngitis': 0.01,
  'Unstable angina': 0.002,
  'SLE': 0.008},
 'GERD': {'Acute otitis media': 0.368,
  'Acute laryngitis': 0.182,
  'Viral pharyngitis': 0.291,
  'Cluster headache': 0.128,
  'Possible NSTEMI / STEMI': 0.006,
  'Pericarditis': 0.014,
  'Tuberculosis': 0.003,
  'Bronchitis': 0.006,
  'Boerhaave': 0.001,
  'Anemia': 0.001},
 'HIV (initial infection)': {'Influenza': 1.0},
 'Anemia': {'Possible NSTEMI / STEMI': 0.229,
  'Stable angina': 0.251,
  'Pericarditis': 0.151,
  'Acute otitis media': 0.028,
  'SLE': 0.05,
  'Cluster headache

In [10]:
pred_miss_df = get_pred_miss_df(pred_miss_dict)
pred_miss_df

Unnamed: 0_level_0,Spontaneous pneumothorax,Cluster headache,Boerhaave,Spontaneous rib fracture,GERD,HIV (initial infection),Anemia,Viral pharyngitis,Inguinal hernia,Myasthenia gravis,...,Pneumonia,Acute rhinosinusitis,Chronic rhinosinusitis,Bronchiolitis,Pulmonary neoplasm,Possible NSTEMI / STEMI,Sarcoidosis,Pancreatic neoplasm,Acute pulmonary edema,Pericarditis
disease,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Spontaneous pneumothorax,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.776
Cluster headache,0.0,0.0,0.0,0,0.0,0.0,0.0,0.263,0.0,0,...,0,0.0,0.0,0,0.0,0.049,0,0.0,0.0,0.0
Boerhaave,0.0,0.03,0.0,0,0.0,0.0,0.0,0.01,0.0,0,...,0,0.0,0.0,0,0.0,0.281,0,0.0,0.0,0.293
GERD,0.0,0.128,0.001,0,0.0,0.0,0.001,0.291,0.0,0,...,0,0.0,0.0,0,0.0,0.006,0,0.0,0.0,0.014
HIV (initial infection),0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0
Anemia,0.0,0.106,0.0,0,0.0,0.0,0.0,0.011,0.0,0,...,0,0.0,0.0,0,0.0,0.229,0,0.0,0.0,0.151
Viral pharyngitis,0.0,0.213,0.0,0,0.01,0.0,0.0,0.0,0.0,0,...,0,0.09,0.072,0,0.0,0.001,0,0.0,0.0,0.0
Inguinal hernia,0.0,0.226,0.0,0,0.0,0.0,0.0,0.024,0.0,0,...,0,0.0,0.0,0,0.0,0.0,0,0.085,0.0,0.0
Myasthenia gravis,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0,0.111,0.0,0,0.0,0.0,0.0
Anaphylaxis,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0


In [11]:
pred_miss_graph = get_pred_miss_graph(pred_miss_df)
pred_miss_graph

Unnamed: 0,Actual,Prediction Miss,weight
325,Viral pharyngitis,Acute otitis media,0.490
295,Viral pharyngitis,Cluster headache,0.213
308,Viral pharyngitis,Acute laryngitis,0.116
334,Viral pharyngitis,Acute rhinosinusitis,0.090
335,Viral pharyngitis,Chronic rhinosinusitis,0.072
...,...,...,...
974,Acute dystonic reactions,Pulmonary neoplasm,0.284
948,Acute dystonic reactions,Atrial fibrillation,0.242
946,Acute dystonic reactions,Croup,0.105
1503,Acute COPD exacerbation / infection,Bronchospasm / acute asthma exacerbation,0.737


In [12]:
pred_miss_graph.to_csv(f"{base_path}\\output\\error_analysis_questionnaire\\pred_miss_weigths.csv", index=False)

In [13]:
G = nx.from_pandas_edgelist(pred_miss_graph, 'Actual', 'Prediction Miss', edge_attr='weight', create_using=nx.DiGraph())
pos = nx.spring_layout(G, seed=0)
print(nx.info(G))

DiGraph with 44 nodes and 238 edges


In [14]:
fig = plt.figure(figsize=(15, 8))
weights = [G[u][v]['weight'] for u,v in G.edges()]
nx.draw(G, pos, edge_color=weights, edge_cmap=plt.cm.Blues, with_labels=True, arrowsize=20)
plt.title("Prediction Miss", fontsize=20)
plt.tight_layout()
plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_network.jpg', bbox_inches='tight')
plt.clf()

  plt.tight_layout()


<Figure size 1500x800 with 0 Axes>

In [15]:
for disease in pred_miss_dict:
    G = nx.from_pandas_edgelist(pred_miss_graph[(pred_miss_graph["Actual"]==disease) | (pred_miss_graph["Prediction Miss"]==disease) ], 'Actual', 'Prediction Miss', edge_attr='weight', create_using=nx.DiGraph())
    pos = nx.spring_layout(G, seed=0)
    fig = plt.figure(figsize=(15, 8))
    weights = [G[u][v]['weight'] for u,v in G.edges()]
    nx.draw(G, pos, edge_color=weights, edge_cmap=plt.cm.Blues, with_labels=True, arrowsize=20)
    plt.title(f"Prediction Miss - {disease}", fontsize=20)
    plt.tight_layout()
    img_filename = re.sub('[^a-zA-Z0-9 \n\.]', '', disease).replace(" ", "_")
    plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_{img_filename}.jpg', bbox_inches='tight')
    plt.clf()

  plt.tight_layout()
  fig = plt.figure(figsize=(15, 8))


<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

## Logistic Regression

In [16]:
error_df = pd.read_csv(f"{base_path}\\output\\error_analysis_questionnaire\\validation_logreg_df_all_patients_questionnaire.csv")
error_df = error_df[error_df["is_matched"]==False]

In [17]:
error_df = error_df[["PATHOLOGY", "predicted_diagnosis"]]
error_df

Unnamed: 0,PATHOLOGY,predicted_diagnosis
7,Acute otitis media,['Viral pharyngitis']
22,Acute rhinosinusitis,['Chronic rhinosinusitis']
23,Acute otitis media,['Allergic sinusitis']
29,URTI,['Viral pharyngitis']
51,Acute dystonic reactions,['Bronchospasm / acute asthma exacerbation']
...,...,...
132399,Tuberculosis,['Bronchiectasis']
132400,Acute otitis media,['Viral pharyngitis']
132404,GERD,['Viral pharyngitis']
132427,Viral pharyngitis,['Acute laryngitis']


In [18]:
pred_miss_df = get_pred_miss_df(pred_miss_dict)
pred_miss_df

Unnamed: 0_level_0,Spontaneous pneumothorax,Cluster headache,Boerhaave,Spontaneous rib fracture,GERD,HIV (initial infection),Anemia,Viral pharyngitis,Inguinal hernia,Myasthenia gravis,...,Pneumonia,Acute rhinosinusitis,Chronic rhinosinusitis,Bronchiolitis,Pulmonary neoplasm,Possible NSTEMI / STEMI,Sarcoidosis,Pancreatic neoplasm,Acute pulmonary edema,Pericarditis
disease,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Spontaneous pneumothorax,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.776
Cluster headache,0.0,0.0,0.0,0,0.0,0.0,0.0,0.263,0.0,0,...,0,0.0,0.0,0,0.0,0.049,0,0.0,0.0,0.0
Boerhaave,0.0,0.03,0.0,0,0.0,0.0,0.0,0.01,0.0,0,...,0,0.0,0.0,0,0.0,0.281,0,0.0,0.0,0.293
GERD,0.0,0.128,0.001,0,0.0,0.0,0.001,0.291,0.0,0,...,0,0.0,0.0,0,0.0,0.006,0,0.0,0.0,0.014
HIV (initial infection),0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0
Anemia,0.0,0.106,0.0,0,0.0,0.0,0.0,0.011,0.0,0,...,0,0.0,0.0,0,0.0,0.229,0,0.0,0.0,0.151
Viral pharyngitis,0.0,0.213,0.0,0,0.01,0.0,0.0,0.0,0.0,0,...,0,0.09,0.072,0,0.0,0.001,0,0.0,0.0,0.0
Inguinal hernia,0.0,0.226,0.0,0,0.0,0.0,0.0,0.024,0.0,0,...,0,0.0,0.0,0,0.0,0.0,0,0.085,0.0,0.0
Myasthenia gravis,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0,0.111,0.0,0,0.0,0.0,0.0
Anaphylaxis,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0


In [19]:
error_df["PATHOLOGY"].value_counts().sort_values().plot.barh(figsize=(6, 8))
plt.title("Prediction Error Frequency")
plt.xlabel("Count")
plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_freq_logreg_questionnaire.jpg', bbox_inches='tight')
plt.clf()

<Figure size 600x800 with 0 Axes>

In [20]:
pred_miss_dict = get_pred_miss_dict(error_df)
pred_miss_dict

{'Spontaneous pneumothorax': {'Pericarditis': 0.899, 'Stable angina': 0.101},
 'Boerhaave': {'Possible NSTEMI / STEMI': 0.152,
  'Stable angina': 0.792,
  'Cluster headache': 0.055},
 'GERD': {'Viral pharyngitis': 0.768,
  'Cluster headache': 0.206,
  'Pericarditis': 0.017,
  'Tuberculosis': 0.003,
  'Boerhaave': 0.003,
  'Anemia': 0.002},
 'HIV (initial infection)': {'Pancreatic neoplasm': 0.294, 'Influenza': 0.706},
 'Anemia': {'Stable angina': 0.767,
  'Cluster headache': 0.207,
  'Bronchospasm / acute asthma exacerbation': 0.017,
  'PSVT': 0.009},
 'Viral pharyngitis': {'Acute otitis media': 0.062,
  'Cluster headache': 0.646,
  'Acute laryngitis': 0.293},
 'Inguinal hernia': {'SLE': 0.725, 'Cluster headache': 0.275},
 'Myasthenia gravis': {'Bronchospasm / acute asthma exacerbation': 0.333,
  'Guillain-Barré syndrome': 0.667},
 'Anaphylaxis': {'SLE': 1.0},
 'Epiglottitis': {'Acute laryngitis': 0.317,
  'Stable angina': 0.545,
  'Bronchitis': 0.092,
  'Acute otitis media': 0.016,
  

In [21]:
pred_miss_df = get_pred_miss_df(pred_miss_dict)
pred_miss_df

Unnamed: 0_level_0,Spontaneous pneumothorax,Cluster headache,Boerhaave,Spontaneous rib fracture,GERD,HIV (initial infection),Anemia,Viral pharyngitis,Inguinal hernia,Myasthenia gravis,...,Pneumonia,Acute rhinosinusitis,Chronic rhinosinusitis,Bronchiolitis,Pulmonary neoplasm,Possible NSTEMI / STEMI,Sarcoidosis,Pancreatic neoplasm,Acute pulmonary edema,Pericarditis
disease,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Spontaneous pneumothorax,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.899
Boerhaave,0.0,0.055,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.152,0,0.0,0,0.0
GERD,0.0,0.206,0.003,0,0,0.0,0.002,0.768,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.017
HIV (initial infection),0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.294,0,0.0
Anemia,0.0,0.207,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0
Viral pharyngitis,0.0,0.646,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0
Inguinal hernia,0.0,0.275,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0
Myasthenia gravis,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0
Anaphylaxis,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0
Epiglottitis,0.0,0.03,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0


In [22]:
pred_miss_graph = get_pred_miss_graph(pred_miss_df)
pred_miss_graph

Unnamed: 0,Actual,Prediction Miss,weight
246,Viral pharyngitis,Cluster headache,0.646
259,Viral pharyngitis,Acute laryngitis,0.293
276,Viral pharyngitis,Acute otitis media,0.062
1058,Unstable angina,Stable angina,0.418
1073,Unstable angina,Possible NSTEMI / STEMI,0.398
...,...,...,...
566,Acute laryngitis,Tuberculosis,0.008
915,Acute dystonic reactions,Bronchospasm / acute asthma exacerbation,0.987
900,Acute dystonic reactions,Bronchiectasis,0.013
1405,Acute COPD exacerbation / infection,Bronchospasm / acute asthma exacerbation,0.733


In [23]:
pred_miss_graph.to_csv(f"{base_path}\\output\\error_analysis_questionnaire\\pred_miss_weigths_logreg.csv", index=False)

In [24]:
G = nx.from_pandas_edgelist(pred_miss_graph, 'Actual', 'Prediction Miss', edge_attr='weight', create_using=nx.DiGraph())
pos = nx.spring_layout(G, seed=0)
print(nx.info(G))

DiGraph with 47 nodes and 134 edges


In [25]:
fig = plt.figure(figsize=(15, 8))
weights = [G[u][v]['weight'] for u,v in G.edges()]
nx.draw(G, pos, edge_color=weights, edge_cmap=plt.cm.Blues, with_labels=True, arrowsize=20)
plt.title("Prediction Miss", fontsize=20)
plt.tight_layout()
plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_network_logreg.jpg', bbox_inches='tight')
plt.clf()

  plt.tight_layout()


<Figure size 1500x800 with 0 Axes>

In [26]:
for disease in pred_miss_dict:
    G = nx.from_pandas_edgelist(pred_miss_graph[(pred_miss_graph["Actual"]==disease) | (pred_miss_graph["Prediction Miss"]==disease) ], 'Actual', 'Prediction Miss', edge_attr='weight', create_using=nx.DiGraph())
    pos = nx.spring_layout(G, seed=0)
    fig = plt.figure(figsize=(15, 8))
    weights = [G[u][v]['weight'] for u,v in G.edges()]
    nx.draw(G, pos, edge_color=weights, edge_cmap=plt.cm.Blues, with_labels=True, arrowsize=20)
    plt.title(f"Prediction Miss - {disease}", fontsize=20)
    plt.tight_layout()
    img_filename = re.sub('[^a-zA-Z0-9 \n\.]', '', disease).replace(" ", "_")
    plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_{img_filename}_logreg.jpg', bbox_inches='tight')
    plt.clf()

  plt.tight_layout()
  fig = plt.figure(figsize=(15, 8))


<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

## Decision Tree

In [27]:
error_df = pd.read_csv(f"{base_path}\\output\\error_analysis_questionnaire\\validation_dt_df_all_patients_questionnaire.csv")
error_df = error_df[error_df["is_matched"]==False]

In [28]:
error_df = error_df[["PATHOLOGY", "predicted_diagnosis"]]
error_df

Unnamed: 0,PATHOLOGY,predicted_diagnosis
5,Bronchospasm / acute asthma exacerbation,"['Bronchiectasis', 'Tuberculosis', 'Bronchospa..."
7,Acute otitis media,['GERD']
12,SLE,"['Inguinal hernia', 'SLE', 'Pancreatic neoplasm']"
19,Influenza,"['Influenza', 'Bronchiolitis']"
22,Acute rhinosinusitis,['Chronic rhinosinusitis']
...,...,...
132420,Possible NSTEMI / STEMI,"['Possible NSTEMI / STEMI', 'Pancreatic neopla..."
132427,Viral pharyngitis,['Acute laryngitis']
132440,Stable angina,"['Unstable angina', 'Stable angina']"
132443,Viral pharyngitis,['Acute otitis media']


In [29]:
pred_miss_df = get_pred_miss_df(pred_miss_dict)
pred_miss_df

Unnamed: 0_level_0,Spontaneous pneumothorax,Cluster headache,Boerhaave,Spontaneous rib fracture,GERD,HIV (initial infection),Anemia,Viral pharyngitis,Inguinal hernia,Myasthenia gravis,...,Pneumonia,Acute rhinosinusitis,Chronic rhinosinusitis,Bronchiolitis,Pulmonary neoplasm,Possible NSTEMI / STEMI,Sarcoidosis,Pancreatic neoplasm,Acute pulmonary edema,Pericarditis
disease,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Spontaneous pneumothorax,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.899
Boerhaave,0.0,0.055,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.152,0,0.0,0,0.0
GERD,0.0,0.206,0.003,0,0,0.0,0.002,0.768,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.017
HIV (initial infection),0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.294,0,0.0
Anemia,0.0,0.207,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0
Viral pharyngitis,0.0,0.646,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0
Inguinal hernia,0.0,0.275,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0
Myasthenia gravis,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0
Anaphylaxis,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0
Epiglottitis,0.0,0.03,0.0,0,0,0.0,0.0,0.0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0


In [30]:
error_df["PATHOLOGY"].value_counts().sort_values().plot.barh(figsize=(6, 8))
plt.title("Prediction Error Frequency")
plt.xlabel("Count")
plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_freq_dt_questionnaire.jpg', bbox_inches='tight')
plt.clf()

<Figure size 600x800 with 0 Axes>

In [31]:
pred_miss_dict = get_pred_miss_dict(error_df)
pred_miss_dict

{'Spontaneous pneumothorax': {'Unstable angina': 0.181,
  'Pulmonary embolism': 0.308,
  'Pericarditis': 0.376,
  'Stable angina': 0.058,
  'Pulmonary neoplasm': 0.046,
  'Myocarditis': 0.004,
  'GERD': 0.01,
  'Boerhaave': 0.002,
  'Chronic rhinosinusitis': 0.012,
  'Viral pharyngitis': 0.002},
 'Cluster headache': {'Acute otitis media': 0.465,
  'Viral pharyngitis': 0.185,
  'Possible NSTEMI / STEMI': 0.214,
  'GERD': 0.027,
  'Acute laryngitis': 0.044,
  'Pericarditis': 0.008,
  'Chronic rhinosinusitis': 0.016,
  'Boerhaave': 0.013,
  'Stable angina': 0.026,
  'Pulmonary embolism': 0.002},
 'Boerhaave': {'SLE': 0.091,
  'Unstable angina': 0.033,
  'Possible NSTEMI / STEMI': 0.183,
  'Panic attack': 0.003,
  'Pericarditis': 0.158,
  'Pulmonary neoplasm': 0.028,
  'Stable angina': 0.162,
  'Pulmonary embolism': 0.085,
  'Spontaneous pneumothorax': 0.041,
  'Myocarditis': 0.165,
  'Anemia': 0.017,
  'Cluster headache': 0.012,
  'Acute otitis media': 0.006,
  'GERD': 0.008,
  'Sarcoidos

In [32]:
pred_miss_df = get_pred_miss_df(pred_miss_dict)
pred_miss_df

Unnamed: 0_level_0,Spontaneous pneumothorax,Cluster headache,Boerhaave,Spontaneous rib fracture,GERD,HIV (initial infection),Anemia,Viral pharyngitis,Inguinal hernia,Myasthenia gravis,...,Pneumonia,Acute rhinosinusitis,Chronic rhinosinusitis,Bronchiolitis,Pulmonary neoplasm,Possible NSTEMI / STEMI,Sarcoidosis,Pancreatic neoplasm,Acute pulmonary edema,Pericarditis
disease,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Spontaneous pneumothorax,0.0,0.0,0.002,0.0,0.01,0.0,0.0,0.002,0.0,0.0,...,0.0,0.0,0.012,0.0,0.046,0.0,0.0,0.0,0.0,0.376
Cluster headache,0.0,0.0,0.013,0.0,0.027,0.0,0.0,0.185,0.0,0.0,...,0.0,0.0,0.016,0.0,0.0,0.214,0.0,0.0,0.0,0.008
Boerhaave,0.041,0.012,0.0,0.0,0.008,0.0,0.017,0.004,0.0,0.0,...,0.0,0.0,0.0,0.0,0.028,0.183,0.004,0.0,0.0,0.158
Spontaneous rib fracture,0.0,0.0,0.167,0.0,0.333,0.0,0.0,0.0,0.167,0.0,...,0.0,0.0,0.0,0.0,0.167,0.0,0.0,0.0,0.0,0.0
GERD,0.0,0.079,0.018,0.009,0.0,0.0,0.009,0.211,0.0,0.0,...,0.0,0.0,0.001,0.0,0.0,0.035,0.0,0.0,0.0,0.013
HIV (initial infection),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333,0.0,0.0
Anemia,0.085,0.02,0.01,0.0,0.013,0.0,0.0,0.007,0.0,0.0,...,0.0,0.0,0.014,0.0,0.048,0.096,0.001,0.012,0.0,0.043
Viral pharyngitis,0.0,0.144,0.001,0.003,0.042,0.0,0.0,0.0,0.0,0.0,...,0.0,0.159,0.06,0.006,0.0,0.007,0.0,0.0,0.0,0.001
Inguinal hernia,0.0,0.114,0.044,0.021,0.018,0.0,0.003,0.026,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.052,0.0,0.233,0.0,0.008
Myasthenia gravis,0.0,0.0,0.011,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.106,0.0,0.0,0.0,0.0,0.0


In [33]:
pred_miss_graph = get_pred_miss_graph(pred_miss_df)
pred_miss_graph

Unnamed: 0,Actual,Prediction Miss,weight
526,Whooping cough,Pulmonary embolism,1.000
374,Viral pharyngitis,Acute otitis media,0.363
383,Viral pharyngitis,Acute rhinosinusitis,0.159
344,Viral pharyngitis,Cluster headache,0.144
357,Viral pharyngitis,Acute laryngitis,0.095
...,...,...,...
1733,Acute COPD exacerbation / infection,Bronchiectasis,0.260
1717,Acute COPD exacerbation / infection,Boerhaave,0.006
1749,Acute COPD exacerbation / infection,Bronchitis,0.006
1732,Acute COPD exacerbation / infection,Atrial fibrillation,0.004


In [34]:
pred_miss_graph.to_csv(f"{base_path}\\output\\error_analysis_questionnaire\\pred_miss_weigths_dt.csv", index=False)

In [35]:
G = nx.from_pandas_edgelist(pred_miss_graph, 'Actual', 'Prediction Miss', edge_attr='weight', create_using=nx.DiGraph())
pos = nx.spring_layout(G, seed=0)
print(nx.info(G))

DiGraph with 49 nodes and 593 edges


In [36]:
fig = plt.figure(figsize=(15, 8))
weights = [G[u][v]['weight'] for u,v in G.edges()]
nx.draw(G, pos, edge_color=weights, edge_cmap=plt.cm.Blues, with_labels=True, arrowsize=20)
plt.title("Prediction Miss", fontsize=20)
plt.tight_layout()
plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_network_dt.jpg', bbox_inches='tight')
plt.clf()

  plt.tight_layout()


<Figure size 1500x800 with 0 Axes>