In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import json
from collections import Counter
import ast
import networkx as nx
import re
from constants import base_path

In [2]:
with open(f"{base_path}\\input\\release_conditions.json") as f:
  disease_dict = json.load(f)
disease_list = list(disease_dict.keys())

In [3]:
def get_pred_miss_dict(error_df):
    pred_miss_dict = {}
    for disease in disease_list:
        miss_list = []
        for idx, row in error_df[error_df["PATHOLOGY"]==disease].iterrows():
            miss_list.extend(ast.literal_eval(row["predicted_diagnosis"]))
        miss_list = [i for i in miss_list if i!=disease]
        if miss_list:
            count = dict(Counter(miss_list))
            pred_miss_dict[disease]={i: round(count[i] / len(miss_list), 3) for i in count}
            # pred_miss_dict[disease]=dict(Counter(miss_list))
    return pred_miss_dict

In [4]:
def get_pred_miss_df(pred_miss_dict):
    pred_miss_df = pd.DataFrame({"disease": pred_miss_dict.keys()})
    for disease in disease_list:
        pred_miss_df[disease]=[pred_miss_dict[i].get(disease, 0) for i in pred_miss_dict.keys()]
    pred_miss_df.set_index('disease', inplace=True)
    return pred_miss_df

In [5]:
def get_pred_miss_graph(pred_miss_df):
    pred_miss_graph = pred_miss_df.stack()
    pred_miss_graph = pred_miss_graph.rename_axis(('Actual', 'Prediction Miss')).reset_index(name='weight')
    pred_miss_graph = pred_miss_graph[pred_miss_graph["weight"]>0]
    pred_miss_graph = pred_miss_graph.sort_values(['Actual','weight'], ascending=False)
    return pred_miss_graph

## Random Forest

In [6]:
error_df = pd.read_csv(f"{base_path}\\output\\error_analysis_questionnaire\\validation_df_all_patients_questionnaire.csv")
error_df = error_df[error_df["is_matched"]==False]

In [7]:
error_df = error_df[["PATHOLOGY", "predicted_diagnosis"]]
error_df

Unnamed: 0,PATHOLOGY,predicted_diagnosis
3,Anemia,['Myocarditis']
4,Boerhaave,['Possible NSTEMI / STEMI']
5,Bronchospasm / acute asthma exacerbation,"['Bronchiectasis', 'Tuberculosis', 'Bronchospa..."
7,Acute otitis media,['GERD']
8,Pulmonary embolism,"['Myocarditis', 'Stable angina', 'Pericarditis']"
...,...,...
132413,Anaphylaxis,['Scombroid food poisoning']
132417,Chronic rhinosinusitis,"['Acute rhinosinusitis', 'Chronic rhinosinusit..."
132427,Viral pharyngitis,['Acute laryngitis']
132443,Viral pharyngitis,['Acute otitis media']


In [8]:
error_df["PATHOLOGY"].value_counts().sort_values().plot.barh(figsize=(6, 8))
plt.title("Prediction Error Frequency")
plt.xlabel("Count")
plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_freq_questionnaire.jpg', bbox_inches='tight')
plt.clf()

<Figure size 600x800 with 0 Axes>

In [9]:
pred_miss_dict = get_pred_miss_dict(error_df)
pred_miss_dict

{'Spontaneous pneumothorax': {'Cluster headache': 0.132,
  'Pericarditis': 0.293,
  'Stable angina': 0.267,
  'Myocarditis': 0.069,
  'Possible NSTEMI / STEMI': 0.162,
  'SLE': 0.01,
  'Acute otitis media': 0.035,
  'Anemia': 0.005,
  'Pulmonary embolism': 0.005,
  'Unstable angina': 0.01,
  'Viral pharyngitis': 0.012},
 'Cluster headache': {'Acute otitis media': 0.696,
  'Viral pharyngitis': 0.262,
  'Possible NSTEMI / STEMI': 0.042},
 'Boerhaave': {'Possible NSTEMI / STEMI': 0.624,
  'Stable angina': 0.151,
  'Pericarditis': 0.145,
  'Cluster headache': 0.015,
  'Acute otitis media': 0.005,
  'Myocarditis': 0.03,
  'Unstable angina': 0.021,
  'Viral pharyngitis': 0.005,
  'SLE': 0.004},
 'Spontaneous rib fracture': {'Cluster headache': 0.083,
  'Viral pharyngitis': 0.161,
  'Bronchitis': 0.163,
  'Stable angina': 0.11,
  'Pericarditis': 0.099,
  'Possible NSTEMI / STEMI': 0.06,
  'Acute otitis media': 0.126,
  'Acute laryngitis': 0.067,
  'Pulmonary neoplasm': 0.083,
  'GERD': 0.032,

In [10]:
pred_miss_df = get_pred_miss_df(pred_miss_dict)
pred_miss_df

Unnamed: 0_level_0,Spontaneous pneumothorax,Cluster headache,Boerhaave,Spontaneous rib fracture,GERD,HIV (initial infection),Anemia,Viral pharyngitis,Inguinal hernia,Myasthenia gravis,...,Pneumonia,Acute rhinosinusitis,Chronic rhinosinusitis,Bronchiolitis,Pulmonary neoplasm,Possible NSTEMI / STEMI,Sarcoidosis,Pancreatic neoplasm,Acute pulmonary edema,Pericarditis
disease,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Spontaneous pneumothorax,0.0,0.132,0.0,0,0.0,0.0,0.005,0.012,0,0.0,...,0,0.0,0.0,0.0,0.0,0.162,0,0.0,0.0,0.293
Cluster headache,0.0,0.0,0.0,0,0.0,0.0,0.0,0.262,0,0.0,...,0,0.0,0.0,0.0,0.0,0.042,0,0.0,0.0,0.0
Boerhaave,0.0,0.015,0.0,0,0.0,0.0,0.0,0.005,0,0.0,...,0,0.0,0.0,0.0,0.0,0.624,0,0.0,0.0,0.145
Spontaneous rib fracture,0.0,0.083,0.0,0,0.032,0.0,0.0,0.161,0,0.0,...,0,0.0,0.0,0.0,0.083,0.06,0,0.0,0.0,0.099
GERD,0.0,0.128,0.001,0,0.0,0.0,0.001,0.297,0,0.0,...,0,0.0,0.0,0.0,0.0,0.009,0,0.0,0.0,0.002
HIV (initial infection),0.0,0.055,0.021,0,0.0,0.0,0.0,0.0,0,0.0,...,0,0.0,0.0,0.0,0.0,0.028,0,0.11,0.0,0.0
Anemia,0.0,0.135,0.0,0,0.0,0.0,0.0,0.02,0,0.0,...,0,0.0,0.0,0.0,0.0,0.159,0,0.001,0.0,0.244
Viral pharyngitis,0.0,0.213,0.0,0,0.01,0.0,0.0,0.0,0,0.0,...,0,0.09,0.072,0.0,0.0,0.001,0,0.0,0.0,0.0
Inguinal hernia,0.0,0.566,0.0,0,0.0,0.0,0.0,0.062,0,0.0,...,0,0.0,0.0,0.0,0.0,0.015,0,0.0,0.0,0.0
Myasthenia gravis,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,...,0,0.0,0.0,0.0,0.151,0.0,0,0.0,0.0,0.0


In [11]:
pred_miss_graph = get_pred_miss_graph(pred_miss_df)
pred_miss_graph

Unnamed: 0,Actual,Prediction Miss,weight
374,Viral pharyngitis,Acute otitis media,0.490
344,Viral pharyngitis,Cluster headache,0.213
357,Viral pharyngitis,Acute laryngitis,0.116
383,Viral pharyngitis,Acute rhinosinusitis,0.090
384,Viral pharyngitis,Chronic rhinosinusitis,0.072
...,...,...,...
1635,Acute COPD exacerbation / infection,Bronchiectasis,0.220
1641,Acute COPD exacerbation / infection,Acute dystonic reactions,0.026
1633,Acute COPD exacerbation / infection,PSVT,0.008
1660,Acute COPD exacerbation / infection,Pulmonary neoplasm,0.008


In [12]:
pred_miss_graph.to_csv(f"{base_path}\\output\\error_analysis_questionnaire\\pred_miss_weigths.csv", index=False)

In [13]:
G = nx.from_pandas_edgelist(pred_miss_graph, 'Actual', 'Prediction Miss', edge_attr='weight', create_using=nx.DiGraph())
pos = nx.spring_layout(G, seed=0)
print(nx.info(G))

DiGraph with 47 nodes and 390 edges


In [14]:
fig = plt.figure(figsize=(15, 8))
weights = [G[u][v]['weight'] for u,v in G.edges()]
nx.draw(G, pos, edge_color=weights, edge_cmap=plt.cm.Blues, with_labels=True, arrowsize=20)
plt.title("Prediction Miss", fontsize=20)
plt.tight_layout()
plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_network.jpg', bbox_inches='tight')
plt.clf()

  """


<Figure size 1500x800 with 0 Axes>

In [15]:
for disease in pred_miss_dict:
    G = nx.from_pandas_edgelist(pred_miss_graph[(pred_miss_graph["Actual"]==disease) | (pred_miss_graph["Prediction Miss"]==disease) ], 'Actual', 'Prediction Miss', edge_attr='weight', create_using=nx.DiGraph())
    pos = nx.spring_layout(G, seed=0)
    fig = plt.figure(figsize=(15, 8))
    weights = [G[u][v]['weight'] for u,v in G.edges()]
    nx.draw(G, pos, edge_color=weights, edge_cmap=plt.cm.Blues, with_labels=True, arrowsize=20)
    plt.title(f"Prediction Miss - {disease}", fontsize=20)
    plt.tight_layout()
    img_filename = re.sub('[^a-zA-Z0-9 \n\.]', '', disease).replace(" ", "_")
    plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_{img_filename}.jpg', bbox_inches='tight')
    plt.clf()

  
  after removing the cwd from sys.path.


<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

## Logistic Regression

In [16]:
error_df = pd.read_csv(f"{base_path}\\output\\error_analysis_questionnaire\\validation_logreg_df_all_patients_questionnaire.csv")
error_df = error_df[error_df["is_matched"]==False]

In [17]:
error_df = error_df[["PATHOLOGY", "predicted_diagnosis"]]
error_df

Unnamed: 0,PATHOLOGY,predicted_diagnosis
3,Anemia,['Stable angina']
7,Acute otitis media,['Viral pharyngitis']
22,Acute rhinosinusitis,['Chronic rhinosinusitis']
23,Acute otitis media,['Allergic sinusitis']
26,Atrial fibrillation,['Bronchospasm / acute asthma exacerbation']
...,...,...
132405,PSVT,['Stable angina']
132406,Anaphylaxis,['Pancreatic neoplasm']
132410,Pancreatic neoplasm,['SLE']
132427,Viral pharyngitis,['Acute laryngitis']


In [18]:
pred_miss_df = get_pred_miss_df(pred_miss_dict)
pred_miss_df

Unnamed: 0_level_0,Spontaneous pneumothorax,Cluster headache,Boerhaave,Spontaneous rib fracture,GERD,HIV (initial infection),Anemia,Viral pharyngitis,Inguinal hernia,Myasthenia gravis,...,Pneumonia,Acute rhinosinusitis,Chronic rhinosinusitis,Bronchiolitis,Pulmonary neoplasm,Possible NSTEMI / STEMI,Sarcoidosis,Pancreatic neoplasm,Acute pulmonary edema,Pericarditis
disease,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Spontaneous pneumothorax,0.0,0.132,0.0,0,0.0,0.0,0.005,0.012,0,0.0,...,0,0.0,0.0,0.0,0.0,0.162,0,0.0,0.0,0.293
Cluster headache,0.0,0.0,0.0,0,0.0,0.0,0.0,0.262,0,0.0,...,0,0.0,0.0,0.0,0.0,0.042,0,0.0,0.0,0.0
Boerhaave,0.0,0.015,0.0,0,0.0,0.0,0.0,0.005,0,0.0,...,0,0.0,0.0,0.0,0.0,0.624,0,0.0,0.0,0.145
Spontaneous rib fracture,0.0,0.083,0.0,0,0.032,0.0,0.0,0.161,0,0.0,...,0,0.0,0.0,0.0,0.083,0.06,0,0.0,0.0,0.099
GERD,0.0,0.128,0.001,0,0.0,0.0,0.001,0.297,0,0.0,...,0,0.0,0.0,0.0,0.0,0.009,0,0.0,0.0,0.002
HIV (initial infection),0.0,0.055,0.021,0,0.0,0.0,0.0,0.0,0,0.0,...,0,0.0,0.0,0.0,0.0,0.028,0,0.11,0.0,0.0
Anemia,0.0,0.135,0.0,0,0.0,0.0,0.0,0.02,0,0.0,...,0,0.0,0.0,0.0,0.0,0.159,0,0.001,0.0,0.244
Viral pharyngitis,0.0,0.213,0.0,0,0.01,0.0,0.0,0.0,0,0.0,...,0,0.09,0.072,0.0,0.0,0.001,0,0.0,0.0,0.0
Inguinal hernia,0.0,0.566,0.0,0,0.0,0.0,0.0,0.062,0,0.0,...,0,0.0,0.0,0.0,0.0,0.015,0,0.0,0.0,0.0
Myasthenia gravis,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,...,0,0.0,0.0,0.0,0.151,0.0,0,0.0,0.0,0.0


In [19]:
error_df["PATHOLOGY"].value_counts().sort_values().plot.barh(figsize=(6, 8))
plt.title("Prediction Error Frequency")
plt.xlabel("Count")
plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_freq_logreg_questionnaire.jpg', bbox_inches='tight')
plt.clf()

<Figure size 600x800 with 0 Axes>

In [20]:
pred_miss_dict = get_pred_miss_dict(error_df)
pred_miss_dict

{'Spontaneous pneumothorax': {'Pericarditis': 0.886,
  'Stable angina': 0.1,
  'Cluster headache': 0.014},
 'Cluster headache': {'Guillain-Barré syndrome': 1.0},
 'Boerhaave': {'Possible NSTEMI / STEMI': 0.152,
  'Stable angina': 0.792,
  'Cluster headache': 0.055},
 'Spontaneous rib fracture': {'Spontaneous pneumothorax': 0.773,
  'Pericarditis': 0.227},
 'GERD': {'Viral pharyngitis': 0.536,
  'Pericarditis': 0.107,
  'Cluster headache': 0.343,
  'Tuberculosis': 0.006,
  'Boerhaave': 0.006,
  'Anemia': 0.003},
 'HIV (initial infection)': {'Influenza': 0.753,
  'Pancreatic neoplasm': 0.112,
  'SLE': 0.07,
  'Chagas': 0.057,
  'Inguinal hernia': 0.004,
  'Tuberculosis': 0.002,
  'Possible NSTEMI / STEMI': 0.002},
 'Anemia': {'Stable angina': 0.953,
  'Cluster headache': 0.041,
  'Bronchospasm / acute asthma exacerbation': 0.006},
 'Viral pharyngitis': {'Acute otitis media': 0.235,
  'Cluster headache': 0.359,
  'Acute laryngitis': 0.407},
 'Inguinal hernia': {'SLE': 0.725, 'Cluster head

In [21]:
pred_miss_df = get_pred_miss_df(pred_miss_dict)
pred_miss_df

Unnamed: 0_level_0,Spontaneous pneumothorax,Cluster headache,Boerhaave,Spontaneous rib fracture,GERD,HIV (initial infection),Anemia,Viral pharyngitis,Inguinal hernia,Myasthenia gravis,...,Pneumonia,Acute rhinosinusitis,Chronic rhinosinusitis,Bronchiolitis,Pulmonary neoplasm,Possible NSTEMI / STEMI,Sarcoidosis,Pancreatic neoplasm,Acute pulmonary edema,Pericarditis
disease,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Spontaneous pneumothorax,0.0,0.014,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.886
Cluster headache,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0
Boerhaave,0.0,0.055,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0.0,0.0,0.152,0.0,0.0,0,0.0
Spontaneous rib fracture,0.773,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.227
GERD,0.0,0.343,0.006,0,0.0,0.0,0.003,0.536,0.0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.107
HIV (initial infection),0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.004,0,...,0,0.0,0.0,0.0,0.0,0.002,0.0,0.112,0,0.0
Anemia,0.0,0.041,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0
Viral pharyngitis,0.0,0.359,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0
Inguinal hernia,0.0,0.275,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0
Myasthenia gravis,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0


In [22]:
pred_miss_graph = get_pred_miss_graph(pred_miss_df)
pred_miss_graph

Unnamed: 0,Actual,Prediction Miss,weight
357,Viral pharyngitis,Acute laryngitis,0.407
344,Viral pharyngitis,Cluster headache,0.359
374,Viral pharyngitis,Acute otitis media,0.235
1318,Unstable angina,Possible NSTEMI / STEMI,0.544
1303,Unstable angina,Stable angina,0.396
...,...,...,...
664,Acute laryngitis,Tuberculosis,0.008
1111,Acute dystonic reactions,Bronchospasm / acute asthma exacerbation,0.983
1096,Acute dystonic reactions,Bronchiectasis,0.017
1650,Acute COPD exacerbation / infection,Bronchospasm / acute asthma exacerbation,0.732


In [23]:
pred_miss_graph.to_csv(f"{base_path}\\output\\error_analysis_questionnaire\\pred_miss_weigths_logreg.csv", index=False)

In [24]:
G = nx.from_pandas_edgelist(pred_miss_graph, 'Actual', 'Prediction Miss', edge_attr='weight', create_using=nx.DiGraph())
pos = nx.spring_layout(G, seed=0)
print(nx.info(G))

DiGraph with 48 nodes and 207 edges


In [25]:
fig = plt.figure(figsize=(15, 8))
weights = [G[u][v]['weight'] for u,v in G.edges()]
nx.draw(G, pos, edge_color=weights, edge_cmap=plt.cm.Blues, with_labels=True, arrowsize=20)
plt.title("Prediction Miss", fontsize=20)
plt.tight_layout()
plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_network_logreg.jpg', bbox_inches='tight')
plt.clf()

  """


<Figure size 1500x800 with 0 Axes>

In [26]:
for disease in pred_miss_dict:
    G = nx.from_pandas_edgelist(pred_miss_graph[(pred_miss_graph["Actual"]==disease) | (pred_miss_graph["Prediction Miss"]==disease) ], 'Actual', 'Prediction Miss', edge_attr='weight', create_using=nx.DiGraph())
    pos = nx.spring_layout(G, seed=0)
    fig = plt.figure(figsize=(15, 8))
    weights = [G[u][v]['weight'] for u,v in G.edges()]
    nx.draw(G, pos, edge_color=weights, edge_cmap=plt.cm.Blues, with_labels=True, arrowsize=20)
    plt.title(f"Prediction Miss - {disease}", fontsize=20)
    plt.tight_layout()
    img_filename = re.sub('[^a-zA-Z0-9 \n\.]', '', disease).replace(" ", "_")
    plt.savefig(f'{base_path}\\output\\error_analysis_questionnaire\\pred_miss_{img_filename}_logreg.jpg', bbox_inches='tight')
    plt.clf()

  
  after removing the cwd from sys.path.


<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1500x800 with 0 Axes>