# Before

In [None]:
import pandas as pd
import numpy as np

# Load the eICU patient and medication tables from CSV.
patient = pd.read_csv("../data/eicu/patient.csv")
medication = pd.read_csv("../data/eicu/medication.csv")

# Inner join on the unit-stay id: only stays present in both tables are kept.
patient_medication = pd.merge(
    left=patient,
    right=medication,
    how='inner',
    on='patientunitstayid',
)

In [None]:
import matplotlib.pyplot as plt

# Per-hospital fraction of medication rows whose `drugname` is missing.
# One groupby replaces a full boolean-mask scan of the frame per hospital.
# sort=False keeps hospitals in order of first appearance rather than sorted
# by id. Rows with a missing `hospitalid` are dropped by groupby, so they can
# no longer produce an empty slice and a 0/0 division.
drugname_missing = patient_medication['drugname'].isna()
nan_fraction_by_hospital = drugname_missing.groupby(
    patient_medication['hospitalid'], sort=False
).mean()

# Plain lists with one entry per hospital; values are fractions in [0, 1].
medication_nan_percentage = nan_fraction_by_hospital.tolist()
medication_not_nan_percentage = (1 - nan_fraction_by_hospital).tolist()

# Percentage Distribution of Drug Existence Across Hospitals
# One stacked bar per hospital: rows with a drug name (bottom) and rows
# without one (stacked on top).
fig, ax = plt.subplots(figsize=(6, 4.5))
ax.set_title("Before Imputation")

hospital_positions = np.arange(len(medication_nan_percentage))
ax.bar(
    hospital_positions,
    medication_not_nan_percentage,
    label='Exist',
    color="#5faffa",
)
ax.bar(
    hospital_positions,
    medication_nan_percentage,
    bottom=medication_not_nan_percentage,  # stack on top of the 'Exist' bars
    label='Not Exist',
    color="#fa8296",
)

ax.set_xticks([])  # no x tick marks or labels
ax.set_xlabel("Hospitals")
ax.set_ylabel("Percentage")
# Anchor the legend just outside the right edge of the axes.
ax.legend(title="Drug Name", loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()

In [None]:
# Load the eICU table used for the "Before Harmonization" overlap heatmap
# and preview its first rows as the cell output.
eicu = pd.read_csv("../data/one_hot_age_eicu_data_2.csv")
eicu.head()

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

hospital_data = eicu.to_numpy()

selected_hospitals = [420,110,122,167,183,199,243,252,281,416,443]

# For each selected hospital, collect the indices (relative to the sliced
# column range) of the columns whose sum over that hospital's rows is positive.
# NOTE(review): column 2 is presumably the hospital id and columns 8:-9 the
# one-hot drug indicators -- these are positional; confirm against the CSV schema.
hospital_drug_mapping = {}
for hospital_id in selected_hospitals:
    data = hospital_data[hospital_data[:, 2] == hospital_id]
    hospital_drug_mapping[hospital_id] = np.where(np.sum(data[:, 8:-9], axis=0) > 0)[0]

# drug_percentage[i, j] = |A ∩ B| / |B|, where A is the drug set of
# selected_hospitals[i] and B the drug set of selected_hospitals[j].
# Cells start as NaN: when B is empty the ratio is undefined, so the cell is
# left as NaN instead of raising ZeroDivisionError.
drug_percentage = np.full((len(selected_hospitals), len(selected_hospitals)), np.nan)
for i, hospital_id_1 in enumerate(selected_hospitals):
    drug_names_1 = hospital_drug_mapping[hospital_id_1]  # invariant over the inner loop
    for j, hospital_id_2 in enumerate(selected_hospitals):
        drug_names_2 = hospital_drug_mapping[hospital_id_2]
        if len(drug_names_2) == 0:
            continue
        drug_percentage[i, j] = len(np.intersect1d(drug_names_1, drug_names_2)) / len(drug_names_2)

# Overlap Drugs Percentage in Hospitals: |A ∩ B| / |B|
# Rows are hospital A, columns are hospital B.
fig, ax = plt.subplots(figsize=(6, 4.5))
ax.set_title("Before Harmonization")

# 100-colour palette built from the single base colour #5faffa.
rdgn = sns.light_palette(color="#5faffa", n_colors=100)
sns.heatmap(drug_percentage, cmap=rdgn, cbar_kws={'shrink': 0.5}, ax=ax)

# Replace the default positional tick labels with the hospital ids.
ax.set_yticklabels(selected_hospitals, rotation=0)
ax.set_xticklabels(selected_hospitals)

# The heatmap mesh is the first collection on the axes; it owns the colorbar.
colorbar = ax.collections[0].colorbar
colorbar.set_label("Overlap Percentage")

ax.set_xlabel("Hospital B")
ax.set_ylabel("Hospital A")
plt.show()

# After

In [None]:
import pandas as pd
import numpy as np

# Load the eICU patient table and the imputed medication table from CSV.
patient = pd.read_csv("../data/eicu/patient.csv")
medication = pd.read_csv("../data/medication_imputed.csv")

# Inner join on the unit-stay id: only stays present in both tables are kept.
patient_medication = pd.merge(
    left=patient,
    right=medication,
    how='inner',
    on='patientunitstayid',
)

In [None]:
import matplotlib.pyplot as plt

# Per-hospital fraction of medication rows whose `drugname` is missing.
# One groupby replaces a full boolean-mask scan of the frame per hospital.
# sort=False keeps hospitals in order of first appearance rather than sorted
# by id. Rows with a missing `hospitalid` are dropped by groupby, so they can
# no longer produce an empty slice and a 0/0 division.
drugname_missing = patient_medication['drugname'].isna()
nan_fraction_by_hospital = drugname_missing.groupby(
    patient_medication['hospitalid'], sort=False
).mean()

# Plain lists with one entry per hospital; values are fractions in [0, 1].
medication_nan_percentage = nan_fraction_by_hospital.tolist()
medication_not_nan_percentage = (1 - nan_fraction_by_hospital).tolist()

# Percentage Distribution of Drug Existence Across Hospitals
# One stacked bar per hospital: rows with a drug name (bottom) and rows
# without one (stacked on top).
fig, ax = plt.subplots(figsize=(6, 4.5))
ax.set_title("After Imputation")

hospital_positions = np.arange(len(medication_nan_percentage))
ax.bar(
    hospital_positions,
    medication_not_nan_percentage,
    label='Exist',
    color="#5faffa",
)
ax.bar(
    hospital_positions,
    medication_nan_percentage,
    bottom=medication_not_nan_percentage,  # stack on top of the 'Exist' bars
    label='Not Exist',
    color="#fa8296",
)

ax.set_xticks([])  # no x tick marks or labels
ax.set_xlabel("Hospitals")
ax.set_ylabel("Percentage")
# Anchor the legend just outside the right edge of the axes.
ax.legend(title="Drug Name", loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()

In [None]:
# Load the harmonized eICU table used for the "After Harmonization" overlap
# heatmap and preview its first rows as the cell output.
eicu = pd.read_csv("../data/eicu_harmonized.csv")
eicu.head()

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

hospital_data = eicu.to_numpy()

selected_hospitals = [420,110,122,167,183,199,243,252,281,416,443]

# For each selected hospital, collect the indices (relative to the sliced
# column range) of the columns whose sum over that hospital's rows is positive.
# NOTE(review): column 1 is presumably the hospital id and columns 5:242 the
# one-hot drug indicators -- these are positional; confirm against the CSV schema.
hospital_drug_mapping = {}
for hospital_id in selected_hospitals:
    data = hospital_data[hospital_data[:, 1] == hospital_id]
    hospital_drug_mapping[hospital_id] = np.where(np.sum(data[:, 5:242], axis=0) > 0)[0]

# drug_percentage[i, j] = |A ∩ B| / |B|, where A is the drug set of
# selected_hospitals[i] and B the drug set of selected_hospitals[j].
# Cells start as NaN: when B is empty the ratio is undefined, so the cell is
# left as NaN instead of raising ZeroDivisionError.
drug_percentage = np.full((len(selected_hospitals), len(selected_hospitals)), np.nan)
for i, hospital_id_1 in enumerate(selected_hospitals):
    drug_names_1 = hospital_drug_mapping[hospital_id_1]  # invariant over the inner loop
    for j, hospital_id_2 in enumerate(selected_hospitals):
        drug_names_2 = hospital_drug_mapping[hospital_id_2]
        if len(drug_names_2) == 0:
            continue
        drug_percentage[i, j] = len(np.intersect1d(drug_names_1, drug_names_2)) / len(drug_names_2)

# Overlap Drugs Percentage in Hospitals: |A ∩ B| / |B|
# Rows are hospital A, columns are hospital B.
fig, ax = plt.subplots(figsize=(6, 4.5))
ax.set_title("After Harmonization")

# 100-colour palette built from the single base colour #5faffa.
rdgn = sns.light_palette(color="#5faffa", n_colors=100)
sns.heatmap(drug_percentage, cmap=rdgn, cbar_kws={'shrink': 0.5}, ax=ax)

# Replace the default positional tick labels with the hospital ids.
ax.set_yticklabels(selected_hospitals, rotation=0)
ax.set_xticklabels(selected_hospitals)

# The heatmap mesh is the first collection on the axes; it owns the colorbar.
colorbar = ax.collections[0].colorbar
colorbar.set_label("Overlap Percentage")

ax.set_xlabel("Hospital B")
ax.set_ylabel("Hospital A")
plt.show()