In [11]:
import matplotlib.pyplot as plt
import numpy as np

import ehrapy as ep

In [12]:
# Load IPython's autoreload extension so edits to imported modules are picked
# up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules before each cell is executed.
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
# Capture the kernel's current working directory via the %pwd magic.
# NOTE(review): the relative path below presumably assumes the kernel was
# started in this notebook's own directory — confirm before running elsewhere.
current_notebook_dir = %pwd

# Directory (relative to the working directory) where the expected plot image
# is saved by the final cell of this notebook.
_TEST_IMAGE_PATH = f"{current_notebook_dir}/../plot/_images"

In [14]:
# Load MIMIC-II, keeping the two listed columns in `obs` only.
mimic_all = ep.dt.mimic_2(columns_obs_only=["service_unit", "day_icu_intime"])

# Keep only rows whose feature matrix has no NaN, then take the first 200 of
# those as a small sample for generating the expected image.
rows_with_nan = np.isnan(mimic_all.X).any(axis=1)
mimic_all = mimic_all[~rows_with_nan].copy()
adata = mimic_all[:200, :].copy()

# Drop the temporaries so only `adata` remains in the notebook namespace.
del mimic_all, rows_with_nan

# Preprocessing: encode (auto-detected), impute, then log-normalize with an
# offset of 1. The last call is left as the final expression so its return
# value is displayed as the cell output.
adata = ep.pp.encode(adata, autodetect=True)
ep.pp.simple_impute(adata)
ep.pp.log_norm(adata, offset=1)

[93m![0m Features 'aline_flg', 'gender_num', 'service_num', 'day_icu_intime_num', 'hour_icu_intime', 'hosp_exp_flg', 'icu_exp_flg', 'day_28_flg', 'censor_flg', 'sepsis_flg', 'chf_flg', 'afib_flg', 'renal_flg', 'liver_flg', 'copd_flg', 'cad_flg', 'stroke_flg', 'mal_flg', 'resp_flg' were detected as categorical features stored numerically.Please verify and correct using `ep.ad.replace_feature_types` if necessary.
[93m![0m Feature types were inferred and stored in adata.var[feature_type]. Please verify using `ep.ad.feature_type_overview` and adjust if necessary using `ep.ad.replace_feature_types`.
[93m![0m Detected no columns that need to be encoded. Leaving passed AnnData object unchanged.


AnnData object with n_obs × n_vars = 200 × 44
    obs: 'service_unit', 'day_icu_intime', 'missing_values_abs', 'missing_values_pct'
    var: 'feature_type', 'missing_values_abs', 'missing_values_pct', 'mean', 'median', 'standard_deviation', 'min', 'max', 'iqr_outliers'
    uns: 'normalization'
    layers: 'original', 'raw_norm'

In [15]:
# Compute the neighborhood graph; random_state is fixed for reproducibility.
ep.pp.neighbors(adata, random_state=0)
# Leiden clustering at resolution 0.5; results are stored under the
# "leiden_0_5" key, again with a fixed random_state.
ep.tl.leiden(adata, resolution=0.5, key_added="leiden_0_5", random_state=0)
# Run PAGA using the Leiden clusters computed above as the groups.
ep.tl.paga(adata, groups="leiden_0_5")

In [16]:
# Size the figure that is current *before* plotting.
# NOTE(review): whether ep.pl.paga draws into this figure or creates its own
# is not visible from this notebook — confirm; the call is kept as-is because
# the expected image may depend on it.
plt.gcf().set_size_inches(16, 6)

# Draw the PAGA graph twice: once colored by Leiden cluster and once by the
# 28-day flag. show=False keeps the figure open so it can be adjusted and
# saved below.
ep.pl.paga(
    adata,
    color=["leiden_0_5", "day_28_flg"],
    title=["Leiden 0.5", "Died in less than 28 days"],
    cmap=ep.pl.Colormaps.grey_red.value,
    threshold=0.5,
    max_edge_width=1.0,
    random_state=0,
    show=False,
)

# Grab the figure that is current after plotting, add margins on all sides,
# and write it out as the expected image.
paga_fig = plt.gcf()
paga_fig.subplots_adjust(left=0.2, right=0.8, bottom=0.2, top=0.8)
paga_fig.savefig(f"{_TEST_IMAGE_PATH}/paga_expected.png", dpi=80)
plt.close("all")