In [None]:
# Notebook-wide IPython settings: echo the value of a trailing expression *or*
# assignment in every cell, and render inline figures as SVG.
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'  # always print last expr.
%config InlineBackend.figure_format = 'svg'
# Automatically reload imported modules before each cell is executed.
%load_ext autoreload
%autoreload 2

import logging

# Show log records of level INFO and above.
logging.basicConfig(level=logging.INFO)

# NOTE(review): the star import hides where names come from; presumably it is
# what provides MIMIC_III_DeBrouwer2019 used further down — consider importing
# the needed names explicitly.
from tsdm.datasets import *

In [None]:
from getpass import getpass

import tsdm
from tsdm.utils.remote import download

In [None]:
# Create the MIMIC-III dataset object for version "1.2" with initialize=False.
# Presumably initialize=False defers downloading/loading until requested
# explicitly — TODO confirm against tsdm.
ds = tsdm.datasets.MIMIC_III(initialize=False, version="1.2")

In [None]:
# Run the dataset's own download routine.
# NOTE(review): presumably fetches the raw files and may prompt for
# credentials — verify in tsdm before running unattended.
ds.download()

In [None]:
# Display the dataset's HOME_URL attribute.
ds.HOME_URL

In [None]:
# Scratch check: turn a dotted version string into a tuple of ints so that it
# compares numerically (not character by character) against (1, 4).
# For "1.3" this evaluates to True.
tuple(map(int, "1.3".split("."))) <= (1, 4)

In [None]:
from typing import Literal

In [None]:
# Scratch experiment: a Literal type with two allowed string values.
x = Literal["a", "b"]

In [None]:
from typing import get_args

In [None]:
# get_args returns the type arguments of `x`; while `x` is Literal["a", "b"]
# that is the tuple ("a", "b").
get_args(x)

In [None]:
# Single source of truth for the release to fetch, so the URL and the local
# archive name cannot drift apart.
# NOTE(review): `ds` was created with version="1.2" while this cell targets
# "1.4" — confirm which release is intended.
MIMIC_VERSION = "1.4"
url = ds.BASE_URL.format(version=MIMIC_VERSION)
fname = f"mimic-iii-clinical-database-{MIMIC_VERSION}.zip"

In [None]:
# Ask for the credentials interactively so they are never hard-coded here.
username = input("MIMIC-III username: ")
# getpass reads the password without echoing the typed characters.
# The trailing semicolon is deliberate: this notebook sets
# ast_node_interactivity='last_expr_or_assign', so a cell ending in a plain
# assignment would display the assigned value — i.e. write the password into
# the cell output. The semicolon suppresses that display.
password = getpass(prompt="MIMIC-III password: ");

In [None]:
# Extra HTTP headers for the download request: identify as wget.
# NOTE(review): presumably needed because the server treats scripted clients
# differently — TODO confirm.
headers = {"User-Agent": "Wget/1.21.2"}

In [None]:
# Download `url`, passing the custom headers and the credentials entered above.
# NOTE(review): `fname` is presumably the target file name — verify against
# tsdm.utils.remote.download, whose signature is not visible here.
download(url, fname, headers=headers, username=username, password=password)

In [None]:
# Instantiate the dataset and bind it to the module-level name `self`.
# NOTE(review): presumably so the cells below can be written like (and later
# pasted into) method bodies; `self` has no special meaning here.
self = MIMIC_III_DeBrouwer2019()

In [None]:
# Scratch experiment: an annotated assignment whose annotation is an arbitrary
# expression (a list, not a type); Python accepts it and does not enforce it.
# Note that this rebinds `x`, which earlier held the Literal type.
x: [1, 2] = 2

In [None]:
# Scratch check: "0" is not a valid Python identifier, so this is False.
"0".isidentifier()

In [None]:
import matplotlib.pyplot as plt

# 16 x 6 = 96 panels that share one y-axis; one panel is consumed per entry
# obtained by iterating over self.timeseries.
fig, axes = plt.subplots(
    nrows=16,
    ncols=6,
    figsize=(20, 32),
    sharey=True,
    constrained_layout=True,
)

for ax, col in zip(axes.flat, self.timeseries):
    # Normalised 20-bin histogram on a log scale, clipped to [1e-6, 1] so
    # that the panels are directly comparable.
    series = self.timeseries[col]
    series.hist(ax=ax, bins=20, density=True, log=True)
    ax.set_ylim(1e-6, 1)

In [None]:
# Select the inline matplotlib backend.
# NOTE(review): this runs after the plotting cell above; it would
# conventionally sit in the setup cell at the top of the notebook.
%matplotlib inline

# Mean of a boolean mask, i.e. the fraction of entries of metadata["min"]
# that are exactly 0.
(self.metadata["min"] == 0).mean()

In [None]:
import pandas as pd
from pandas import DataFrame

In [None]:
self.LOGGER.info("Loading main file.")
# The first CSV column is used as the row index.
ts = pd.read_csv(self.rawdata_paths["complete_tensor.csv"], index_col=0)

# Check shape.
if ts.shape != self.rawdata_shapes["complete_tensor.csv"]:
    # Adjacent string literals are concatenated verbatim, so every fragment
    # must carry its own separating space; without it the message ran the
    # sentences together ("correct.Please", "inthe appendix").
    raise ValueError(
        f"The {ts.shape=} is not correct. "
        "Please apply the modified preprocessing using bin_k=2, as outlined in "
        "the appendix. The resulting tensor should have 3082224 rows and 7 columns."
    )

# Cast the columns to the declared schema, then order the rows by UNIQUE_ID
# and, within each UNIQUE_ID, by TIME_STAMP.
ts = ts.astype(self.rawdata_schemas["complete_tensor.csv"]).sort_values(
    by=["UNIQUE_ID", "TIME_STAMP"]
)

In [None]:
# Select VALUENUM *before* aggregating. Grouping the whole frame computes the
# statistic for every column only to throw all but one away, and pandas >= 2.0
# raises a TypeError if any of those other columns is non-numeric.
valuenum_by_label = ts.groupby("LABEL_CODE")["VALUENUM"]
means = valuenum_by_label.mean().rename("MEANS")
stdvs = valuenum_by_label.std().rename("STDVS")

# One row per LABEL_CODE holding its mean and standard deviation; the two
# Series are stacked as rows and transposed so that they become columns.
stats = (
    DataFrame([means, stdvs])
    .T.reset_index()
    .astype(
        {
            "LABEL_CODE": "int16",
            "MEANS": "float32",
            "STDVS": "float32",
        }
    )
)

In [None]:
def _long_to_wide(long_frame):
    """Reshape the long table into one column per LABEL_CODE.

    The result is indexed by (UNIQUE_ID, TIME_STAMP), holds the VALUENUM
    entries as values, and has both its rows and its columns sorted.
    """
    selected = long_frame[["UNIQUE_ID", "TIME_STAMP", "LABEL_CODE", "VALUENUM"]]
    # Discard the old row labels before promoting the two key columns.
    keyed = selected.reset_index(drop=True).set_index(["UNIQUE_ID", "TIME_STAMP"])
    wide = keyed.pivot(columns="LABEL_CODE", values="VALUENUM")
    # Sort the rows first, then the columns.
    return wide.sort_index().sort_index(axis=1)


# NOTE: this rebinds `ts` from the long to the wide layout, so the cell cannot
# be re-run without first re-running the cell that loads `ts`.
ts = _long_to_wide(ts)

In [None]:
# Summary statistics per column, transposed so that each column of `ts`
# (one per LABEL_CODE after the pivot) becomes a row.
ts.describe().T

In [None]:
# Mean of every column (pandas skips missing values by default).
ts.mean()