In [1]:
import datasets
import os
import numpy as np
from pathlib import Path
from preprocessing.scalers import MinMaxScaler
# Project root: read from the WORKINGDIR environment variable. When it is unset
# or empty, fall back to the current working directory instead of passing None
# to Path(), which would fail with an opaque TypeError.
wd = os.getenv("WORKINGDIR") or os.getcwd()
# Directory holding the mimiciii-demo 1.4 files under tests/data of the root.
TEST_DATA_DIR = Path(wd, "tests", "data", "physionet.org", "files", "mimiciii-demo", "1.4")

In [2]:
# Build the reader for the "DECOMP" task from the test data directory.
# As the log output of this cell shows, extraction, preprocessing and
# discretization results already present under ./tmp are reused.
proc_reader = datasets.load_data(
    task="DECOMP",
    source_path=TEST_DATA_DIR,
    storage_path="./tmp",
    chunksize=75835,
    discretize=True,
    deep_supervision=True,
)

INFO --- 06-20 13:02:26 : datasets/extraction/__init__.py  : L 293  - Iterative data extraction already finalized in directory:
                                    - tmp/extracted.
INFO --- 06-20 13:02:27 : datasets/processors/__init__.py  : L 347  - Preprocessing for DECOMP is already in directory:
                                    - tmp/processed/DECOMP.
INFO --- 06-20 13:02:27 : datasets/processors/__init__.py  : L 347  - Discretizing for DECOMP is already in directory:
                                    - tmp/discretized/DECOMP.


In [3]:
# Read the samples from the reader. The result is indexed by key further down
# ("X", "M" and "yds"); read_masks=True is presumably what makes the "M" entry
# available -- confirm against the reader implementation.
dataset = proc_reader.read_samples(read_masks=True)

In [4]:
# Obtain a min-max scaler for this reader. Per the log output of this cell, an
# existing minmax_scaler.pkl in the reader's storage directory is loaded.
scaler = MinMaxScaler().fit_reader(proc_reader)

INFO --- 06-20 13:02:29 : preprocessing/__init__.py  : L 106  - Loading minmax_scaler from:
                                    - tmp/discretized/DECOMP/minmax_scaler.pkl


In [5]:
# Apply the scaler to every feature sample.
# NOTE: dataset["X"] is overwritten in place, so running this cell twice
# transforms already-transformed data; re-run the read_samples cell first.
dataset["X"] = list(map(scaler.transform, dataset["X"]))

In [6]:
def _zeropad_samples(data):
    max_len = max([x.shape[0] for x in data])
    ret = [
        np.concatenate([x, np.zeros((max_len - x.shape[0],) + x.shape[1:])],
                        axis=0,
                        dtype=np.float32) for x in data
    ]
    return np.atleast_3d(np.array(ret, dtype=np.float32))

In [7]:
# Pad each entry ("X", "M", "yds") to the longest sample within that entry
# and stack the samples into a single float32 array.
X = _zeropad_samples(dataset["X"])
M = _zeropad_samples(dataset["M"])
Y = _zeropad_samples(dataset["yds"])
# Show the resulting shapes, one per line.
for padded in (X, M, Y):
    print(padded.shape)

(111, 845, 59)
(111, 845, 1)
(111, 845, 1)


In [8]:
from tensorflow.keras.layers import Layer


class ExtendMask(Layer):
    """Pass-through layer that attaches an externally supplied mask to a tensor.

    Inputs:      [X, M] -- a pair of (tensor, mask)
    Output:      X, returned unchanged
    Output_mask: M (plus the Keras backend epsilon when ``add_epsilon`` is set)

    Args:
        add_epsilon: When True, the backend epsilon is added to the mask
            returned by ``compute_mask``.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, add_epsilon=False, **kwargs):
        super().__init__(**kwargs)
        # Assigned after the base constructor has run so that base-class
        # initialisation cannot overwrite the flag.
        self.supports_masking = True
        self.add_epsilon = add_epsilon

    def call(self, x, *args, **kwargs):
        """Return the first element of the input pair; the mask is not used here."""
        return x[0]

    def compute_output_shape(self, input_shape):
        """Return the shape of the first input."""
        return input_shape[0]

    def compute_mask(self, input, *args, **kwargs):
        """Return the second element of the input pair as the output mask."""
        if self.add_epsilon:
            # Local import: no backend alias `K` is imported in the notebook,
            # so referencing it without this would raise a NameError.
            from tensorflow.keras import backend as K
            return input[1] + K.epsilon()
        return input[1]

    def get_config(self):
        """Return the base layer config extended with ``add_epsilon``."""
        config = super().get_config()
        config.update({'add_epsilon': self.add_epsilon})
        return config


2024-06-20 13:02:30.142619: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-20 13:02:30.142657: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-20 13:02:30.143664: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-20 13:02:30.149925: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
# Two model inputs: the feature sequences (feature size taken from X) and a
# second input named 'M' that ExtendMask attaches as the output mask.
inputs = layers.Input(shape=(None, X.shape[2]))
mask = layers.Input(shape=(None,), name='M')
x = layers.Masking()(inputs)
x = layers.LSTM(units=128,
                activation='tanh',
                return_sequences=True,
                recurrent_dropout=0,
                dropout=0)(x)

# Sigmoid output for every timestep. A plain Dense output used to be created
# here as well, but it was overwritten on the very next line and never used.
y = layers.TimeDistributed(layers.Dense(1, activation="sigmoid"))(x)
y = ExtendMask()([y, mask])


2024-06-20 13:02:31.536809: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-20 13:02:31.588255: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-20 13:02:31.588512: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [11]:
from tensorflow.keras.metrics import AUC

print(f"Shape after TimeDistributed Dense layer: {y.shape}")
model = models.Model(inputs=[inputs, mask], outputs=y)
# Track the area under both the ROC and the precision-recall curve. The list
# previously held two identical ROC metrics, so the second added no information.
model.compile(optimizer=Adam(clipvalue=1.0, learning_rate=0.001),
              loss="binary_crossentropy",
              metrics=[AUC(curve="ROC", name="roc_auc"),
                       AUC(curve="PR", name="pr_auc")])
history = model.fit([X, M], Y, epochs=1000)

Shape after TimeDistributed Dense layer: (None, None, 1)
Epoch 1/1000


2024-06-20 13:02:54.303413: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8902
2024-06-20 13:02:56.464439: I external/local_xla/xla/service/service.cc:168] XLA service 0x7270a19f6a30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-06-20 13:02:56.464492: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Ti, Compute Capability 8.6
2024-06-20 13:02:56.468858: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1718881376.577556 1435644 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000


KeyboardInterrupt: 

: 

In [None]:
from generators.tf2 import TFGenerator
# Batch generator built on the same reader and scaler as the in-memory arrays.
train_generator = TFGenerator(
    reader=proc_reader,
    scaler=scaler,
    deep_supervision=True,
    batch_size=32,
    shuffle=True,
)

In [None]:
# Wrap the existing graph in a new Model. The same input/output tensors (and
# therefore the same layer objects) are reused, so weights trained by an
# earlier fit are not reset here.
model = models.Model(inputs=[inputs, mask], outputs=y)
# NOTE(review): "roc_auc" and "pr_auc" are passed as string identifiers --
# confirm that they resolve to registered metrics in this environment.
model.compile(
    optimizer=Adam(clipvalue=1.0, learning_rate=1e-6),
    loss="binary_crossentropy",
    metrics=["roc_auc", "pr_auc"],
)
history = model.fit(train_generator, epochs=1000)

Epoch 1/1000


2024-06-18 16:17:38.883787: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8902
2024-06-18 16:17:41.521981: I external/local_xla/xla/service/service.cc:168] XLA service 0x72478e485e30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-06-18 16:17:41.522018: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Ti, Compute Capability 8.6
2024-06-18 16:17:41.526693: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1718720261.664122  570111 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000


In [None]:
from models.tf2.lstm import LSTMNetwork
from tensorflow.keras.optimizers import Adam

# Replace `model` with the project's LSTMNetwork.
# NOTE(review): the positional arguments 1000 and 59 are passed through as-is;
# 59 equals the feature dimension printed for X earlier -- confirm what both
# mean against LSTMNetwork's signature.
model = LSTMNetwork(
    1000,
    59,
    recurrent_dropout=0.,
    output_dim=1,
    depth=3,
    final_activation='sigmoid',
)
model.compile(
    loss="binary_crossentropy",
    optimizer=Adam(learning_rate=1e-6, clipvalue=1.0),
)

In [None]:
# Fit on the padded label array Y. Lower-case `y` is the symbolic output tensor
# of the functional model built earlier, not label data, so it cannot serve as
# the training target.
history = model.fit(X, Y, epochs=1000)