In [1]:
import tensorflow as tf

### Configuration
# When True, the dataset-creation cell rebuilds and saves the .npz dataset files;
# when False, that cell is skipped and the existing .npz files are only loaded.
create_dataset = False

def in_google_colab():
    """Report whether the ``google.colab`` package can be imported.

    The notebook uses this as its test for "running inside Google Colab".

    Returns:
        bool: True if importing ``google.colab`` succeeds, False if it raises
        ImportError.
    """
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
    except ImportError:
        return False
    else:
        return True

# Choose the data directory (and, on Colab, install the dependencies) per platform
if in_google_colab():
    print("Running in Google Colab")
    # Clone the project repository; its data/ folder is used as `directory` below
    !git clone https://github.com/naderinho/anesthesia_anomaly_detection
    !pip install vitaldb astetik chances kerasplotlib statsmodels tqdm wrangle
    # talos is installed without its dependencies.
    # NOTE(review): presumably the packages installed above cover what talos needs -- confirm
    !pip install --no-deps talos
    directory = 'anesthesia_anomaly_detection/data/'
    # Dataset creation is always switched off on Colab, overriding the configuration flag
    create_dataset = False
else:
    print("Running locally")
    directory = 'data/'

### Sub-folders, relative to `directory`
# datasetpath: folder holding dataset.csv and the generated .npz files
# vitaldbpath: folder holding the <caseid>.vital recordings
datasetpath = 'dataset02/'
vitaldbpath = 'vitaldb_tiva/'

import numpy as np
import pandas as pd
import vitaldb as vf
import matplotlib.pyplot as plt

Running in Google Colab
Cloning into 'anesthesia_anomaly_detection'...
remote: Enumerating objects: 234, done.[K
remote: Counting objects: 100% (234/234), done.[K
remote: Compressing objects: 100% (180/180), done.[K
remote: Total 234 (delta 89), reused 184 (delta 48), pack-reused 0[K
Receiving objects: 100% (234/234), 40.43 MiB | 9.61 MiB/s, done.
Resolving deltas: 100% (89/89), done.
Updating files: 100% (58/58), done.
Collecting vitaldb
  Downloading vitaldb-1.4.9-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.5/57.5 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting astetik
  Downloading astetik-1.16-py2.py3-none-any.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting chances
  Downloading chances-0.1.9-py2.py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m2.1 MB/s[0m

In [2]:
from scipy import ndimage

def outlierfilter(data: pd.DataFrame, threshhold: float, iterations: int, min: float, max: float):
    """
    Remove outliers from a time series and fill the resulting gaps.

    Samples whose absolute first difference along the time axis (axis 0)
    exceeds ``threshhold`` are flagged. The flags are widened with a binary
    dilation and every flagged sample is replaced by NaN. Values outside the
    range ``(min, max]`` are replaced by NaN as well. All gaps are then filled
    by linear interpolation; gaps at the very start are back-filled with the
    first valid value. A column without any valid value stays NaN.

    The input is not modified; a new object is returned.

    Args:
        data (pd.DataFrame): time series, one row per time step
        threshhold (float): gradient threshold above which a sample is flagged
        iterations (int): number of iterations of the binary dilation
        min (float): minimum expected value; values <= min are removed
        max (float): maximum expected value; values > max are removed

    Returns:
        pd.DataFrame: filtered and interpolated copy of ``data``
    """
    values = data.to_numpy(dtype=float)

    # Repeat the last row instead of appending 0: a zero would create an
    # artificial jump at the end and always flag the final samples as outliers.
    gradient = np.diff(values, n=1, axis=0, append=values[-1:])
    gradientfilter = ndimage.binary_dilation(np.abs(gradient) > threshhold, iterations=iterations)

    # mask()/where() return new objects, so the caller's data stays untouched
    filtered = data.mask(gradientfilter)
    filtered = filtered.where((filtered > min) & (filtered <= max))

    filtered = filtered.interpolate(method='linear')
    return filtered.bfill()

### Custom Normalization Functions

def NormStandard(dataset: np.array):
    """Standardize ``dataset`` with its global mean and standard deviation.

    Both statistics are taken over all elements at once and ignore NaN
    entries; NaN entries stay NaN in the result.
    """
    centered = dataset - np.nanmean(dataset)
    return centered / np.nanstd(dataset)

def NormMinMax(dataset: np.array):
    """Scale ``dataset`` linearly so its smallest value maps to 0 and its largest to 1.

    Minimum and maximum are taken over all elements and ignore NaN entries,
    matching NormStandard. This matters because the dataset generator marks
    padded positions as NaN before it calls the normalization function; a
    NaN-unaware min/max would turn the whole result into NaN.

    Args:
        dataset (np.array): values to scale; may contain NaN

    Returns:
        np.array: scaled values, NaN where the input was NaN
    """
    lower = np.nanmin(dataset)
    upper = np.nanmax(dataset)
    return (dataset - lower) / (upper - lower)

def NormCustomBIS(dataset: np.array):
    """Map every value ``v`` to ``(100 - v) / 100``, so 100 becomes 0.0 and 0 becomes 1.0."""
    inverted = 100 - dataset
    return inverted / 100

def NormNone(dataset: np.array):
    """Identity normalization: hand the very same object back, untouched."""
    unchanged = dataset
    return unchanged

In [3]:
from os import listdir
from sklearn.model_selection import train_test_split

class DatasetImport():
    """Base class that builds, splits, stores and reloads one dataset.

    The case ids are read from the index column of ``dataset.csv`` in the
    dataset folder. Subclasses supply the per-case import by overriding
    ``importFunction`` (or replace ``generate`` entirely).
    """

    def __init__(self, directory: str, dataset: str, vitalpath: str, interval: int = 10):
        """
        Args:
            directory (str): base data directory (with trailing slash)
            dataset (str): dataset folder relative to ``directory``
            vitalpath (str): folder with the .vital files relative to ``directory``
            interval (int): sampling interval stored for use by the importers
        """
        self.directory = directory
        self.datasetpath = directory + dataset
        self.vitalpath = directory + vitalpath

        self.interval = interval

        self.train_dataset = None
        self.validation_dataset = None
        self.test_dataset = None
        # Defined here so the attribute exists before generateDataset()/load()
        self.timesteps = None

        self.index = pd.read_csv(self.datasetpath +'dataset.csv', index_col=0).index.to_numpy()

    def save(self, filename: str):
        """Write the three splits and the time-step counts to a compressed .npz file.

        Raises:
            ValueError: if no dataset has been generated or loaded yet.
        """
        if self.train_dataset is None:
            raise ValueError('No dataset to save: call generateDataset() or load() first')

        np.savez_compressed(self.datasetpath+filename,
                            train = self.train_dataset,
                            validation = self.validation_dataset,
                            test = self.test_dataset,
                            timesteps = self.timesteps,
                            )

    def load(self, filename: str):
        """Read the three splits and the time-step counts from a .npz file.

        ``timesteps`` falls back to an empty list when the file has no such
        entry (KeyError) or when it was stored as a pickled object such as
        None, which np.load refuses to read by default (ValueError).
        """
        # The context manager closes the underlying file once the arrays are read
        with np.load(self.datasetpath+filename) as data:
            self.train_dataset = data['train']
            self.validation_dataset = data['validation']
            self.test_dataset = data['test']
            try:
                self.timesteps = data['timesteps']
            except (KeyError, ValueError):
                self.timesteps = []

    def split(self,data):
        """Split ``data`` into train, validation and test parts.

        15 % of the samples become the test set, then 15 % of the remainder
        become the validation set. The random state is fixed at 42.
        """
        train, test = train_test_split(data, test_size=0.15, random_state=42)
        train, validation = train_test_split(train, test_size=0.15, random_state=42)
        return train, validation, test

    def generateDataset(self, normalization):
        """Generate the full dataset for all case ids and store its three splits.

        Args:
            normalization: callable applied to the assembled array
        """
        dataset, self.timesteps = self.generate(self.index, normalization)

        self.train_dataset, self.validation_dataset, self.test_dataset = self.split(dataset)
        print('Dataset succesfully generated                 ')

    def generate(self, dataset_index: list, normalization):
        """Import every case, pad all cases to equal length and normalize them.

        Args:
            dataset_index (list): case ids; each is zero-padded to 4 digits to
                form the .vital file name
            normalization: callable applied to the padded array

        Returns:
            tuple: (padded and normalized float32 array, array with the
            unpadded number of time steps per case)
        """
        batch_list = []
        timesteps = []

        for i, caseid in enumerate(dataset_index):
            filepath = self.vitalpath+str(caseid).zfill(4)+'.vital'
            data, importName = self.importFunction(filepath)
            timesteps.append(data.shape[0])
            batch_list.append(data)
            print(importName + " Fortschritt: %.1f" % (100 * (i+1) / len(dataset_index)),' % ', end='\r')

        ### Pad the dataset
        data = tf.keras.preprocessing.sequence.pad_sequences(batch_list, padding='post', dtype='float32', value=0.0)

        # Hide the padding (and any other exact 0.0) from the normalization
        data[data == 0.0] = np.nan

        # Normalization
        data = normalization(data)

        # Restore the padded values
        np.nan_to_num(data, copy=False, nan=0.0)

        return data, np.array(timesteps)

    def importFunction(self, filepath: str):
        """Import one case; must be overridden by subclasses.

        Implementations return ``(data, name)``: ``data`` has time along
        axis 0, ``name`` is the label shown in the progress message.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError('importFunction() must be implemented by a subclass')

class infoImport(DatasetImport):
    """Importer for the static per-case information (sex, age, height, weight, bmi)."""

    def __init__(self, directory: str, dataset: str, vitalpath: str):
        super().__init__(directory,dataset,vitalpath)

        # Columns read from cases.csv; 'sex' must stay first (it is encoded separately)
        self.columns = ['sex','age','height','weight','bmi']

    def generate(self, dataset_index: list, normalization):
        """Build the info table for the given case ids.

        Returns:
            tuple: (array with the encoded sex in column 0 followed by the
            normalized remaining columns, None as there are no time steps)
        """
        cases = pd.read_csv(self.directory+'info_vitaldb/cases.csv', index_col=0)
        selected = cases.loc[dataset_index, self.columns].to_numpy()

        # Encode sex numerically: 'F' -> -0.5, anything else -> 0.5
        sex_encoded = np.where(selected[:, 0] == 'F', -0.5, 0.5)

        numeric = selected[:, 1:].astype(float)
        features = np.column_stack((sex_encoded, normalization(numeric)))

        return features, None

class VitalImport(DatasetImport):
    """Importer for one or more numeric tracks of a .vital file.

    Configure the instance after construction through ``tracks`` (track
    names), ``filter`` ([gradient threshold, min, max] handed to
    outlierfilter) and ``name`` (label used in the progress message).
    """

    def __init__(self, directory: str, dataset: str, vitalpath: str):
        super().__init__(directory,dataset,vitalpath)

        self.name = 'Vital'
        self.tracks = []
        self.filter = [0,0,0]

    def importFunction(self, filepath: str):
        """Read the configured tracks of one case and remove outliers.

        Returns:
            tuple: (filtered data of the case, ``self.name``)
        """
        recording = vf.VitalFile(ipath = filepath, track_names = self.tracks)
        frame = recording.to_pandas(track_names=self.tracks,interval=self.interval)

        # Small offset so real samples cannot be mistaken for the 0.0 padding value
        frame = frame + 0.00001

        cleaned = outlierfilter(
            frame,
            threshhold = self.filter[0],
            iterations = 2,
            min = self.filter[1],
            max = self.filter[2],
        )
        return cleaned, self.name

class BPImport(DatasetImport):
    """Importer for the arterial pressure waveform ('SNUADC/ART'), read at 500 Hz.

    Each returned row holds one window of ``interval * 500`` consecutive samples.
    """

    def __init__(self, directory: str, dataset: str, vitalpath: str, imputer=None):
        """
        Args:
            directory (str): base data directory
            dataset (str): dataset folder relative to ``directory``
            vitalpath (str): folder with the .vital files relative to ``directory``
            imputer: optional object with a ``fit_transform`` method used to
                fill removed samples. When None, gaps are filled by linear
                interpolation, as outlierfilter does.
        """
        super().__init__(directory,dataset,vitalpath)

        # Previously never assigned, so importFunction always raised AttributeError
        self.imputer1 = imputer

    def importFunction(self, filepath: str):
        """Read, clean and window the pressure waveform of one case.

        Returns:
            tuple: (array with one window of ``interval * 500`` samples per
            row, the label 'Blood Pressure')
        """
        pressureWave = vf.VitalFile(filepath).to_numpy(['SNUADC/ART'], 1/500)

        samples = self.interval * 500

        # Remove values whose derivative is too large. The last row is repeated
        # instead of appending 0, which would fake a jump at the end of the record.
        gradient = np.diff(pressureWave,n=1, axis=0, append=pressureWave[-1:])
        gradientfilter1 = ndimage.binary_dilation(np.abs(gradient) > 4,iterations=30)
        gradientfilter2 = ndimage.binary_dilation(np.abs(gradient) > 7,iterations=1000)
        pressureWave[gradientfilter1] = np.nan
        pressureWave[gradientfilter2] = np.nan

        # Remove values of 20 or below and values above 250
        pressureWave[pressureWave <= 20] = np.nan
        pressureWave[pressureWave > 250] = np.nan

        # Fill the removed samples
        if self.imputer1 is not None:
            pressureWave = self.imputer1.fit_transform(pressureWave)
        else:
            pressureWave = pd.DataFrame(pressureWave).interpolate(method='linear').bfill().to_numpy()

        # Drop the incomplete trailing window, then reshape to `samples` values per row
        length = pressureWave.shape[0] - (pressureWave.shape[0] % samples)
        pressureWave = pressureWave[0:length]
        return pressureWave.reshape(-1,samples), 'Blood Pressure'

In [4]:
###### Create Dataset
if create_dataset:
    def _build_vital_dataset(name, tracks, filter_values, normalization, filename):
        """Configure a VitalImport, generate its dataset and save it as ``filename``."""
        importer = VitalImport(directory= directory, dataset=datasetpath, vitalpath=vitaldbpath)
        importer.name = name
        importer.tracks = tracks
        importer.filter = filter_values
        importer.generateDataset(normalization=normalization)
        importer.save(filename)
        return importer

    # Target signal: kept on its original scale (no normalization)
    bis = _build_vital_dataset('Bispektralindex', ['BIS/BIS'], [20, 10, 100],
                               NormNone, '00_bis.npz')

    # Static per-case information
    info = infoImport(directory= directory, dataset=datasetpath, vitalpath=vitaldbpath)
    info.generateDataset(normalization=NormStandard)
    info.save('01_info.npz')

    # Vital signs used as model inputs; filter_values = [gradient threshold, min, max]
    bloodpressure = _build_vital_dataset(
        'bloodpressure',
        ['Solar8000/ART_DBP', 'Solar8000/ART_MBP', 'Solar8000/ART_SBP'],
        [20, 20, 250],
        NormStandard,
        '02_bloodpressure.npz',
    )
    etCO2 = _build_vital_dataset('End Tidal CO2', ['Primus/ETCO2'], [5, 15, 50],
                                 NormStandard, '02_etCO2.npz')
    spO2 = _build_vital_dataset('SpO2', ['Solar8000/PLETH_SPO2'], [3, 80, 100],
                                NormStandard, '02_spO2.npz')
    hr = _build_vital_dataset('Heart Rate', ['Solar8000/HR'], [20, 40, 180],
                              NormStandard, '02_hr.npz')



In [5]:
### Load the datasets
def _load_dataset(importer_cls, filename):
    """Create an ``importer_cls`` on the notebook's paths and load ``filename`` into it."""
    importer = importer_cls(directory= directory, dataset=datasetpath, vitalpath=vitaldbpath)
    importer.load(filename)
    return importer

bis = _load_dataset(VitalImport, '00_bis.npz')
info = _load_dataset(infoImport, '01_info.npz')
bloodpressure = _load_dataset(VitalImport, '02_bloodpressure.npz')
etCO2 = _load_dataset(VitalImport, '02_etCO2.npz')
spO2 = _load_dataset(VitalImport, '02_spO2.npz')
hr = _load_dataset(VitalImport, '02_hr.npz')

In [30]:
########################################## COMBINED MODEL ##########################################
import tensorflow as tf
from keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, ReLU, Dropout, Concatenate, Masking, Conv1D, MaxPooling1D, BatchNormalization, RepeatVector, Lambda
from tensorflow.keras.metrics import MeanSquaredError, MeanAbsoluteError, R2Score
from tensorflow.keras.optimizers import Adam, Adagrad, RMSprop, SGD
from talos.utils import lr_normalizer

### Combine the vital data
def _combine_vitals(split_attribute):
    """Concatenate one split of all vital-sign datasets along axis 2."""
    sources = (bloodpressure, etCO2, spO2, hr)
    return np.concatenate([getattr(source, split_attribute) for source in sources], axis=2)

vital_train = _combine_vitals('train_dataset')
vital_validation = _combine_vitals('validation_dataset')
vital_test = _combine_vitals('test_dataset')



def simple_model(x_train, y_train, x_val, y_val, params):
    """Build, compile and train the combined LSTM model for one Talos run.

    The input shapes are taken from the notebook globals ``vital_train`` and
    ``info``, not from ``x_train``.

    Args:
        x_train: training inputs in the order [vital data, info data]
        y_train: training targets, indexed as (case, time step, 1); they are
            smoothed along the time axis before training
        x_val: validation inputs, same layout as ``x_train``
        y_val: validation targets (used unsmoothed)
        params (dict): hyperparameters; reads 'output_activation',
            'optimizer', 'lr', 'loss', 'y_smoothing', 'epochs', 'batch_size'

    Returns:
        tuple: (training history returned by ``model.fit``, trained model)
    """
    ### Vital data input; time steps equal to the 0.0 padding value are masked
    input_vital = Input(shape=(None, vital_train.shape[2]))
    vital_layer = Masking(mask_value=0.0)(input_vital)

    ### INFO layers: repeat the static info vector for every time step
    input_info = Input(shape=(info.train_dataset.shape[1],))
    info_layer = RepeatVector(vital_train.shape[1])(input_info)

    ## Concatenate the vital data with the info layer and feed the LSTM stack
    comb_layer = Concatenate()([vital_layer, info_layer])
    comb_layer = LSTM(units=32, return_sequences=True)(comb_layer)
    comb_layer = BatchNormalization()(comb_layer)
    comb_layer = LSTM(units=32, return_sequences=True)(comb_layer)
    comb_layer = BatchNormalization()(comb_layer)
    comb_layer = LSTM(units=32, return_sequences=True)(comb_layer)
    comb_layer = BatchNormalization()(comb_layer)
    comb_layer = Dense(units=128, activation='relu')(comb_layer)
    comb_layer = BatchNormalization()(comb_layer)
    comb_layer = Dense(units=32, activation='relu')(comb_layer)
    comb_layer = BatchNormalization()(comb_layer)

    output = Dense(units=1, activation=params['output_activation'])(comb_layer)

    # Define the model
    model = Model(inputs=[input_vital, input_info], outputs=output)

    # Compile the model
    model.compile(optimizer=params['optimizer'](learning_rate=params['lr']),
                loss=params['loss'],
                metrics=['MeanSquaredError', 'MeanAbsoluteError', 'RootMeanSquaredError']
                )

    # Smooth the targets along time with a centered rolling mean.
    # Padded steps (exactly 0.0) are hidden as NaN first, so they neither pull
    # the last real samples of a case towards zero nor become non-zero
    # themselves; afterwards they are restored to 0.0.
    targets = pd.DataFrame(y_train[:,:,0].T)  # rows: time steps, columns: cases
    padding = targets == 0.0
    smoothed = targets.mask(padding).rolling(min_periods=1, window=params['y_smoothing'], center=True).mean()
    y_train = smoothed.mask(padding, 0.0).to_numpy().T[:,:,np.newaxis]

    # Train the model; validation_data is passed as the documented (x, y) tuple
    out = model.fit(x_train,
                        y_train,
                        validation_data=(x_val, y_val),
                        epochs=params['epochs'],
                        batch_size=params['batch_size'],
                        verbose=0
                        )
    return out, model

In [None]:
import talos

# Hyperparameter grid: every key holds the list of values Talos may pick from.
# Only 'y_smoothing' has more than one candidate.
p = dict(
    lr=[0.005],
    epochs=[30],
    optimizer=[Adam],
    output_activation=[ReLU(max_value=100.0)],
    y_smoothing=[1, 2, 4, 8, 16, 32, 64],
    loss=['MeanSquaredError'],
    batch_size=[4],
)

# Inputs are given as [vital data, info data]; the BIS dataset is the target
scan_object = talos.Scan(
    x=[vital_train, info.train_dataset],
    y=bis.train_dataset,
    x_val=[vital_validation, info.validation_dataset],
    y_val=bis.validation_dataset,
    experiment_name='SimpleModel_HyperparameterScan',
    multi_input=True,
    params=p,
    model=simple_model,
)



  0%|          | 0/7 [00:00<?, ?it/s][A[A