In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd

2024-02-10 14:55:27.707972: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [18]:
# Load the cleaned dataset and shuffle every row (frac=1 keeps all rows).
# A fixed seed makes the shuffle, and therefore the positional train/validation
# split done later in the notebook, reproducible across kernel restarts.
SHUFFLE_SEED = 42
df = pd.read_csv("cleaned_data.csv")
df = df.sample(frac=1, random_state=SHUFFLE_SEED)

# Encode the two binary string columns as 0/1.
# The result is assigned back to the column rather than calling
# ``df[col].replace(..., inplace=True)``: that form mutates an intermediate
# Series (chained assignment), which pandas warns about and which no longer
# updates ``df`` when copy-on-write is enabled.
df['gender'] = df['gender'].replace(["Male", "Female"], [0, 1])
df['disposition'] = df['disposition'].replace(["Discharge", "Admit"], [0, 1])

# Shift ESI down by one so it can be used as a zero-based class label.
# NOTE: this cell is not idempotent; re-running it without reloading the CSV
# would shift/scale these columns a second time, so always run it from the top.
df['esi'] = df['esi'] - 1
# Scale age down by a factor of 100.
df['age'] = df['age'] / 100

In [19]:
# Weight for each ESI class = (total number of rows) / (rows in that class),
# capped at 10 so that very rare classes do not get an unbounded weight.
esi_counts = df['esi'].value_counts()
total_rows = df.shape[0]

class_weights = {}
for esi_level in esi_counts.index:
    class_weights[esi_level] = min(total_rows / esi_counts[esi_level], 10)

print(class_weights)

{2.0: 2.362190924907611, 1.0: 3.4124323497936095, 3.0: 4.464036863115285, 4.0: 10, 0.0: 10}


In [24]:
# Columns are grouped by name prefix: "cc_*" columns are model inputs,
# "meds_*" columns are prediction targets.
cc_cols = [col for col in df.columns if col.startswith("cc_")]
med_cols = [col for col in df.columns if col.startswith("meds_")]

# pmh_cols.txt holds one column name per line.
# Only the newline is stripped (the previous ``x[:-1]`` dropped the last
# character unconditionally, which truncated the final column name whenever the
# file did not end with a newline), and blank lines are skipped so they do not
# turn into empty column names.
with open("pmh_cols.txt") as f:
    pmh_cols = [line.rstrip("\n") for line in f if line.strip()]

# Model inputs and the multi-label targets.
x_cols = ["age", "gender"] + cc_cols + pmh_cols
y_cols = ["disposition"] + med_cols

# One-hot encoding of the zero-based ESI level, used as the target of the
# ESI classifier.
esi_data = pd.get_dummies(df['esi'], dtype=int)

In [25]:
# Fraction of rows that goes to the training set; the remaining rows form the
# validation set. The split is positional (first rows train, last rows validate).
val_split = 0.9
n_train_rows = int(val_split * df.shape[0])

# Convert each frame to a NumPy array once, then slice it at the split point.
all_x = np.array(df[x_cols])
all_esi = np.array(esi_data)
all_y = np.array(df[y_cols])

np_x_train, np_x_val = all_x[:n_train_rows, :], all_x[n_train_rows:, :]
np_esi_train, np_esi_val = all_esi[:n_train_rows, :], all_esi[n_train_rows:, :]
np_y_train, np_y_val = all_y[:n_train_rows, :], all_y[n_train_rows:, :]

# Report the shapes of the training arrays, a blank line, then the validation arrays.
for train_arr in (np_x_train, np_esi_train):
    print(train_arr.shape)
print()
for val_arr in (np_x_val, np_esi_val):
    print(val_arr.shape)

(502216, 483)
(502216, 5)

(55802, 483)
(55802, 5)


In [26]:
# Layer widths come from the training arrays so the network always matches the
# feature/target column lists (previously hard-coded as 483 and 49).
gen_n_inputs = np_x_train.shape[1]
gen_n_outputs = np_y_train.shape[1]

# Multi-label network: one independent sigmoid output per target column.
gen_model = tf.keras.Sequential([
    # The shape is given as an explicit tuple; a bare integer is not a valid
    # shape on every Keras version.
    tf.keras.layers.Input(shape=(gen_n_inputs,)),
    tf.keras.layers.Dense(500, activation="relu"),
    tf.keras.layers.Dense(300, activation="relu"),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(gen_n_outputs, activation="sigmoid")
])

# Additive smoothing term of the Dice coefficient; keeps the ratio defined when
# both the targets and the predictions sum to zero.
smooth = 1.
def dice_coef(y_true, y_pred):
    """Return the smoothed Dice coefficient between targets and predictions.

    The sums run over every element of the batch, so the result is a single
    scalar: ``(2 * sum(y_true * y_pred) + smooth) /
    (sum(y_true) + sum(y_pred) + smooth)``.

    Args:
        y_true: Ground-truth labels, same shape as ``y_pred``.
        y_pred: Model outputs.

    Returns:
        Scalar tensor holding the Dice coefficient.
    """
    # Integer label arrays cannot be multiplied with float predictions, so the
    # targets are cast to the prediction dtype first.
    y_true = tf.cast(y_true, y_pred.dtype)
    intersection = tf.keras.backend.sum(y_true * y_pred)
    return (2. * intersection + smooth) / (tf.keras.backend.sum(y_true) + tf.keras.backend.sum(y_pred) + smooth)

def dice_coef_loss(y_true, y_pred):
    """Return ``1 - dice_coef`` so that a higher overlap gives a lower loss."""
    return 1-dice_coef(y_true, y_pred)

gen_model.compile(
    loss=dice_coef_loss,
    metrics=[
        # The plain string "accuracy" is resolved by Keras from the output shape
        # and, for a multi-column target, becomes categorical (argmax) accuracy,
        # which is not meaningful for independent sigmoid outputs. Element-wise
        # binary accuracy is used instead; the metric keeps the name "accuracy"
        # so logged keys do not change.
        tf.keras.metrics.BinaryAccuracy(name="accuracy"),
        dice_coef,
        tf.keras.metrics.AUC(curve="PR"),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall()
    ],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)
)

In [None]:
# Exponential-decay schedule object. It is only constructed here; nothing in
# this cell hands it to an optimizer or to fit().
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=5,
    decay_rate=0.99,
    staircase=True,
)

# Halve the learning rate after one epoch without validation-loss improvement,
# never going below 0.0005.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=1, min_lr=0.0005
)

# Train the multi-label model, validating on the held-out rows each epoch.
gen_model.fit(
    np_x_train,
    np_y_train,
    batch_size=32,
    epochs=30,
    callbacks=[reduce_lr],
    validation_data=(np_x_val, np_y_val),
)

In [None]:
# Persist the trained multi-label model to disk.
# NOTE(review): gen_model is compiled with the custom dice_coef_loss / dice_coef
# functions, so reloading presumably needs them supplied (e.g. via
# custom_objects) -- confirm before relying on this file elsewhere.
gen_model.save("meds_admission.keras")

In [31]:
# Exponential-decay schedule object. It is only constructed here; the optimizer
# below is given a constant learning rate instead.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    0.01, decay_steps=5, decay_rate=0.99, staircase=True
)

# Layer widths come from the training arrays so the network always matches the
# feature columns and the one-hot ESI encoding (previously hard-coded as 483
# and 5).
esi_n_inputs = np_x_train.shape[1]
esi_n_classes = np_esi_train.shape[1]

# Single-label classifier: softmax over the one-hot encoded ESI levels.
esi_model = tf.keras.Sequential([
    # The shape is given as an explicit tuple; a bare integer is not a valid
    # shape on every Keras version.
    tf.keras.layers.Input(shape=(esi_n_inputs,)),
    tf.keras.layers.Dense(500, activation="relu"),
    tf.keras.layers.Dense(300, activation="relu"),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(esi_n_classes, activation="softmax")
])

esi_model.compile(
    loss="categorical_crossentropy",
    metrics=["accuracy"],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01)
)

In [32]:
# Print the layer-by-layer architecture and parameter counts of the ESI classifier.
esi_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 500)               242000    
                                                                 
 dense_13 (Dense)            (None, 300)               150300    
                                                                 
 dense_14 (Dense)            (None, 100)               30100     
                                                                 
 dense_15 (Dense)            (None, 5)                 505       
                                                                 
Total params: 422905 (1.61 MB)
Trainable params: 422905 (1.61 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [33]:
# Halve the learning rate after two epochs without validation-loss improvement,
# never going below 0.0005.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=2, min_lr=0.0005
)

# Train the ESI classifier on the one-hot targets, validating on the held-out
# rows each epoch.
esi_model.fit(
    np_x_train,
    np_esi_train,
    batch_size=32,
    epochs=30,
    callbacks=[reduce_lr],
    validation_data=(np_x_val, np_esi_val),
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
 2747/15695 [====>.........................] - ETA: 55s - loss: 0.6897 - accuracy: 0.7107

KeyboardInterrupt: 

In [None]:
# Persist the ESI classifier to disk in its current state.
# NOTE(review): the captured output of the training cell ends in a
# KeyboardInterrupt during epoch 17, so this presumably saves a partially
# trained model -- confirm that is intended.
esi_model.save("65_model.keras")