In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

2024-02-10 17:51:15.430599: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
df = pd.read_csv("cleaned_data.csv")
# Shuffle with a fixed seed so the row order is identical in every session.
# Later cells slice `df` by position, so an unseeded shuffle would hand them
# different rows after each kernel restart.
df = df.sample(frac=1, random_state=42)


def _encode_labels(series, mapping):
    """Return `series` with its string labels translated through `mapping`.

    Missing values stay missing. A non-missing label that is absent from
    `mapping` raises ValueError instead of silently surviving as a string
    (or turning into NaN) in an otherwise numeric column.
    """
    encoded = series.map(mapping)
    unmapped = encoded.isna() & series.notna()
    if unmapped.any():
        raise ValueError(
            f"Unexpected values in column {series.name!r}: "
            f"{sorted(series[unmapped].astype(str).unique())}"
        )
    return encoded


# Assign the encoded column back explicitly. Calling
# `df[col].replace(..., inplace=True)` operates on a temporary selection and
# is not guaranteed to modify `df` (it is a no-op under pandas Copy-on-Write).
df['gender'] = _encode_labels(df['gender'], {"Male": 0, "Female": 1})
df['disposition'] = _encode_labels(df['disposition'], {"Discharge": 0, "Admit": 1})
df['esi'] = df['esi'] - 1      # shift ESI down by one (1-based -> 0-based)
df['age'] = df['age'] / 100    # scale age down by a factor of 100

In [4]:
# Column groups are identified by their name prefix.
cc_cols = [col for col in df.columns if col.startswith("cc_")]
med_cols = [col for col in df.columns if col.startswith("meds_")]

# pmh_cols.txt holds one column name per line. Strip only the line ending
# (rather than blindly dropping the last character, which would truncate the
# final name when the file lacks a trailing newline) and skip blank lines.
with open("pmh_cols.txt") as f:
    pmh_cols = [line.rstrip("\r\n") for line in f]
pmh_cols = [name for name in pmh_cols if name]

# Model inputs: age, gender, every "cc_" column and the listed pmh columns.
x_cols = ["age", "gender"] + cc_cols + pmh_cols
# Model targets: disposition first, followed by every "meds_" column.
y_cols = ["disposition"] + med_cols

In [5]:
# NOTE: despite its name, `val_split` is the fraction of rows used for
# TRAINING; the remaining tail of `df` becomes the validation set.
val_split = 0.9

# Position of the first validation row.
split_at = int(val_split * df.shape[0])

# Cut the feature and target matrices at the same row boundary.
np_x_train, np_x_val = np.split(np.array(df[x_cols]), [split_at])
np_y_train, np_y_val = np.split(np.array(df[y_cols]), [split_at])

In [7]:
# Fully connected network: three ReLU hidden layers (300 -> 200 -> 100), each
# followed by batch normalization, and one sigmoid output per target column.
# Input and output widths are derived from the column lists so the model
# always matches the arrays built from `x_cols` / `y_cols` instead of relying
# on hard-coded sizes.
gen_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(len(x_cols),)),
    tf.keras.layers.Dense(300, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(200, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(len(y_cols), activation="sigmoid")
])

In [None]:
# Additive smoothing term; keeps the ratio defined when both sums are zero.
smooth = 1.
def dice_coef(y_true, y_pred):
    """Soft Dice coefficient over all elements of `y_true` and `y_pred`.

    Both inputs are flattened, so the score is computed jointly over every
    sample and every output in the batch:

        (2 * sum(y_true * y_pred) + smooth)
        / (sum(y_true) + sum(y_pred) + smooth)

    Args:
        y_true: ground-truth labels (tensor or array-like, any numeric dtype).
        y_pred: predictions with the same number of elements as `y_true`.

    Returns:
        A scalar tensor holding the Dice coefficient.
    """
    # Bring both operands to one floating dtype so the function also works
    # when called directly with integer label arrays and float predictions.
    y_pred = tf.convert_to_tensor(y_pred)
    if not y_pred.dtype.is_floating:
        y_pred = tf.cast(y_pred, tf.float32)
    y_true = tf.cast(y_true, y_pred.dtype)

    y_true_f = tf.reshape(y_true, [-1])
    y_pred_f = tf.reshape(y_pred, [-1])
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth)

def dice_coef_loss(y_true, y_pred):
    """Dice loss: one minus the Dice coefficient of `y_true` and `y_pred`."""
    score = dice_coef(y_true, y_pred)
    return 1 - score

# Step-wise exponential decay: start at 1e-3 and multiply by 0.96 after every
# 15 schedule steps.
# NOTE(review): the schedule is advanced once per optimizer step (i.e. per
# batch), not per epoch, so on a large training set the learning rate may
# shrink towards zero early in training -- confirm decay_steps=15 is intended.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=15,
    decay_rate=0.96,
    staircase=True,
)

# Optimize the Dice loss with Adam; additionally track the Dice coefficient,
# the area under the precision-recall curve, precision and recall.
gen_model.compile(
    loss=dice_coef_loss,
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
    metrics=[
        dice_coef,
        tf.keras.metrics.AUC(curve="PR"),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ],
)

In [None]:
# Train for 30 epochs in mini-batches of 32. After each epoch the checkpoint
# callback overwrites ./model_best.h5 (weights only) if the monitored
# validation loss improved; "val_loss" is the Keras default, spelled out here.
gen_model.fit(
    np_x_train,
    np_y_train,
    batch_size=32,
    epochs=30,
    validation_data=(np_x_val, np_y_val),
    callbacks=[
        tf.keras.callbacks.ModelCheckpoint(
            filepath="./model_best.h5",
            monitor="val_loss",
            save_best_only=True,
            save_weights_only=True,
        )
    ],
)

In [8]:
# Restore the weights saved by the ModelCheckpoint callback during training.
# NOTE(review): this reads a file from disk, so it may hold weights from an
# earlier session -- confirm they were trained on the same train/validation
# split as the one currently in memory.
gen_model.load_weights("model_best.h5")

In [13]:
# Raise the row display limit so the full (wide) record is printed.
pd.options.display.max_rows = 600
# Tenth row from the end of the shuffled frame; the validation arrays are the
# tail of `df`, so this is the same record as np_x_val[-10] / np_y_val[-10].
df.iloc[-10]

age                                                   0.85
gender                                                1.00
cc_abdominalcramping                                  0.00
cc_abdominaldistention                                0.00
cc_abdominalpain                                      0.00
cc_abdominalpainpregnant                              0.00
cc_abnormallab                                        1.00
cc_abscess                                            0.00
cc_addictionproblem                                   0.00
cc_agitation                                          0.00
cc_alcoholintoxication                                0.00
cc_alcoholproblem                                     0.00
cc_allergicreaction                                   0.00
cc_alteredmentalstatus                                0.00
cc_animalbite                                         0.00
cc_ankleinjury                                        0.00
cc_anklepain                                          0.

In [15]:
# Predict for a single validation row; a leading batch axis is added because
# the model expects 2-D input of shape (batch, features).
model_out = gen_model.predict(np_x_val[-10][np.newaxis, :])



In [20]:
# Raw sigmoid outputs for the single sample, one value per entry of y_cols.
model_out

array([[9.9988478e-01, 4.0541650e-04, 9.9657977e-01, 6.2565308e-04,
        3.1057853e-04, 3.3671467e-04, 5.4021762e-03, 4.7418097e-01,
        8.1794692e-04, 4.0480111e-02, 8.3097024e-04, 4.9283141e-03,
        2.2254491e-04, 1.5829614e-03, 9.3927812e-03, 2.9486211e-04,
        8.3623454e-04, 2.1958223e-03, 3.3196595e-03, 5.2604610e-01,
        1.8258396e-03, 4.1793617e-03, 4.7770154e-04, 1.0195873e-03,
        2.1984468e-01, 9.9850476e-01, 9.5613629e-02, 1.2869954e-03,
        7.9875393e-04, 1.3644198e-03, 4.4017886e-03, 3.4761041e-01,
        6.7783281e-02, 6.0620344e-01, 9.9980211e-01, 7.4308657e-04,
        2.5018172e-03, 3.9998982e-03, 6.2591257e-04, 3.6833974e-04,
        4.2711049e-03, 1.2501939e-03, 9.8183566e-01, 1.2303835e-03,
        1.2188799e-03, 1.8607369e-03, 8.7277722e-03, 7.1268859e-03,
        9.9976832e-01]], dtype=float32)

In [16]:
# Output 0 corresponds to "disposition" (the first entry of y_cols, encoded
# Discharge=0 / Admit=1); threshold it at 0.5.
to_admit = model_out[0, 0] > 0.5

In [21]:
# Outputs 1.. line up with med_cols (y_cols is "disposition" followed by
# med_cols); keep the names of the medication columns scored above 0.5.
meds_out = model_out[0][1:]
meds = [med_cols[idx] for idx, prob in enumerate(meds_out) if prob > 0.5]

In [22]:
# Medication columns whose predicted score exceeded the 0.5 threshold.
meds

['meds_analgesics',
 'meds_antiplateletdrugs',
 'meds_cardiovascular',
 'meds_elect/caloric/h2o',
 'meds_gastrointestinal',
 'meds_psychotherapeuticdrugs',
 'meds_vitamins']