In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
df = pd.read_csv("cleaned_data.csv")
df = df.sample(frac=1)
df['gender'].replace(["Male", "Female"], [0, 1], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['gender'].replace(["Male", "Female"], [0, 1], inplace=True)
  df['gender'].replace(["Male", "Female"], [0, 1], inplace=True)


In [3]:
cc_cols = [x for x in df.columns if x[:3] == "cc_"]
med_cols = [x for x in df.columns if x[:5] == "meds_"]
with open("pmh_cols.txt") as f:
    pmh_cols = f.readlines()
pmh_cols = list(map(lambda x:x[:-1], pmh_cols))

x_cols = ["age", "gender"] + cc_cols + pmh_cols
y_cols = ["disposition"] + med_cols

esi_data = pd.get_dummies(df['esi'], dtype=int)

In [4]:
val_split = 0.9

np_x_train = np.array(df[x_cols])[:int(val_split * df.shape[0]), :]
np_esi_train = np.array(esi_data)[:int(val_split * df.shape[0]), :]

np_x_val = np.array(df[x_cols])[int(val_split * df.shape[0]):, :]
np_esi_val = np.array(esi_data)[int(val_split * df.shape[0]):, :]

print(np_x_train.shape)
print(np_esi_train.shape)
print()
print(np_x_val.shape)
print(np_esi_val.shape)

(502216, 483)
(502216, 5)

(55802, 483)
(55802, 5)


In [5]:
gen_model = tf.keras.Sequential([
    tf.keras.layers.Input(483),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(49, activation="softmax")
])

gen_model.compile(
    loss="binary_crossentropy",
    metrics=[
        tf.keras.metrics.AUC(curve="PR"),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall()
    ],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)
)



In [6]:
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    0.01, decay_steps=5, decay_rate=0.99, staircase=True
)

esi_model = tf.keras.Sequential([
    tf.keras.layers.Input(483),
    tf.keras.layers.Dense(200, activation="relu"),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(5, activation="softmax")
])

esi_model.compile(
    loss="categorical_crossentropy",
    metrics=["accuracy"],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01)
)



In [7]:
esi_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 200)               96800     
                                                                 
 dense_3 (Dense)             (None, 100)               20100     
                                                                 
 dense_4 (Dense)             (None, 5)                 505       
                                                                 
Total params: 117,405
Trainable params: 117,405
Non-trainable params: 0
_________________________________________________________________


In [8]:
esi_model.fit(
    x=np_x_train,
    y=np_esi_train,
    epochs=10,
    validation_data=(np_x_val, np_esi_val),
    batch_size=16,
)

Epoch 1/10
    1/31389 [..............................] - ETA: 1:11:25 - loss: 2.3910 - accuracy: 0.0000e+00

2024-02-09 23:16:44.545102: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2bf3bda90>