In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import tensorflow as tf
from tensorflow import keras

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import wandb
from wandb.integration.keras import WandbCallback

# Read the raw MIMIC table from a CSV file located next to the notebook.
DATA_PATH = "./MIMIC_data.csv"
data = pd.read_csv(DATA_PATH)


  from pandas.core.computation.check import NUMEXPR_INSTALLED
2024-10-25 17:47:59.847376: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Number of samples per batch for both the training and validation pipelines.
BATCH_SIZE = 64

# Drop every row containing a missing value, then separate the label column
# ('outcome') from the feature columns.
data_clean = data.dropna()
y = data_clean['outcome']
X = data_clean.drop(columns='outcome')

# Split BEFORE scaling: fitting the scaler on the full table would let the
# mean/variance of the held-out rows leak into the training features.
# With the same `random_state` and the same number of rows, the row partition
# is the same as when the already-scaled array was split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only and reuse those statistics for the
# held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any cell still reading `X_standard` continues to work; it holds
# all cleaned rows scaled with the training-set statistics.
X_standard = scaler.transform(X)

# Build the input pipelines using tf.data.
# Training batches are shuffled (buffer of 1024 samples) before batching.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(BATCH_SIZE)

# Validation batches keep their original order.
val_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
val_dataset = val_dataset.batch(BATCH_SIZE)

In [3]:
def make_model(x_train, input_name=None, num_classes=10):
    """Build a small fully connected Keras classifier.

    The network is: input -> Dense(64, relu) -> Dropout(0.3)
    -> Dense(64, relu) -> Dropout(0.3) -> Dense(num_classes).
    The final layer has no activation, so the model returns raw logits.

    Args:
        x_train: Any object whose ``shape[1]`` is the number of input
            features; only the shape is read, not the values.
        input_name: Optional name for the input layer. Defaults to ``None``
            (Keras picks a name), so ``make_model(X_train)`` is a valid call.
        num_classes: Width of the output layer. Defaults to 10, the value
            that was previously hard-coded.
            NOTE(review): confirm this matches the number of distinct labels
            in the target column.

    Returns:
        An uncompiled ``keras.Model`` mapping the feature vector to
        ``num_classes`` logits (output layer named "predictions").
    """
    inputs = keras.Input((x_train.shape[1],), name=input_name)

    # First hidden block.
    x1 = keras.layers.Dense(64, activation="relu")(inputs)
    x1 = keras.layers.Dropout(0.3)(x1)

    # Second hidden block.
    x2 = keras.layers.Dense(64, activation="relu")(x1)
    x2 = keras.layers.Dropout(0.3)(x2)

    # Linear output layer: logits, one per class.
    outputs = keras.layers.Dense(num_classes, name="predictions")(x2)

    return keras.Model(inputs=inputs, outputs=outputs)

# Build an initial model instance whose input layer is named "default".
# Note: the training cell assigns `model` again before training starts.
model = make_model(x_train=X_train, input_name="default")

In [6]:
def train_step(x, y, model, optimizer, loss_fn, train_acc_metric):
    """Run one optimisation step on a single batch.

    Args:
        x: Batch of input features passed to ``model``.
        y: Batch of targets passed to ``loss_fn`` and the metric.
        model: Callable model exposing ``trainable_weights``.
        optimizer: Optimizer whose ``apply_gradients`` performs the update.
        loss_fn: Callable ``loss_fn(y, predictions)`` returning the batch loss.
        train_acc_metric: Metric updated with ``(y, predictions)``.

    Returns:
        The loss value computed for this batch.
    """
    weights = model.trainable_weights

    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        predictions = model(x, training=True)
        batch_loss = loss_fn(y, predictions)

    # Back-propagate and apply one update to every trainable weight.
    gradients = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))

    # Accumulate the running training metric with this batch's predictions.
    train_acc_metric.update_state(y, predictions)

    return batch_loss

def test_step(x, y, model, loss_fn, val_acc_metric):
    val_logits = model(x, training=False)
    loss_value = loss_fn(y, val_logits)
    val_acc_metric.update_state(y, val_logits)

    return loss_value

def train(train_dataset, val_dataset,  model, optimizer,
          train_acc_metric, val_acc_metric,
          epochs=10,  log_step=200, val_log_step=50, loss_fn=None):
    """Run a custom training loop and log per-epoch metrics to wandb.

    For every epoch the function trains on all batches of ``train_dataset``
    via ``train_step``, evaluates all batches of ``val_dataset`` via
    ``test_step``, prints the epoch accuracies, resets both metrics, and
    calls ``wandb.log`` with the mean losses and the accuracies.

    Args:
        train_dataset: Iterable yielding ``(x_batch, y_batch)`` training pairs.
        val_dataset: Iterable yielding ``(x_batch, y_batch)`` validation pairs.
        model: Model passed through to ``train_step`` / ``test_step``.
        optimizer: Optimizer passed through to ``train_step``.
        train_acc_metric: Metric accumulated over the training batches.
        val_acc_metric: Metric accumulated over the validation batches.
        epochs: Number of passes over ``train_dataset``.
        log_step: Accepted for interface compatibility; currently unused.
        val_log_step: Accepted for interface compatibility; currently unused.
        loss_fn: Loss callable ``loss_fn(y, predictions)``. When ``None``
            (the default) the notebook-level variable ``loss_fn`` is used,
            which is what callers relied on before this parameter existed.

    Raises:
        NameError: If ``loss_fn`` is not given and no notebook-level
            ``loss_fn`` is defined.
    """
    if loss_fn is None:
        # Backward compatibility: older callers define `loss_fn` as a global
        # instead of passing it in. Resolve it once, up front, so a missing
        # loss fails before any training work is done.
        try:
            loss_fn = globals()["loss_fn"]
        except KeyError:
            raise NameError(
                "train() needs a loss: pass loss_fn=... or define a "
                "notebook-level 'loss_fn' before calling train()"
            ) from None

    for epoch in range(epochs):
        print("\nStart of epoch %d" % (epoch,))

        # Per-batch losses collected over this epoch.
        train_loss = []
        val_loss = []

        # Iterate over the batches of the training dataset.
        for x_batch_train, y_batch_train in train_dataset:
            loss_value = train_step(x_batch_train, y_batch_train,
                                    model, optimizer,
                                    loss_fn, train_acc_metric)
            train_loss.append(float(loss_value))

        # Run a validation loop at the end of each epoch.
        for x_batch_val, y_batch_val in val_dataset:
            val_loss_value = test_step(x_batch_val, y_batch_val,
                                       model, loss_fn,
                                       val_acc_metric)
            val_loss.append(float(val_loss_value))

        # Display metrics at the end of each epoch.
        train_acc = train_acc_metric.result()
        print("Training acc over epoch: %.4f" % (float(train_acc),))

        val_acc = val_acc_metric.result()
        print("Validation acc: %.4f" % (float(val_acc),))

        # Reset metrics so the next epoch starts from a clean accumulator.
        train_acc_metric.reset_state()
        val_acc_metric.reset_state()

        # Log the epoch summary using wandb.log.
        wandb.log({'epochs': epoch,
                   'loss': np.mean(train_loss),
                   'acc': float(train_acc),
                   'val_loss': np.mean(val_loss),
                   'val_acc':float(val_acc)})

In [7]:
# Initialize wandb with your project name and, optionally, a configuration.
# Play around with the config values and see the result on your wandb dashboard.
config = {
              "learning_rate": 0.001,
              "epochs": 100,
              # Log the batch size the tf.data pipelines actually use.
              "batch_size": BATCH_SIZE,
              "log_step": 200,
              "val_log_step": 50,
              # The model built by make_model consists of Dense layers only.
              "architecture": "MLP",
              "dataset": "MIMIC"
           }

run = wandb.init(project='my-tf-integration', config=config)
# From here on `config` is the wandb config object (attribute access).
config = wandb.config

try:
    # Initialize a fresh model; the input layer name is passed explicitly.
    model = make_model(X_train, "default")

    # Instantiate an optimizer to train the model.
    optimizer = keras.optimizers.SGD(learning_rate=config.learning_rate)
    # Instantiate a loss function. The model outputs raw logits, hence
    # from_logits=True. `train` reads this notebook-level variable.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    # Prepare the metrics.
    train_acc_metric = keras.metrics.SparseCategoricalAccuracy()
    val_acc_metric = keras.metrics.SparseCategoricalAccuracy()

    train(train_dataset,
          val_dataset,
          model,
          optimizer,
          train_acc_metric,
          val_acc_metric,
          epochs=config.epochs,
          log_step=config.log_step,
          val_log_step=config.val_log_step)
finally:
    # Always close the run, even if training raised, so it is not left open.
    run.finish()  # In Jupyter/Colab, let us know you're finished!


Start of epoch 0
Training acc over epoch: 0.1550
Validation acc: 0.1047

Start of epoch 1
Training acc over epoch: 0.1433
Validation acc: 0.1395

Start of epoch 2
Training acc over epoch: 0.1725
Validation acc: 0.1860

Start of epoch 3
Training acc over epoch: 0.1930
Validation acc: 0.1860

Start of epoch 4
Training acc over epoch: 0.2018
Validation acc: 0.2093

Start of epoch 5
Training acc over epoch: 0.1901
Validation acc: 0.2209

Start of epoch 6
Training acc over epoch: 0.2456
Validation acc: 0.2674

Start of epoch 7
Training acc over epoch: 0.3158
Validation acc: 0.2907

Start of epoch 8
Training acc over epoch: 0.3070
Validation acc: 0.3140

Start of epoch 9
Training acc over epoch: 0.3450
Validation acc: 0.3721

Start of epoch 10
Training acc over epoch: 0.4094
Validation acc: 0.4302

Start of epoch 11
Training acc over epoch: 0.3772
Validation acc: 0.5000

Start of epoch 12
Training acc over epoch: 0.3947
Validation acc: 0.5814

Start of epoch 13
Training acc over epoch: 0.44

VBox(children=(Label(value='0.015 MB of 0.015 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
acc,▁▁▁▃▄▄▆▅▅▆▆▇▇▇▇▇████████████████████████
epochs,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇██
loss,███▇▇▇▆▆▆▅▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▃▃▅▆▆▇▇██████████████████████████████
val_loss,██▇▇▇▆▅▅▅▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
acc,0.84795
epochs,99.0
loss,0.79971
val_acc,0.83721
val_loss,0.70923
