## Setup

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import IntegerLookup
from tensorflow.keras.layers import Normalization
from tensorflow.keras.layers import StringLookup
import hypertune
import argparse

## Preparing the data

Let's download the data and load it into a Pandas dataframe:

In [None]:
def download_dataset(file_url="http://storage.googleapis.com/download.tensorflow.org/data/heart.csv"):

    dataframe = pd.read_csv(file_url)
    return dataframe

In [None]:
def split_data(dataframe):
    val_dataframe = dataframe.sample(frac=0.2, random_state=1337)
    train_dataframe = dataframe.drop(val_dataframe.index)

    print(
        "Using %d samples for training and %d for validation"
        % (len(train_dataframe), len(val_dataframe))
    )
    return val_dataframe,train_dataframe

In [None]:

def dataframe_to_dataset(dataframe):
    dataframe = dataframe.copy()
    labels = dataframe.pop("target")
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    ds = ds.shuffle(buffer_size=len(dataframe))
    return ds



In [None]:

def encode_numerical_feature(feature, name, dataset):
    # Create a Normalization layer for our feature
    normalizer = Normalization()

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the statistics of the data
    normalizer.adapt(feature_ds)

    # Normalize the input feature
    encoded_feature = normalizer(feature)
    return encoded_feature


def encode_categorical_feature(feature, name, dataset, is_string):
    lookup_class = StringLookup if is_string else IntegerLookup
    # Create a lookup layer which will turn strings into integer indices
    lookup = lookup_class(output_mode="binary")

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the set of possible string values and assign them a fixed integer index
    lookup.adapt(feature_ds)

    # Turn the string input into integer indices
    encoded_feature = lookup(feature)
    return encoded_feature


In [None]:
# Categorical features encoded as integers
def get_features(train_ds):
    sex = keras.Input(shape=(1,), name="sex", dtype="int64")
    cp = keras.Input(shape=(1,), name="cp", dtype="int64")
    fbs = keras.Input(shape=(1,), name="fbs", dtype="int64")
    restecg = keras.Input(shape=(1,), name="restecg", dtype="int64")
    exang = keras.Input(shape=(1,), name="exang", dtype="int64")
    ca = keras.Input(shape=(1,), name="ca", dtype="int64")

    # Categorical feature encoded as string
    thal = keras.Input(shape=(1,), name="thal", dtype="string")

    # Numerical features
    age = keras.Input(shape=(1,), name="age")
    trestbps = keras.Input(shape=(1,), name="trestbps")
    chol = keras.Input(shape=(1,), name="chol")
    thalach = keras.Input(shape=(1,), name="thalach")
    oldpeak = keras.Input(shape=(1,), name="oldpeak")
    slope = keras.Input(shape=(1,), name="slope")

    all_inputs = [
        sex,
        cp,
        fbs,
        restecg,
        exang,
        ca,
        thal,
        age,
        trestbps,
        chol,
        thalach,
        oldpeak,
        slope,
    ]

    # Integer categorical features
    sex_encoded = encode_categorical_feature(sex, "sex", train_ds, False)
    cp_encoded = encode_categorical_feature(cp, "cp", train_ds, False)
    fbs_encoded = encode_categorical_feature(fbs, "fbs", train_ds, False)
    restecg_encoded = encode_categorical_feature(restecg, "restecg", train_ds, False)
    exang_encoded = encode_categorical_feature(exang, "exang", train_ds, False)
    ca_encoded = encode_categorical_feature(ca, "ca", train_ds, False)

    # String categorical features
    thal_encoded = encode_categorical_feature(thal, "thal", train_ds, True)

    # Numerical features
    age_encoded = encode_numerical_feature(age, "age", train_ds)
    trestbps_encoded = encode_numerical_feature(trestbps, "trestbps", train_ds)
    chol_encoded = encode_numerical_feature(chol, "chol", train_ds)
    thalach_encoded = encode_numerical_feature(thalach, "thalach", train_ds)
    oldpeak_encoded = encode_numerical_feature(oldpeak, "oldpeak", train_ds)
    slope_encoded = encode_numerical_feature(slope, "slope", train_ds)

    all_features = layers.concatenate(
        [
            sex_encoded,
            cp_encoded,
            fbs_encoded,
            restecg_encoded,
            exang_encoded,
            slope_encoded,
            ca_encoded,
            thal_encoded,
            age_encoded,
            trestbps_encoded,
            chol_encoded,
            thalach_encoded,
            oldpeak_encoded,
        ]
    )
    return all_features,all_inputs


In [None]:
def create_model(all_features,all_inputs,num_neurons=32, learning_rate=0.1, dropout=0.5):
    
    x = layers.Dense(num_neurons, activation="relu")(all_features)
    x = layers.Dropout(dropout)(x)
    output = layers.Dense(1, activation="sigmoid")(x)
    #tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logging/")

    model = keras.Model(all_inputs, output)
    model.compile("adam", "binary_crossentropy", metrics=["accuracy"])

    return model

In [15]:
def get_args():
  '''Parses args. Must include all hyperparameters you want to tune.'''

  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--learning_rate',
      required=False,
      type=float,
      help='learning rate',
      default=0.1)
  parser.add_argument(
      '--num_neurons',
      required=False,
      type=int,
      default=64,
      help='number of units in last hidden layer')
  parser.add_argument(
      '--num_epochs',
      required=False,
      type=int,
      default=50,
      help='number of units in last hidden layer')
  parser.add_argument(
      '--dropout',
      required=False,
      type=float,
      default=0.5,
      help='number of units in last hidden layer')
  parser.add_argument(
      '--working_dir',
      required=False,
      default="gs://mlops-vertex-jsk/output/",
      help='number of units in last hidden layer')  
  args = parser.parse_args()
  return args
#new_model=redesign_model(64,0.1,0.1)


usage: ipykernel_launcher.py [-h] [--learning_rate LEARNING_RATE]
                             [--num_neurons NUM_NEURONS]
                             [--num_epochs NUM_EPOCHS] [--dropout DROPOUT]
                             [--working_dir WORKING_DIR]
ipykernel_launcher.py: error: unrecognized arguments: -f /home/jupyter/.local/share/jupyter/runtime/kernel-e740d9e3-c997-45ca-ad55-c864aca5a4cb.json


SystemExit: 2

In [None]:
hyperparams = {
    "hidden_units": 64,
    "epochs":50,
    "learning_rate":0.1,
    "dropout":0.1,
}

In [11]:
def main():
    args = get_args()

    LOG_DIR='gs://mlops-vertex-jsk/hearts/training/logging'
    dataframe=download_dataset("http://storage.googleapis.com/download.tensorflow.org/data/heart.csv")
    
    train_dataframe, val_dataframe = split_data(dataframe)
    

    train_ds = dataframe_to_dataset(train_dataframe)
    val_ds = dataframe_to_dataset(val_dataframe)
    
    train_ds = train_ds.batch(32)
    val_ds = val_ds.batch(32)
    
    features,inputs=get_features(train_ds)
    
    model = create_model(features,inputs,args.num_neurons, args.learning_rate, args.dropout)
    
    
    #tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR)
    history = model.fit(train_ds, epochs=args.num_epochs, validation_data=val_ds)
    
    # DEFINE METRIC
    hp_metric = history.history['val_accuracy'][-1]

    model.save(args.working_dir)




In [12]:
if __name__ == "__main__":
    main()

usage: ipykernel_launcher.py [-h] [--learning_rate LEARNING_RATE]
                             [--num_neurons NUM_NEURONS]
                             [--num_epochs NUM_EPOCHS] [--dropout DROPOUT]
ipykernel_launcher.py: error: unrecognized arguments: -f /home/jupyter/.local/share/jupyter/runtime/kernel-e740d9e3-c997-45ca-ad55-c864aca5a4cb.json


SystemExit: 2