<a href="https://colab.research.google.com/github/osagieomigie/TensorFlow_ML/blob/main/coronary_heart_disease/coronaryHeartDisease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This is an exercise based on the tutorial found here: https://www.tensorflow.org/tutorials/load_data/csv 

In [16]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

In [17]:
import pandas as pd
import numpy as np
import io

import tensorflow as tf
from tensorflow.keras import layers, regularizers
from tensorflow.keras.layers.experimental import preprocessing

Upload heart_test.csv and heart_train.csv

In [6]:
# Upload heart csv file
from google.colab import files
uploaded = files.upload()

Saving heart_test.csv to heart_test.csv
Saving heart_train.csv to heart_train.csv


In [18]:
# read file
heart_train_data = pd.read_csv(io.BytesIO(uploaded['heart_train.csv']))
heart_test_data = pd.read_csv(io.BytesIO(uploaded['heart_test.csv']))

# visualize
heart_train_data.head()
heart_test_data.head()


Unnamed: 0,row.names,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,268,118,0.12,4.16,9.37,Absent,57,19.61,0.0,17,0
1,398,162,7.0,7.67,34.34,Present,33,30.77,0.0,62,0
2,408,180,25.01,3.7,38.11,Present,57,30.54,0.0,61,1
3,146,144,0.0,3.84,18.72,Absent,56,22.1,4.8,40,0
4,73,130,5.6,3.37,24.8,Absent,58,25.76,43.2,36,0


In [19]:
# seperate features from labels 
hTrainFeatures = heart_train_data.copy()
chd_train_labels = hTrainFeatures.pop('chd')
hTestFeatures = heart_test_data.copy()
chd_test_labels = hTestFeatures.pop('chd')

In [20]:
def build_symbolic_objs(features):
  inputs = {}

  for name, column in features.items():
    dtype = column.dtype
    if dtype == object:
      dtype = tf.string
    else:
      dtype = tf.float32

    inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)
  return inputs 

h_train_inputs = build_symbolic_objs(hTrainFeatures)

In [21]:
#Normalization layer 

def get_all_numeric_inputs(inputs, inputSet):
  numeric_inputs = {name:input for name,input in inputs.items()
                  if input.dtype==tf.float32}

  x = layers.Concatenate()(list(numeric_inputs.values()))
  norm = preprocessing.Normalization()
  norm.adapt(np.array(inputSet[numeric_inputs.keys()]))
  all_numeric_inputs = norm(x)
  return all_numeric_inputs

train_all_num_inputs = get_all_numeric_inputs(h_train_inputs, heart_train_data)

# Collect all the symbolic preprocessing results, to concatenate them later
train_preprocessed_inputs = [train_all_num_inputs]


In [22]:
# map from strings to integer indices in a vocabulary
def map_strings_to_ints(inputs, preprocessed_inputs, features):
  for name, input in inputs.items():
    if input.dtype == tf.float32:
      continue
    
    lookup = preprocessing.StringLookup(vocabulary=np.unique(features[name]))
    one_hot = preprocessing.CategoryEncoding(max_tokens=lookup.vocab_size())

    x = lookup(input)
    x = one_hot(x)
    preprocessed_inputs.append(x)

map_strings_to_ints(h_train_inputs, train_preprocessed_inputs, hTrainFeatures) 

In [23]:
train_preprocessed_inputs_cat = layers.Concatenate()(train_preprocessed_inputs)

chd_preprocessing = tf.keras.Model(h_train_inputs, train_preprocessed_inputs_cat)
train_features_dict = {name: np.array(value) 
                         for name, value in hTrainFeatures.items()}

test_features_dict = {name: np.array(value) 
                         for name, value in hTestFeatures.items()}

In [28]:
# build model 
def chd_model(preprocessing_head, inputs):
  body = tf.keras.Sequential([
    layers.Dense(256, activation='elu', kernel_regularizer=regularizers.l2(0.001)),
    tf.keras.layers.Dropout(0.5),
    layers.Dense(256, activation='elu', kernel_regularizer=regularizers.l2(0.001)),
    tf.keras.layers.Dropout(0.5),
    layers.Dense(1)
  ])

  preprocessed_inputs = preprocessing_head(inputs)
  result = body(preprocessed_inputs)
  model = tf.keras.Model(inputs, result)

  model.compile(loss=tf.losses.BinaryCrossentropy(from_logits=True),
                optimizer='adam', metrics=['accuracy'])
  return model

chd_model = chd_model(chd_preprocessing, h_train_inputs)

# train model 
chd_model.fit(x=train_features_dict, y=chd_train_labels, epochs=1000, verbose=2)

print("--Evaluate model--")
model_loss, model_acc = chd_model.evaluate(x=test_features_dict, y=chd_test_labels, verbose=2)
print(f"Model Loss:    {model_loss:.2f}")
print(f"Model Accuracy: {model_acc*100:.1f}%")

Epoch 1/1000
12/12 - 0s - loss: 0.9002 - accuracy: 0.6883
Epoch 2/1000
12/12 - 0s - loss: 0.8040 - accuracy: 0.7290
Epoch 3/1000
12/12 - 0s - loss: 0.8327 - accuracy: 0.7073
Epoch 4/1000
12/12 - 0s - loss: 0.8060 - accuracy: 0.7100
Epoch 5/1000
12/12 - 0s - loss: 0.8130 - accuracy: 0.7182
Epoch 6/1000
12/12 - 0s - loss: 0.7705 - accuracy: 0.7182
Epoch 7/1000
12/12 - 0s - loss: 0.7841 - accuracy: 0.7073
Epoch 8/1000
12/12 - 0s - loss: 0.7917 - accuracy: 0.6694
Epoch 9/1000
12/12 - 0s - loss: 0.7545 - accuracy: 0.7263
Epoch 10/1000
12/12 - 0s - loss: 0.7447 - accuracy: 0.7371
Epoch 11/1000
12/12 - 0s - loss: 0.7456 - accuracy: 0.7182
Epoch 12/1000
12/12 - 0s - loss: 0.7510 - accuracy: 0.6992
Epoch 13/1000
12/12 - 0s - loss: 0.7385 - accuracy: 0.7073
Epoch 14/1000
12/12 - 0s - loss: 0.7312 - accuracy: 0.7263
Epoch 15/1000
12/12 - 0s - loss: 0.7342 - accuracy: 0.7127
Epoch 16/1000
12/12 - 0s - loss: 0.7424 - accuracy: 0.7046
Epoch 17/1000
12/12 - 0s - loss: 0.7256 - accuracy: 0.7209
Epoch 