In [81]:
!pip install sklearn
!pip install tensorflow==2.0.0 



Import Required libraries

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

# Import pandas and numpy
import pandas as pd
import numpy as np

#tensorflow and tf.keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow import feature_column


from sklearn.model_selection import train_test_split

**Importing clinical data** using pandas

Get the data using pandas

In [83]:
# Location of the clinical heart dataset (CSV) used throughout this notebook.
URL = "https://storage.googleapis.com/applied-dl/heart.csv"

# Read the CSV into a DataFrame and preview its first five rows.
dataframe = pd.read_csv(filepath_or_buffer=URL)
dataframe.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,fixed,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,normal,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,reversible,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,normal,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,normal,0


**Split dataframe into Train, Validation and Test**

In [84]:
# Hold out 20% of the rows for testing, then 20% of the remaining rows for
# validation. A fixed random_state keeps the split, and therefore every
# metric reported later in the notebook, reproducible across re-runs.
train, test = train_test_split(dataframe, test_size=0.2, random_state=42)
train, val = train_test_split(train, test_size=0.2, random_state=42)
print(len(train), 'Train Example')
print(len(val), 'Validation Example')
print(len(test), 'Test Example')

193 Train Example
49 Validation Example
61 Test Example


**Create an Input pipeline using tf.data**
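The following function converts a dataframe into a `tf.data.Dataset` by creating tensor slices from a dictionary representation of the dataframe.
The `target` column (the value to be predicted) is removed from the features and passed separately as the labels.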

In [0]:
def df_to_to_dataset(dataframe, shuffle=True, batch_size=32):
  """Build a batched tf.data dataset of (features, labels) from a pandas DataFrame.

  Args:
    dataframe: DataFrame that contains a 'target' column. It is copied first,
      so the caller's frame is not modified.
    shuffle: when True, shuffle using a buffer as large as the frame.
    batch_size: number of rows per batch.

  Returns:
    A tf.data.Dataset whose elements are (dict of column name -> values,
    labels) batches.
  """
  features = dataframe.copy()
  # Split the label column off so only the input features remain.
  labels = features.pop('target')
  ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(features))
  return ds.batch(batch_size)

Get a batch of tensor data

In [0]:
# A deliberately small batch size, used only for demonstration.
batch_size = 5
# Only the training split is shuffled; validation and test keep their order.
train_ds = df_to_to_dataset(train, shuffle=True, batch_size=batch_size)
val_ds = df_to_to_dataset(val, batch_size=batch_size, shuffle=False)
test_ds = df_to_to_dataset(test, batch_size=batch_size, shuffle=False)

**Understand the input pipeline**

In [87]:
# Look at a single batch: the feature names, one feature's values, and the labels.
for feature_batch, label_batch in train_ds.take(count=1):
  # feature_batch is a dict, so iterating over it yields the column names.
  print('Every Feature: ', list(feature_batch))
  print('A batch of ages: ', feature_batch['age'])
  print('Batch of labels: ', label_batch)

Every Feature:  ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']
A batch of ages:  tf.Tensor([61 58 63 37 54], shape=(5,), dtype=int32)
Batch of labels:  tf.Tensor([0 0 0 0 1], shape=(5,), dtype=int32)


**Demonstrate several types of feature column**

In [0]:
# Pull one batch from the training set and keep only its features (element 0
# of the (features, labels) pair); the feature-column demos below run on it.
example_batch = next(iter(train_ds.take(1)))[0]

In [0]:
def demo(feature_column):
  """Transform `example_batch` with the given feature column and print the result.

  Args:
    feature_column: the feature column handed to `layers.DenseFeatures`.
      Inside this function the name shadows the imported `feature_column`
      module.
  """
  dense_output = layers.DenseFeatures(feature_column)(example_batch)
  print(dense_output.numpy())

**Numeric Column:** The output of a feature column becomes the input to the model. Using the `demo` function defined above, we can see how each column from the dataframe is transformed.

In [90]:
# A numeric column feeds the raw 'age' values to the model as real numbers.
age = feature_column.numeric_column(key='age')
demo(age)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

[[62.]
 [63.]
 [56.]
 [69.]
 [50.]]


**Bucketized Column:**
Sometimes we don't want to feed a number directly into the model, and instead want to split its values into different categories based on numerical ranges. E.g., we can split AGE into several ranges called buckets.

In [91]:
# One-hot encode 'age' by the range it falls into: 10 boundaries give 11 buckets.
age_bucket = feature_column.bucketized_column(
    source_column=age,
    boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
demo(age_bucket)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]]


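**Categorical column**: We can't feed strings directly to a model. One solution is to map each string to an entry in a fixed vocabulary list using `categorical_column_with_vocabulary_list`, and then one-hot encode it with `indicator_column`.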

---



In [92]:
# Map each 'thal' string to its position in the vocabulary, then one-hot encode it.
thal = feature_column.categorical_column_with_vocabulary_list(
    key='thal',
    vocabulary_list=['fixed', 'normal', 'reversible'])
thal_one_hot = feature_column.indicator_column(categorical_column=thal)
demo(thal_one_hot)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

[[0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]


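**Embedding column**: One-hot encoding becomes impractical when a feature has a large number of possible values. In that case, use an embedding column, which represents each value as a lower-dimensional dense vector.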

In [93]:
# Represent each 'thal' category as a dense vector of 8 values.
thal_embedding = feature_column.embedding_column(
    categorical_column=thal,
    dimension=8)
demo(thal_embedding)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

[[-0.0093684  -0.57367176 -0.03364039 -0.45214114  0.03246557  0.15541436
   0.21647228  0.25836986]
 [-0.44763964 -0.5155916   0.44865325 -0.15138236  0.21890582 -0.15757757
  -0.23467873 -0.3158418 ]
 [-0.0093684  -0.57367176 -0.03364039 -0.45214114  0.03246557  0.15541436
   0.21647228  0.25836986]
 [-0.0093684  -0.57367176 -0.03364039 -0.45214114  0.03246557  0.15541436
   0.21647228  0.25836986]
 [-0.0093684  -0.57367176 -0.03364039 -0.45214114  0.03246557  0.15541436
   0.21647228  0.25836986]]


**Hashed feature column**: This feature column calculates a hash value of the input and uses it to select one of `hash_bucket_size` buckets to encode a string. We can also make the number of buckets smaller than the number of categories. However, there is then a chance of collisions: different strings may map to the same bucket.

In [94]:
# Hash each 'thal' string into one of 1000 buckets, then one-hot encode the bucket.
thal_hashed = feature_column.categorical_column_with_hash_bucket(
    key='thal',
    hash_bucket_size=1000)
demo(feature_column.indicator_column(categorical_column=thal_hashed))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


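**Crossed Feature Column**: Combining several features into a single feature (a feature cross) lets the model learn a separate weight for each combination of values. Note that `crossed_column` does not build the full table of all possible combinations; instead, the combinations are hashed into `hash_bucket_size` buckets.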

In [95]:
# Cross the age buckets with 'thal' and hash the combinations into 1000 buckets.
crossed_feature = feature_column.crossed_column(
    keys=[age_bucket, thal],
    hash_bucket_size=1000)
demo(feature_column.indicator_column(categorical_column=crossed_feature))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


Choose Which columns to use to train the model

In [0]:
# Collect every feature column the models will be trained on.
feature_columns = []

# Numeric columns: these features are passed to the model as real values.
for header in ['age','trestbps','chol','thalach','oldpeak','slope','ca']:
  feature_columns.append(feature_column.numeric_column(header))

# Bucketized column. `age` is defined here so that this cell does not depend
# on the variable left behind by the earlier numeric-column demo cell.
age = feature_column.numeric_column('age')
age_bucket = feature_column.bucketized_column(age, boundaries=[18,25,30,35,40,45,50,55,60,65])
feature_columns.append(age_bucket)

# Indicator (one-hot) column for the 'thal' strings.
thal = feature_column.categorical_column_with_vocabulary_list(
    'thal', ['fixed','normal','reversible'])
thal_one_hot = feature_column.indicator_column(thal)
feature_columns.append(thal_one_hot)

# Embedding column: an 8-dimensional dense representation of 'thal'.
thal_embedding = feature_column.embedding_column(thal, dimension=8)
feature_columns.append(thal_embedding)

# Crossed column: age bucket x thal, hashed into 1000 buckets and one-hot encoded.
crossed_feature = feature_column.crossed_column([age_bucket, thal], hash_bucket_size=1000)
crossed_feature = feature_column.indicator_column(crossed_feature)
feature_columns.append(crossed_feature)

Create a feature layer

In [0]:
# A single Keras layer that applies all the selected feature columns to the raw input dict.
feature_layer = layers.DenseFeatures(feature_columns=feature_columns)

Now recreate the input pipeline with a larger batch size

In [0]:
# Rebuild the three datasets with the batch size used for actual training.
batch_size = 32
# Only the training split is shuffled; validation and test keep their order.
train_ds = df_to_to_dataset(train, shuffle=True, batch_size=batch_size)
val_ds = df_to_to_dataset(val, batch_size=batch_size, shuffle=False)
test_ds = df_to_to_dataset(test, batch_size=batch_size, shuffle=False)

**Create, Compile and Build the Model**

**Create a baseline Model with Logistic Regression:**

In [0]:
# Logistic regression baseline: the feature layer feeding a single sigmoid unit.
model = tf.keras.Sequential()
model.add(feature_layer)
model.add(layers.Dense(units=1, activation='sigmoid'))

In [100]:
# Binary classification: cross-entropy loss, tracked with accuracy.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'],
              run_eagerly=True)

# Train for 5 epochs, checking against the validation split after each one.
model.fit(x=train_ds, epochs=5, validation_data=val_ds)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f8c8c2a5518>

In [107]:
# Score the baseline on the held-out test split; evaluate() yields the loss
# followed by the accuracy metric requested in compile().
loss, accuracy = model.evaluate(x=test_ds)
print("Accuracy", accuracy)

Accuracy 0.704918


Build Neural Network Based Model

In [108]:
# Neural network: two hidden ReLU layers of 128 units and a sigmoid output.
# The model gets its OWN DenseFeatures layer. Reusing `feature_layer` would
# share the embedding weights already trained inside the baseline `model`,
# so this network would not start from fresh weights and training it would
# also silently change the baseline model.
model_nn = tf.keras.Sequential([
                                layers.DenseFeatures(feature_columns),
                                layers.Dense(128, activation='relu'),
                                layers.Dense(128, activation='relu'),
                                layers.Dense(1, activation='sigmoid'),
])

model_nn.compile(optimizer='adam',
                 loss='binary_crossentropy',
                 metrics=['accuracy'],
                 run_eagerly=True)

# Train for 5 epochs, checking against the validation split after each one.
model_nn.fit(train_ds, validation_data=val_ds, epochs=5)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f8c84e31f60>

In [109]:
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
model_nn.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_features_19 (DenseFeat multiple                  24        
_________________________________________________________________
dense_11 (Dense)             multiple                  131840    
_________________________________________________________________
dense_12 (Dense)             multiple                  16512     
_________________________________________________________________
dense_13 (Dense)             multiple                  129       
Total params: 148,505
Trainable params: 148,505
Non-trainable params: 0
_________________________________________________________________
None


In [0]:
# Score the neural network on the held-out test split; evaluate() yields the
# loss followed by the accuracy metric requested in compile().
loss, accuracy = model_nn.evaluate(x=test_ds)
print("Accuracy", accuracy)

**Conclusion**