In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.model_selection import train_test_split

%matplotlib inline
import matplotlib.pyplot as plt


In [3]:
df = pd.read_csv("https://raw.githubusercontent.com/vyomshm/predicting-coronary-heart-disease-with-tensorflow-and-tensorboard/master/data/heart.csv")

In [4]:
df.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,160,12.0,5.73,23.11,Present,49,25.3,97.2,52,1
1,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1
2,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0
3,170,7.5,6.41,38.03,Present,51,31.99,24.26,58,1
4,134,13.6,3.5,27.78,Present,60,25.99,57.34,49,1


In [5]:
df.shape

(462, 10)

In [6]:
# convert category column to numeric
one_hot_famhist = pd.get_dummies(df['famhist'],prefix='famhist')
one_hot_famhist.head()

Unnamed: 0,famhist_Absent,famhist_Present
0,0,1
1,1,0
2,0,1
3,0,1
4,0,1


In [7]:
# combine data
df = pd.concat([df, one_hot_famhist],axis=1)
df.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd,famhist_Absent,famhist_Present
0,160,12.0,5.73,23.11,Present,49,25.3,97.2,52,1,0,1
1,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1,1,0
2,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0,0,1
3,170,7.5,6.41,38.03,Present,51,31.99,24.26,58,1,0,1
4,134,13.6,3.5,27.78,Present,60,25.99,57.34,49,1,0,1


In [8]:
df = df.drop(['famhist'],axis=1)
df.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,typea,obesity,alcohol,age,chd,famhist_Absent,famhist_Present
0,160,12.0,5.73,23.11,49,25.3,97.2,52,1,0,1
1,144,0.01,4.41,28.61,55,28.87,2.06,63,1,1,0
2,118,0.08,3.48,32.28,52,29.14,3.81,46,0,0,1
3,170,7.5,6.41,38.03,51,31.99,24.26,58,1,0,1
4,134,13.6,3.5,27.78,60,25.99,57.34,49,1,0,1


In [9]:
# get features
features = df.drop(['chd'],axis=1)
features.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,typea,obesity,alcohol,age,famhist_Absent,famhist_Present
0,160,12.0,5.73,23.11,49,25.3,97.2,52,0,1
1,144,0.01,4.41,28.61,55,28.87,2.06,63,1,0
2,118,0.08,3.48,32.28,52,29.14,3.81,46,0,1
3,170,7.5,6.41,38.03,51,31.99,24.26,58,0,1
4,134,13.6,3.5,27.78,60,25.99,57.34,49,0,1


In [10]:
labels = df['chd']

In [14]:
inputs_to_normalize = list(features.keys())

In [18]:
inputs_to_normalize.remove('famhist_Absent')
inputs_to_normalize.remove('famhist_Present')
inputs_to_normalize

['sbp', 'tobacco', 'ldl', 'adiposity', 'typea', 'obesity', 'alcohol', 'age']

In [19]:
for each in inputs_to_normalize:
    features[each]=(features[each]-features[each].min())/features[each].max()
features.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,typea,obesity,alcohol,age,famhist_Absent,famhist_Present
0,0.270642,0.384615,0.30985,0.385267,0.461538,0.227565,0.660371,0.578125,0,1
1,0.197248,0.000321,0.223744,0.514709,0.538462,0.304208,0.013996,0.75,1,0
2,0.077982,0.002564,0.163079,0.601083,0.5,0.310004,0.025885,0.484375,0,1
3,0.316514,0.240385,0.354207,0.736409,0.487179,0.371189,0.164821,0.671875,0,1
4,0.151376,0.435897,0.164384,0.495175,0.602564,0.242379,0.389565,0.53125,0,1


In [20]:
train_x, test_x, train_y, test_y = train_test_split(features,labels,test_size=0.2)

In [21]:
print(train_x.shape, train_y.shape)

(369, 10) (369,)


In [22]:
n_features =10
n_labels=2

epochs = 500
learning_rate = 0.1


In [23]:
columns = [tf.feature_column.numeric_column(column_name) for column_name in inputs_to_normalize]
columns

[NumericColumn(key='sbp', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='tobacco', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='ldl', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='adiposity', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='typea', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='obesity', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='alcohol', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [24]:
columns.append(tf.feature_column.categorical_column_with_vocabulary_list('famhist_Absent',[0,1]))
columns.append(tf.feature_column.categorical_column_with_vocabulary_list('famhist_Present',[0,1]))

In [25]:
estimator= tf.estimator.LinearClassifier(feature_columns=columns)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/9_/3m_lvmzs5t75xty5gvwr6wd558fj_g/T/tmpkxe4o7y0', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [27]:
# due to the different tf version in below example needs to modify tf

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

train_input_func = tf.estimator.inputs.pandas_input_fn(
    x = train_x,
    y = train_y,
    batch_size = 100,
    num_epochs = epochs,
    shuffle = True
)


Instructions for updating:
non-resource variables are not supported in the long term



In [28]:
estimator.train(
    input_fn = train_input_func,
    steps=1000)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/9_/3m_lvmzs5t75xty5gvwr6wd558fj_g/T/tm

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifierV2 at 0x13ef5a278>

In [29]:
test_input_func = tf.estimator.inputs.pandas_input_fn(
    x = test_x,
    y = test_y,
    batch_size=1,
    shuffle=False)

In [30]:
results = estimator.evaluate(input_fn=test_input_func,steps=None)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-10-23T11:27:40Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/9_/3m_lvmzs5t75xty5gvwr6wd558fj_g/T/tmpkxe4o7y0/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.85463s
INFO:tensorflow:Finished evaluation at 2020-10-23-11:27:41
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.72043014, accuracy_baseline = 0.60215056, auc = 0.7861969, auc_precision_recall = 0.72025657, average_loss = 0.54534656, global_step = 1000, label/mean = 0.39784947, loss = 0.54534656, precision = 0.7037037, prediction/mean = 0.35917705, recall = 0.5135135
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: /var/folders/9_/3m_lvmzs5t75xty5gvwr6wd558fj_g/T/tmpkxe4o7y0/model.ckpt-1000


In [31]:
results

{'accuracy': 0.72043014,
 'accuracy_baseline': 0.60215056,
 'auc': 0.7861969,
 'auc_precision_recall': 0.72025657,
 'average_loss': 0.54534656,
 'label/mean': 0.39784947,
 'loss': 0.54534656,
 'precision': 0.7037037,
 'prediction/mean': 0.35917705,
 'recall': 0.5135135,
 'global_step': 1000}

![image.png](attachment:image.png)


![image.png](attachment:image.png)

![image.png](attachment:image.png)