# In [5]:
# Ask Colab for TensorFlow 2.x. The `%...` line is an IPython line magic, so
# this cell only parses under IPython/Colab and not as a plain Python script.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  # Best-effort: any failure of the magic is ignored and whatever TensorFlow
  # is already importable gets used.
  pass

from __future__ import absolute_import, division, print_function, unicode_literals
import functools

import numpy as np
import tensorflow as tf

from tensorflow.keras import regularizers

#TRAIN_DATA_URL = "heart_train.csv"
#TEST_DATA_URL = "heart_test.csv"



#train_file_path = tf.keras.utils.get_file("heart_train.csv",TRAIN_DATA_URL)
#test_file_path = tf.keras.utils.get_file("heart_test.csv",TEST_DATA_URL)



# Make numpy values easier to read: 3 decimals, no scientific notation.
np.set_printoptions(precision=3, suppress=True)



#!head {'heart_train.csv'}



# Name of the CSV column used as the prediction target.
LABEL_COLUMN = 'chd'
# Values of the binary target. The model below ends in a sigmoid trained with
# binary cross-entropy, which expects labels in {0, 1}; the previous
# `[0, -1]` did not describe a valid label set for that loss.
LABELS = [0, 1]



def get_dataset(file_path, **kwargs):
  """Build a batched `tf.data` dataset from a CSV file.

  Args:
    file_path: Path (or file pattern) handed to
      `tf.data.experimental.make_csv_dataset`.
    **kwargs: Extra keyword arguments forwarded to `make_csv_dataset`
      (this file passes `column_names` and `select_columns`). They take
      precedence over the defaults set here, so a caller can override e.g.
      `batch_size` instead of hitting a duplicate-keyword `TypeError`.

  Returns:
    The dataset created by `make_csv_dataset`, using `LABEL_COLUMN` as the
    label column.
  """
  options = dict(
      batch_size=32,
      label_name=LABEL_COLUMN,
      na_value="?",  # Cells equal to "?" are treated as missing values.
      num_epochs=1,  # A single pass over the file per iteration.
      ignore_errors=True,  # NOTE: rows that fail to parse are skipped silently.
  )
  # Caller-supplied options win over the defaults above.
  options.update(kwargs)
  return tf.data.experimental.make_csv_dataset(file_path, **options)



# Load the train and test splits with the default reader options.
raw_train_data, raw_test_data = (
    get_dataset(csv_path)
    for csv_path in ("heart_train.csv", "heart_test.csv"))


def show_batch(dataset):
  """Print each feature of the first batch of `dataset`, one line per column.

  Args:
    dataset: Object whose `take(1)` yields `(features, label)` pairs, where
      `features` is a mapping from column name to a value exposing `.numpy()`.
      The label is ignored.
  """
  line_template = "{:20s}: {}"
  for features, _ in dataset.take(1):
    for column_name, column_values in features.items():
      print(line_template.format(column_name, column_values.numpy()))


# Preview one batch read with the default options.
show_batch(raw_train_data)



# Full list of column names, passed explicitly to the CSV reader.
CSV_COLUMNS = [
    'row.names', 'sbp', 'tobacco', 'ldl', 'adiposity', 'famhist',
    'typea', 'obesity', 'alcohol', 'age', 'chd',
]

# Same list without the 'row.names' column, used to restrict what is read.
SELECT_COLUMNS = [
    'sbp', 'tobacco', 'ldl', 'adiposity', 'famhist',
    'typea', 'obesity', 'alcohol', 'age', 'chd',
]

# Re-read the training file twice -- first with explicit column names, then
# with a column selection -- and preview a batch each time. `temp_dataset`
# ends up bound to the column-selected dataset.
for reader_options in ({'column_names': CSV_COLUMNS},
                       {'select_columns': SELECT_COLUMNS}):
  temp_dataset = get_dataset("heart_train.csv", **reader_options)
  show_batch(temp_dataset)



#def pack(features, label):
#  return tf.stack(list(features.values()), axis=-1), label


#packed_dataset = temp_dataset.map(pack)

#for features, labels in packed_dataset.take(1):
#  print(features.numpy())
#  print()
#  print(labels.numpy())



#show_batch(raw_train_data)

# Pull one (features, labels) batch from the column-selected dataset.
# NOTE: both names are reassigned further down from `packed_train_data`.
example_batch, labels_batch = next(iter(temp_dataset)) 


class PackNumericFeatures(object):
  """Dataset `map` callable that merges named columns into one 'numeric' feature.

  The named columns are removed from the feature dict, cast to float32 and
  stacked along the last axis; the result is stored under the key 'numeric'.
  """

  def __init__(self, names):
    # Column names to pack, in the order they are stacked.
    self.names = names

  def __call__(self, features, labels):
    """Return `(features, labels)` with the numeric columns packed.

    Note that `features` is modified in place: every column in `self.names`
    is popped from it and a 'numeric' entry is added.
    """
    packed = []
    for column in self.names:
      packed.append(tf.cast(features.pop(column), tf.float32))
    features['numeric'] = tf.stack(packed, axis=-1)
    return features, labels


# Columns that PackNumericFeatures stacks into the single 'numeric' feature.
NUMERIC_FEATURES = [
    'sbp', 'tobacco', 'ldl', 'adiposity',
    'typea', 'obesity', 'alcohol', 'age',
]

# Apply the packing step to both splits.
packed_train_data = raw_train_data.map(PackNumericFeatures(NUMERIC_FEATURES))
packed_test_data = raw_test_data.map(PackNumericFeatures(NUMERIC_FEATURES))

#show_batch("heart_train.csv")

# One packed batch, reused below to try out the feature layers.
example_batch, labels_batch = next(iter(packed_train_data))

# Summary statistics of the numeric columns, computed from the training CSV.
import pandas as pd
desc = pd.read_csv("heart_train.csv")[NUMERIC_FEATURES].describe()
desc


# Per-column mean and standard deviation, in NUMERIC_FEATURES order.
MEAN = np.array(desc.loc['mean'])
STD = np.array(desc.loc['std'])


def normalize_numeric_data(data, mean, std):
  """Standardize `data` by subtracting `mean` and dividing by `std`.

  Args:
    data: Values to normalize.
    mean: Value(s) subtracted from `data`.
    std: Value(s) the centered data is divided by.

  Returns:
    `(data - mean) / std`, computed with the operands' own arithmetic.
  """
  centered = data - mean
  return centered / std


# See what you just created.
# Bind the training-set statistics so the feature column can call the
# normalizer with the data alone.
normalizer = functools.partial(normalize_numeric_data, mean=MEAN, std=STD)

# One numeric column covering the whole packed 'numeric' vector.
numeric_column = tf.feature_column.numeric_column(
    'numeric',
    normalizer_fn=normalizer,
    shape=[len(NUMERIC_FEATURES)])
numeric_columns = [numeric_column]
numeric_column


example_batch['numeric']

# Run the numeric column on the example batch.
numeric_layer = tf.keras.layers.DenseFeatures(numeric_columns)
numeric_layer(example_batch).numpy()


# Vocabulary of each categorical (string) column.
CATEGORIES = {
    'famhist' : ['Present', 'Absent']
}

# One indicator column per categorical feature.
categorical_columns = [
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            key=feature, vocabulary_list=vocab))
    for feature, vocab in CATEGORIES.items()
]


# See what you just created.
categorical_columns

categorical_layer = tf.keras.layers.DenseFeatures(categorical_columns)
print(categorical_layer(example_batch).numpy()[0])


# Combined input layer: categorical columns first, then the numeric column.
preprocessing_layer = tf.keras.layers.DenseFeatures(
    categorical_columns + numeric_columns)

print(preprocessing_layer(example_batch).numpy()[0])

#Build the model

# Layer stack: the preprocessing layer, three 1000-unit ELU blocks (each
# followed by dropout), and a single sigmoid output unit. Every Dense layer
# carries its own L2 kernel regularizer.
model_layers = [preprocessing_layer]
for dropout_rate in (0.25, 0.5, 0.5):
  model_layers.append(
      tf.keras.layers.Dense(
          1000,
          activation='elu',
          kernel_regularizer=regularizers.l2(0.001)))
  model_layers.append(tf.keras.layers.Dropout(dropout_rate))
# An extra Dense(256, 'elu') + Dropout(0.5) block is currently disabled.
model_layers.append(
    tf.keras.layers.Dense(
        1,
        activation='sigmoid',
        kernel_regularizer=regularizers.l2(0.001)))

model = tf.keras.Sequential(model_layers)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'])



#Train, evaluate, and predict

# The packed dataset is already batched by get_dataset, so this shuffles
# whole batches using a 500-element buffer.
train_data = packed_train_data.shuffle(500)

#train_data = packed_train_data
test_data = packed_test_data


model.fit(train_data, epochs=30)

test_loss, test_accuracy = model.evaluate(test_data)

print('\n\nTest Loss {}, Test Accuracy {}'.format(test_loss, test_accuracy))



# Predictions for the whole test set (one pass over `test_data`).
predictions = model.predict(test_data)

# Show some results.
# get_dataset does not disable make_csv_dataset's shuffling, so two separate
# passes over `test_data` are not guaranteed to yield rows in the same order.
# Pairing `predictions` with labels taken from another pass could therefore
# print a prediction next to the wrong label. Instead, take ONE batch and read
# both its predictions and its labels; this also avoids materializing the
# entire dataset just to look at the first batch.
for sample_features, sample_labels in test_data.take(1):
  # training=False keeps the Dropout layers inactive, as in model.predict.
  sample_predictions = model(sample_features, training=False).numpy()
  for prediction, actual in zip(sample_predictions[:10],
                                sample_labels.numpy()[:10]):
    # The label names the `chd` target (was "survival", which this model
    # does not predict).
    print("Predicted chd probability: {:.2%}".format(prediction[0]),
          " | Actual outcome: ",
          ("1" if bool(actual) else "0"))




row.names           : [435 409 459 434 425 439 444 457 426 461 452 451 443 414 402 429 448 432
 413 407 398 397 463 417 423 400 431 438 433 445 399 455]
sbp                 : [120 200 214 136 176 138 166 128 142 108 136 144 120 164 134 146 142 118
 166 116 162 142 132 134 174 126 120 138 108 134 218 124]
tobacco             : [ 0.   19.2   0.4   0.    6.    0.06  6.    2.24  2.2   3.    1.81  4.
  0.    8.2   6.1   1.16  0.    0.    0.8   2.38  7.    0.    0.    1.1
  0.    8.75  0.    0.    0.    0.57 11.2   1.6 ]
ldl                 : [ 2.46  4.43  5.98  4.    3.98  4.15  8.8   2.83  3.29  1.59  3.31  5.03
  3.98 14.16  4.77  2.28  4.32  3.89  5.63  5.67  7.67  3.54  4.82  3.54
  3.86  6.06  3.57  1.86  1.43  4.75  2.77  7.22]
adiposity           : [13.39 40.6  31.72 19.06 17.2  20.66 37.89 26.48 22.7  15.23  6.74 25.78
 13.19 36.85 26.08 34.53 25.22 15.96 36.21 29.01 34.34 16.64 33.41 20.41
 21.73 32.72 23.22 18.35 26.26 23.07 30.79 39.68]
famhist             : [b'Absent' b'Present'