In [59]:
# Select TensorFlow 2.x when running under Google Colab.
# NOTE: the `%...` line below is IPython magic syntax, so this cell only
# parses under IPython/Jupyter, not under the plain `python` interpreter.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  # Deliberate best-effort: outside Colab the magic is unavailable and the
  # already-installed TensorFlow is used as-is.
  pass

from __future__ import absolute_import, division, print_function, unicode_literals
import functools

import numpy as np
import tensorflow as tf

from tensorflow.keras import regularizers

#TRAIN_DATA_URL = "heart_train.csv"
#TEST_DATA_URL = "heart_test.csv"



#train_file_path = tf.keras.utils.get_file("heart_train.csv",TRAIN_DATA_URL)
#test_file_path = tf.keras.utils.get_file("heart_test.csv",TEST_DATA_URL)



# Make numpy values easier to read: print 3 decimal places and suppress
# scientific notation for small floats.
np.set_printoptions(precision=3, suppress=True)



#!head {'heart_train.csv'}



# Name of the CSV column that get_dataset() splits off as the label.
LABEL_COLUMN = 'chd'
# NOTE(review): LABELS is not referenced anywhere in the visible code. The -1
# looks inconsistent with the sigmoid / binary_crossentropy model built later
# in this cell, which expects 0/1 targets -- confirm the intended values.
LABELS = [0, -1]



def get_dataset(file_path, **kwargs):
  """Build a batched dataset of (features, label) pairs from a CSV file.

  Args:
    file_path: Path (or file pattern) passed straight to
      `tf.data.experimental.make_csv_dataset`.
    **kwargs: Extra keyword arguments forwarded to `make_csv_dataset`, e.g.
      `column_names` or `select_columns`. They take precedence over the
      defaults set below, so callers may also override `batch_size`,
      `num_epochs`, `na_value`, `label_name` or `ignore_errors`.

  Returns:
    The dataset created by `make_csv_dataset`; the column named by
    `LABEL_COLUMN` is split off as the label unless `label_name` is overridden.
  """
  options = dict(
      batch_size=32,
      label_name=LABEL_COLUMN,
      # Cells containing "?" are treated as missing values.
      na_value="?",
      # A single pass over the file, so iterating the dataset terminates.
      num_epochs=1,
      # Rows that fail to parse are skipped instead of raising.
      ignore_errors=True,
  )
  # Caller-supplied options win. Previously passing e.g. `batch_size` raised a
  # "multiple values for keyword argument" TypeError.
  options.update(kwargs)
  return tf.data.experimental.make_csv_dataset(file_path, **options)



# Full train/test datasets using the column names from each file's header row.
raw_train_data = get_dataset("heart_train.csv")
raw_test_data = get_dataset("heart_test.csv")


def show_batch(dataset):
  """Print each feature column of the first batch of `dataset`.

  Args:
    dataset: A dataset yielding (features_dict, label) pairs, as returned by
      get_dataset(). Only the first element is consumed; the label is ignored.
  """
  for batch, _ in dataset.take(1):
    for key, value in batch.items():
      print("{:20s}: {}".format(key, value.numpy()))


#show_batch(raw_train_data)



# Explicit names for every column in the CSV, passed instead of relying on the
# header row.
CSV_COLUMNS = ['row.names', 'sbp', 'tobacco', 'ldl', 'adiposity', 'famhist', 'typea', 'obesity', 'alcohol', 'age','chd']

temp_dataset = get_dataset("heart_train.csv", column_names=CSV_COLUMNS)

show_batch(temp_dataset)



# Same columns minus 'row.names'.
SELECT_COLUMNS = ['sbp', 'tobacco', 'ldl', 'adiposity', 'famhist', 'typea', 'obesity', 'alcohol', 'age','chd']

temp_dataset = get_dataset("heart_train.csv", select_columns=SELECT_COLUMNS)

show_batch(temp_dataset)



#def pack(features, label):
#  return tf.stack(list(features.values()), axis=-1), label


#packed_dataset = temp_dataset.map(pack)

#for features, labels in packed_dataset.take(1):
#  print(features.numpy())
#  print()
#  print(labels.numpy())



#show_batch(raw_train_data)

# Pull one (features, label) batch from temp_dataset for inspection.
# Both names are reassigned further down from packed_train_data.
example_batch, labels_batch = next(iter(temp_dataset)) 


class PackNumericFeatures(object):
  """Callable that folds selected feature columns into one 'numeric' entry.

  Intended to be passed to `Dataset.map` on (features, labels) elements.
  """

  def __init__(self, names):
    # Keys of the feature dict to pack, in the order they are stacked.
    self.names = names

  def __call__(self, features, labels):
    """Move the named columns out of `features` into `features['numeric']`.

    Args:
      features: Mapping from column name to tensor. It is modified in place:
        every key in `self.names` is removed and a 'numeric' key is added.
      labels: Passed through unchanged.

    Returns:
      The `(features, labels)` pair, with `features['numeric']` holding the
      named columns cast to float32 and stacked along a new last axis.
    """
    # Remove all named columns first, then cast, mirroring the two-phase
    # order so a missing key fails before any tensor op is created.
    popped = []
    for column_name in self.names:
      popped.append(features.pop(column_name))

    as_float = [tf.cast(column, tf.float32) for column in popped]
    features['numeric'] = tf.stack(as_float, axis=-1)

    return features, labels


# Columns that PackNumericFeatures folds into the single 'numeric' feature.
NUMERIC_FEATURES = [
    'sbp', 'tobacco', 'ldl', 'adiposity', 'typea', 'obesity', 'alcohol', 'age',
]

# The packer only stores the column names, so one instance serves both splits.
pack_numeric = PackNumericFeatures(NUMERIC_FEATURES)
packed_train_data = raw_train_data.map(pack_numeric)
packed_test_data = raw_test_data.map(pack_numeric)

#show_batch("heart_train.csv")

# One packed batch, reused below to try out the feature-column layers.
example_batch, labels_batch = next(iter(packed_train_data))

import pandas as pd
# Summary statistics of the numeric columns, computed on the training file.
train_frame = pd.read_csv("heart_train.csv")
desc = train_frame[NUMERIC_FEATURES].describe()
desc


# Per-column mean and standard deviation, ordered like NUMERIC_FEATURES.
MEAN = np.array(desc.loc['mean'])
STD = np.array(desc.loc['std'])


def normalize_numeric_data(data, mean, std):
  """Standardize `data` by subtracting `mean` and dividing by `std`.

  Args:
    data: Value(s) to normalize; anything supporting `-` and `/`.
    mean: Offset subtracted from `data`.
    std: Scale the centered data is divided by. No guard against zero.

  Returns:
    `(data - mean) / std`.
  """
  centered = data - mean
  return centered / std


# Bind the training-set statistics so the feature column can invoke the
# normalizer with the data tensor as its only argument.
normalizer = functools.partial(
    normalize_numeric_data,
    mean=MEAN,
    std=STD,
)

# A single feature column covering the whole packed 'numeric' vector.
numeric_column = tf.feature_column.numeric_column(
    'numeric',
    shape=[len(NUMERIC_FEATURES)],
    normalizer_fn=normalizer,
)
numeric_columns = [numeric_column]
# See what you just created.
numeric_column


# Packed values before normalization.
example_batch['numeric']

# The same batch after passing through the normalizing feature layer.
numeric_layer = tf.keras.layers.DenseFeatures(numeric_columns)
numeric_layer(example_batch).numpy()


# Vocabulary of each categorical column, keyed by column name.
CATEGORIES = {
    'famhist': ['Present', 'Absent'],
}

# One indicator column per categorical feature.
categorical_columns = [
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            key=feature, vocabulary_list=vocab))
    for feature, vocab in CATEGORIES.items()
]


# See what you just created.
categorical_columns

# First row of the example batch as seen through the categorical columns only.
categorical_layer = tf.keras.layers.DenseFeatures(categorical_columns)
categorical_row = categorical_layer(example_batch).numpy()[0]
print(categorical_row)


# Combined input layer: categorical indicators plus normalized numerics.
preprocessing_layer = tf.keras.layers.DenseFeatures(
    categorical_columns + numeric_columns)

preprocessed_row = preprocessing_layer(example_batch).numpy()[0]
print(preprocessed_row)

#Build the model

# Feature preprocessing first, then three identical hidden blocks
# (1000-unit ELU dense layer with L2 weight penalty, followed by 50% dropout),
# and finally a single sigmoid unit for the binary output.
model = tf.keras.Sequential([preprocessing_layer])

for _ in range(3):
  model.add(tf.keras.layers.Dense(
      1000, activation='elu', kernel_regularizer=regularizers.l2(0.001)))
  model.add(tf.keras.layers.Dropout(0.5))

#tf.keras.layers.Dense(256, activation='elu', kernel_regularizer=regularizers.l2(0.001)),
#tf.keras.layers.Dropout(0.5),

model.add(tf.keras.layers.Dense(
    1, activation='sigmoid', kernel_regularizer=regularizers.l2(0.001)))

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'])



#Train, evaluate, and predict

# The elements of packed_train_data are already batches (see get_dataset), so
# this shuffles at batch granularity with a buffer of up to 500 batches.
train_data = packed_train_data.shuffle(500)

#train_data = packed_train_data
test_data = packed_test_data


model.fit(train_data, epochs=30)

test_loss, test_accuracy = model.evaluate(test_data)

print('\n\nTest Loss {}, Test Accuracy {}'.format(test_loss, test_accuracy))



# Predictions for the whole test set. The row order follows this particular
# pass over test_data and is not guaranteed to match any other pass.
predictions = model.predict(test_data)

# Show some results
# make_csv_dataset shuffles by default, so a second iteration of test_data can
# yield rows in a different order than the pass used for `predictions`.
# Take ONE batch and score exactly that batch so each printed prediction is
# paired with its own label (and the whole dataset is not materialized).
sample_features, sample_labels = next(iter(test_data))
# training=False keeps the Dropout layers inactive, as in model.predict().
sample_predictions = model(sample_features, training=False).numpy()

for prediction, actual in zip(sample_predictions[:10], sample_labels[:10]):
  # The label column is 'chd', so report a CHD probability, not "survival".
  print("Predicted CHD probability: {:.2%}".format(prediction[0]),
        " | Actual outcome: ",
        ("1" if bool(actual) else "0"))




row.names           : [453 436 406 407 413 419 427 417 461 410 421 412 437 450 456 443 438 444
 401 448 460 420 411 428 409 416 447 458 446 452 442 397]
sbp                 : [120 132 160 116 166 176 132 134 108 112 126 178 136 124 146 120 138 166
 126 142 182 132 120 142 200 146 136 170 142 136 110 142]
tobacco             : [ 0.    0.    1.15  2.38  0.8   0.    0.    1.1   3.    4.2   0.   20.
  0.    1.8   0.64  0.    0.    6.    0.    0.    4.2   2.8   0.    1.32
 19.2   6.4   2.8   0.4   3.    1.81  0.    0.  ]
ldl                 : [ 2.77  3.55 10.19  5.67  5.63  3.14  3.3   3.54  1.59  3.58  4.55  9.78
  1.77  3.74  4.82  3.98  1.86  8.8   3.57  4.32  4.41  4.79  3.1   7.63
  4.43  5.62  2.53  4.11  3.69  3.31  7.14  3.54]
adiposity           : [13.35  8.66 39.71 29.01 36.21 31.04 21.61 20.41 15.23 27.14 29.18 33.55
 20.37 16.64 28.02 13.19 18.35 37.89 26.01 25.22 32.1  20.47 26.97 29.98
 40.6  33.05  9.28 42.06 25.1   6.74 28.28 16.64]
famhist             : [b'Absent' b'Present