In [1]:
import functools

import numpy as np
import tensorflow as tf

import ssl

# SECURITY NOTE(review): this replaces the ssl module's default HTTPS context
# factory with the *unverified* one, so any HTTPS request in this process that
# relies on that default (presumably the tf.keras.utils.get_file downloads
# below -- confirm) skips certificate verification. The override is
# process-wide and depends on private ssl APIs. Prefer fixing the local CA
# bundle (system certificates / certifi) and removing this line before
# sharing or reusing the notebook.
ssl._create_default_https_context = ssl._create_unverified_context

In [2]:
# Remote locations of the Titanic training and evaluation CSV files.
TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"

# Fetch each CSV through Keras; get_file returns the local path of the
# downloaded file.
train_file_path = tf.keras.utils.get_file(
    fname='titanic/train.csv',
    origin=TRAIN_DATA_URL,
)
test_file_path = tf.keras.utils.get_file(
    fname='titanic/eval.csv',
    origin=TEST_DATA_URL,
)

In [3]:
# Keep printed arrays compact: three decimal places and no scientific
# notation for small floating-point values.
np.set_printoptions(suppress=True, precision=3)

In [4]:
# Data-loading configuration.
# LABELS presumably enumerates the values the label column can take -- confirm.
LABELS = [0, 1]
# Column that get_dataset hands to make_csv_dataset as `label_name`.
LABEL_COLUMN = "survived"


In [5]:
def get_dataset(file_path, **kwargs):
    """Build a batched ``tf.data`` dataset from a CSV file.

    Thin wrapper around ``tf.data.experimental.make_csv_dataset`` that applies
    this notebook's defaults:

    * ``batch_size=5``
    * ``label_name=LABEL_COLUMN``
    * ``num_epochs=1`` (a single pass over the file)
    * ``ignore_errors=True`` (rows that fail to parse are skipped)

    Args:
        file_path: CSV path (or file pattern) forwarded to
            ``make_csv_dataset`` as its first positional argument.
        **kwargs: Additional ``make_csv_dataset`` options, e.g.
            ``column_names``, ``select_columns`` or ``column_defaults``.
            Any of the defaults listed above may also be overridden here;
            previously doing so raised ``TypeError`` because the keyword was
            passed twice.

    Returns:
        The dataset produced by ``make_csv_dataset``.
    """
    options = {
        'batch_size': 5,
        'label_name': LABEL_COLUMN,
        'num_epochs': 1,
        'ignore_errors': True,
    }
    # Caller-supplied options win over the notebook defaults.
    options.update(kwargs)
    return tf.data.experimental.make_csv_dataset(file_path, **options)

In [6]:
# Build the training and evaluation datasets (in that order) with the
# notebook's default get_dataset settings.
raw_train_data, raw_test_data = (
    get_dataset(csv_path) for csv_path in (train_file_path, test_file_path)
)

In [7]:
def show_batch(dataset):
    """Print every feature of the first batch in ``dataset``.

    Takes a single ``(features, label)`` element from the dataset and prints
    one line per feature: the feature name left-aligned in a 20-character
    field, followed by the value's ``.numpy()`` result. The label is not
    printed.

    Args:
        dataset: Object exposing ``take(n)`` that yields ``(features, label)``
            pairs, where ``features`` is a mapping whose values have a
            ``.numpy()`` method.
    """
    row_template = "{:20s}: {}"
    for features, _label in dataset.take(1):
        for name, tensor in features.items():
            print(row_template.format(name, tensor.numpy()))

In [8]:
# Inspect one batch of the raw training data: one line per feature column.
# The label column is split off by get_dataset and is not printed.
show_batch(raw_train_data)

sex                 : [b'male' b'male' b'male' b'male' b'female']
age                 : [40. 26. 50. 28. 28.]
n_siblings_spouses  : [0 0 1 0 1]
parch               : [0 0 0 0 0]
fare                : [31.     7.896 55.9    8.05  16.1  ]
class               : [b'First' b'Third' b'First' b'Third' b'Third']
deck                : [b'A' b'unknown' b'E' b'unknown' b'unknown']
embark_town         : [b'Cherbourg' b'Southampton' b'Southampton' b'Southampton' b'Southampton']
alone               : [b'y' b'y' b'n' b'y' b'n']


In [9]:
# Explicit column names for the CSV, listed in file order and passed to
# make_csv_dataset via `column_names`.
# Fixed typo: 'embark_twon' -> 'embark_town'. The misspelling renamed the
# feature key, so any later lookup of 'embark_town' on this dataset would miss.
# NOTE: re-run this cell to refresh its recorded output, which was captured
# with the misspelled key.
CSV_COLUMNS = ['survived', 'sex', 'age', 'n_siblings_spouses',
               'parch', 'fare', 'class', 'deck', 'embark_town', 'alone']
temp_dataset = get_dataset(train_file_path, column_names=CSV_COLUMNS)
show_batch(temp_dataset)

sex                 : [b'male' b'female' b'male' b'male' b'male']
age                 : [19. 28. 30. 36. 39.]
n_siblings_spouses  : [0 0 0 0 0]
parch               : [0 1 0 0 0]
fare                : [ 8.158 55.    13.    13.    26.   ]
class               : [b'Third' b'First' b'Second' b'Second' b'Second']
deck                : [b'unknown' b'E' b'unknown' b'unknown' b'unknown']
embark_twon         : [b'Southampton' b'Southampton' b'Southampton' b'Southampton'
 b'Southampton']
alone               : [b'y' b'n' b'y' b'y' b'y']


In [11]:
# Load only a subset of the CSV columns; 'survived' stays in the selection
# because get_dataset uses it as the label column.
SELECT_COLUMNS = [
    'survived',
    'age',
    'n_siblings_spouses',
    'class',
    'deck',
    'alone',
]
temp_dataset = get_dataset(
    train_file_path,
    select_columns=SELECT_COLUMNS,
)
show_batch(temp_dataset)

age                 : [28. 45. 40. 45. 28.]
n_siblings_spouses  : [0 1 0 0 0]
class               : [b'Third' b'Second' b'First' b'Third' b'Third']
deck                : [b'unknown' b'unknown' b'unknown' b'unknown' b'unknown']
alone               : [b'y' b'n' b'y' b'n' b'y']


In [13]:
# Numeric-only selection with one explicit default per selected column.
# The float defaults make the count columns load as floats, as the recorded
# output for n_siblings_spouses and parch shows.
SELECT_COLUMNS = [
    'survived',
    'age',
    'n_siblings_spouses',
    'parch',
    'fare',
]
DEFAULTS = [
    0,    # survived
    0.0,  # age
    0.0,  # n_siblings_spouses
    0.0,  # parch
    0.0,  # fare
]
temp_dataset = get_dataset(
    train_file_path,
    select_columns=SELECT_COLUMNS,
    column_defaults=DEFAULTS,
)
show_batch(temp_dataset)

age                 : [28. 18. 30. 32. 48.]
n_siblings_spouses  : [0. 0. 0. 0. 1.]
parch               : [0. 2. 0. 0. 2.]
fare                : [35.5  13.   93.5   7.75 65.  ]
