In [None]:
import pprint as pp

import pandas as pd
import numpy as np

from matplotlib import pyplot as plt

# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)

import tensorflow as tf
import keras
from keras import layers
from keras.losses import BinaryCrossentropy


In [None]:
# Download the Titanic training CSV into a dataframe.
titanic_df = pd.read_csv(
    "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
)

# Downcast the numeric columns to compact integer dtypes.
# NOTE(review): casting to an integer dtype discards any fractional part; if
# `age` or `fare` are stored as floats in the CSV this is lossy -- confirm
# that the truncation is intended.
compact_dtypes = dict(
    survived="int8",
    age="int8",
    n_siblings_spouses="int8",
    parch="int8",
    fare="int16",
)
titanic = titanic_df.astype(compact_dtypes)

titanic.info()

# Separate the target column from the features, leaving `titanic` untouched.
titanic_labels = titanic["survived"].copy()
titanic_features = titanic.drop(columns=["survived"])


In [None]:

# builds the Keras inputs and the preprocessing head for a dataframe of features
# returns all of the collected inputs and the preprocessed features
def process_dataframe_inputs(feature_df):
  """Create one Keras input per column plus a preprocessing head over them.

  Each column of `feature_df` gets its own `keras.Input` of shape (1,),
  named after the column:

  * object-dtype columns are treated as strings: they go through a
    `StringLookup` whose vocabulary is the column's unique values, followed
    by a `CategoryEncoding` sized to that vocabulary;
  * every other column goes through a `Normalization(axis=None)` layer
    adapted to the column's values.

  The per-column results are concatenated and wrapped in a preprocessing
  model, which is then applied to the inputs.

  Args:
    feature_df: dataframe of features (labels already removed). Its data is
      used to build the vocabularies and normalization statistics.

  Returns:
    A tuple `(input_layers, preprocessed_inputs)`: the list of per-column
    symbolic inputs, and the symbolic output of the preprocessing model
    applied to those inputs.

  Raises:
    ValueError: if `feature_df` has no columns.
  """
  input_layers, preprocessed_inputs = [], []
  for name, col in feature_df.items():

    # TODO do this with log() ... print("processing %s ..." % name)

    # convert to tensorflow types. this is straightforward except
    # for objects, treat them as strings.
    dtype_str = str(col.dtype) if col.dtype != object else "string"
    cur_dtype = tf.as_dtype(dtype_str)
    # use the same `keras` namespace that `layers` was imported from, so the
    # inputs and the layers applied to them come from one implementation.
    cur_input = keras.Input(shape=(1,), name=name, dtype=cur_dtype)
    input_layers.append(cur_input)

    if cur_dtype == tf.string:
      lookup = layers.StringLookup(vocabulary=np.unique(col))
      encode = layers.CategoryEncoding(num_tokens=lookup.vocabulary_size())
      preprocessed_inputs.append(encode(lookup(cur_input)))

    else:
      norm = layers.Normalization(axis=None)
      # adapt() is documented for arrays/tensors/datasets, not pandas
      # objects, so hand it a plain NumPy array.
      norm.adapt(np.asarray(col))
      preprocessed_inputs.append(norm(cur_input))

  if not input_layers:
    # fail early with a clear message instead of an opaque Concatenate error
    raise ValueError("feature_df must contain at least one column")

  # prepare the return values by concatenating the preprocessed inputs
  # and creating a model for which they serve as outputs.
  #
  # returns the input layers and the processed_inputs, respectively

  preprocessed_inputs_cat = layers.Concatenate()(preprocessed_inputs)
  preprocessing_model = keras.Model(input_layers, preprocessed_inputs_cat)
  preprocessed_inputs = preprocessing_model(input_layers)

  return input_layers, preprocessed_inputs


In [None]:

# Seed Python, NumPy and the Keras backend so dropout, weight initialisation
# and batch shuffling are repeatable across Restart & Run All.
SEED = 42
keras.utils.set_random_seed(SEED)

input_layers, preprocessed_inputs = process_dataframe_inputs(titanic_features)

# Hidden stack applied on top of the preprocessed features.
# - The hidden Dense layers need a non-linearity; without one the whole stack
#   is equivalent to a single linear layer.
# - The output uses a sigmoid because the "binary_crossentropy" loss and the
#   "accuracy" metric below both expect probabilities in [0, 1], not raw
#   unbounded scores.
seqential_hidden_model = keras.Sequential([
  layers.Dropout(0.10),
  layers.Dense(16, activation="relu"),
  layers.Dense(16, activation="relu"),
  layers.Dense(8, activation="relu"),
  layers.Dense(1, activation="sigmoid"),
])
result = seqential_hidden_model(preprocessed_inputs)

# End-to-end model: raw per-column inputs -> preprocessing -> hidden stack.
titanic_model = keras.Model(input_layers, result)
titanic_model.compile(
  optimizer="adam",
  loss="binary_crossentropy",
  metrics=["accuracy"],
)

# The model has one named input per column, so feed it a dict of arrays
# keyed by column name.
titanic_features_dict = {
  name: np.array(value) for name, value in titanic_features.items()
}

hist = titanic_model.fit(
  x=titanic_features_dict,
  y=titanic_labels.to_numpy(),  # plain array, like the features above
  epochs=100,
  batch_size=25,
)


In [None]:

# Plot training loss and accuracy per epoch on twin y-axes (they live on
# different scales), with labelled axes and one shared legend.
fig, ax1 = plt.subplots()
loss_line, = ax1.plot(hist.epoch, hist.history['loss'], label='loss', color="blue")
ax1.set_xlabel("epoch")
ax1.set_ylabel("loss", color="blue")
ax1.set_title("Titanic model: training loss and accuracy")

ax2 = ax1.twinx()
acc_line, = ax2.plot(hist.epoch, hist.history['accuracy'], label='accuracy', color="red")
ax2.set_ylabel("accuracy", color="red")

# The two curves are on different Axes, so build the legend from both handles;
# otherwise the `label=` arguments above are never shown.
ax1.legend(handles=[loss_line, acc_line], loc="center right")

plt.show()

# tf.keras.utils.plot_model(model = titanic_model, rankdir="LR",)

