Tutorial from https://www.youtube.com/watch?v=tPYj3fFJGjk

In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import pandas as pd

In [None]:
# training_ds, ds_info = tfds.load('titanic',split='train',shuffle_files=True,as_supervised=True, with_info=True)

# as_supervised: if True, the returned tf.data.Dataset will have a 2-tuple structure (input, label) according to
# builder.info.supervised_keys. If False (the default), the returned tf.data.Dataset will yield dictionaries containing all the features.

# shuffle_files: generally leave it as True to have randomization

# split: usually 'train' vs 'test'; some datasets also have a 'validation' split, so check the documentation. The titanic dataset only has a 'train' split.

# with_info: if True, tfds.load also returns an extra object (ds_info here) holding the dataset's documentation/metadata

# tfds.show_examples() shows examples, but it needs the dataset to be loaded with as_supervised=False.

# ds_info

In [2]:
# Source CSVs for the tutorial: one file for training, one held out for evaluation.
TRAIN_CSV_URL = 'https://storage.googleapis.com/tf-datasets/titanic/train.csv'
EVAL_CSV_URL = 'https://storage.googleapis.com/tf-datasets/titanic/eval.csv'

training_data = pd.read_csv(TRAIN_CSV_URL)
testing_data = pd.read_csv(EVAL_CSV_URL)

In [None]:
# dict() of a Series maps index label -> value. Preview only the first rows
# instead of dumping every label into the cell output.
# This cell must run before the cell that pops 'survived' out of training_data.
dict(training_data['survived'].head())

In [3]:
# Split the label column off the features. pop() removes 'survived' from the
# frame in place, so a second run of this cell would raise KeyError; the guards
# make the cell safe to re-run (the labels from the first run are kept).
if 'survived' in training_data.columns:
    training_label = training_data.pop('survived')
if 'survived' in testing_data.columns:
    testing_label = testing_data.pop('survived')

In [None]:
# Peek at the first five feature rows (the label column has already been popped off).
training_data.head(n=5)

In [None]:
# Selecting several columns at once takes a *list* of column names inside the
# indexing brackets (hence the double brackets).
training_data[['fare','class']]

In [None]:
# A column can also be reached as an attribute of the DataFrame
# (training_data.age is the 'age' column); count() is then called on that
# column and returns its number of non-null entries.
training_data.age.count()

In [None]:
# pandas can plot straight from a column. hist() returns the matplotlib Axes,
# which is used here to give the figure a title and axis labels so it can be
# read on its own; the trailing ';' keeps the Axes.set() return value out of
# the cell output.
ax = training_data.age.hist(bins=40)
ax.set(title='Age distribution (training set)', xlabel='age', ylabel='count');

In [4]:
CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck','embark_town', 'alone']
NUMERIC_COLUMNS = ['age', 'fare']

# Categorical features: one vocabulary-list column each, with the vocabulary
# taken from the unique values seen in the training data.
categorical_feature_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        name, training_data[name].unique()
    )
    for name in CATEGORICAL_COLUMNS
]

# Numeric features are passed through as float32 columns.
numeric_feature_columns = [
    tf.feature_column.numeric_column(name, dtype=tf.float32)
    for name in NUMERIC_COLUMNS
]

# Same ordering as before: categorical columns first, then numeric ones.
feature_columns = categorical_feature_columns + numeric_feature_columns

print(feature_columns)

[VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='n_siblings_spouses', vocabulary_list=(1, 0, 3, 4, 2, 5, 8), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='parch', vocabulary_list=(0, 1, 2, 5, 3, 4), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Southampton', 'Cherbourg', 'Queenstown', 'unknown'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='alone', vocabulary_list=('n', 'y'), dtype=tf.string, def

https://www.tensorflow.org/api_docs/python/tf/feature_column/categorical_column_with_vocabulary_list?version=stable

In [5]:
def make_input_fn(input_data, input_label, epochs=10, shuffle=True, batch_size=32, shuffle_buffer_size=1000):
    """Build a zero-argument input function for a tf.estimator model.

    Nothing is read or built when this factory is called; the dataset is
    only constructed when the returned function is invoked.

    Args:
        input_data: Feature table. ``dict(input_data)`` must yield a mapping
            of feature name -> column of values (e.g. a pandas DataFrame).
        input_label: Labels, aligned row-for-row with ``input_data``.
        epochs: Number of times the batched dataset is repeated.
        shuffle: Whether to shuffle the rows before batching.
        batch_size: Number of rows per batch.
        shuffle_buffer_size: Size of the shuffle buffer. The default of 1000
            is the value that was previously hard-coded; a buffer at least
            as large as the dataset is needed for a full uniform shuffle.

    Returns:
        A function taking no arguments that builds and returns the
        ``tf.data.Dataset`` of ``(features_dict, label)`` batches.
    """

    def input_function():
        # Slice the (features, label) pair row by row into a tf.data.Dataset.
        ds = tf.data.Dataset.from_tensor_slices((dict(input_data), input_label))
        if shuffle:
            ds = ds.shuffle(shuffle_buffer_size)
        # Batch first, then repeat the batched dataset once per epoch.
        # The whole dataset object is returned, not a single batch.
        return ds.batch(batch_size).repeat(epochs)

    return input_function

# make_input_fn hands back a function; nothing is called or built yet. These
# functions are later passed to the estimator's train/evaluate calls.
train_input_fn = make_input_fn(input_data=training_data, input_label=training_label)
# Evaluation: a single, unshuffled pass over the eval set.
eval_input_fn = make_input_fn(input_data=testing_data, input_label=testing_label, epochs=1, shuffle=False)

In [None]:
# Sanity check: calling the input function builds the tf.data.Dataset.
# print() shows the dataset object itself; it does not iterate over the batches.
test = train_input_fn()
print(test)

In [8]:
# Linear classifier over the feature columns built earlier, using RMSProp.
# No model_dir is passed, so checkpoints go to a temporary directory (see log).
model1 = tf.estimator.LinearClassifier(
    feature_columns=feature_columns,
    optimizer='RMSProp',
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\9029\\AppData\\Local\\Temp\\tmp71iclca7', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [9]:
# Train on the dataset produced by train_input_fn; train() returns the
# estimator itself, which is why its repr appears as the cell output.
model1.train(input_fn=train_input_fn)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Instructions for updating:
Please use `layer.add_weight` method instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\9029\AppData\Local\Temp\tmp71iclca7\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 0.6931472, step = 0
INFO:tensorflow:global_step/sec: 327.98
INFO:tensorflow:loss = 1.7766234, step = 100 (0.321 se

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifierV2 at 0x255d89929a0>

In [10]:
# Evaluate the trained linear model on the held-out set; the metrics come back as a dict.
result = model1.evaluate(input_fn=eval_input_fn)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-08-04T15:03:36Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\9029\AppData\Local\Temp\tmp71iclca7\model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.61872s
INFO:tensorflow:Finished evaluation at 2021-08-04-15:03:37
INFO:tensorflow:Saving dict for global step 200: accuracy = 0.70075756, accuracy_baseline = 0.625, auc = 0.75194365, auc_precision_recall = 0.68880326, average_loss = 1.8465143, global_step = 200, label/mean = 0.375, loss = 1.891852, precision

In [11]:
# Show the metrics dict returned by model1.evaluate() (accuracy, auc, loss, ...).
print(result)

{'accuracy': 0.70075756, 'accuracy_baseline': 0.625, 'auc': 0.75194365, 'auc_precision_recall': 0.68880326, 'average_loss': 1.8465143, 'label/mean': 0.375, 'loss': 1.891852, 'precision': 0.8125, 'prediction/mean': 0.13447866, 'recall': 0.26262626, 'global_step': 200}


In [18]:
CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck','embark_town', 'alone']
NUMERIC_COLUMNS = ['age', 'fare']

# For the DNN model, categorical columns need to be wrapped in an indicator
# column or an embedding column, so build the raw vocabulary-list columns first
# (vocabulary = unique values seen in the training data) ...
temp = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        name, training_data[name].unique()
    )
    for name in CATEGORICAL_COLUMNS
]

# ... then wrap each one in an indicator column, and add the numeric features
# as plain float32 columns after them.
feature_columns2 = [tf.feature_column.indicator_column(column) for column in temp]
feature_columns2.extend(
    tf.feature_column.numeric_column(name, dtype=tf.float32)
    for name in NUMERIC_COLUMNS
)

print(feature_columns2)

[IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0)), IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='n_siblings_spouses', vocabulary_list=(1, 0, 3, 4, 2, 5, 8), dtype=tf.int64, default_value=-1, num_oov_buckets=0)), IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='parch', vocabulary_list=(0, 1, 2, 5, 3, 4), dtype=tf.int64, default_value=-1, num_oov_buckets=0)), IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0)), IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0)), IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='embark_town', voca

In [27]:
# Feed-forward classifier with two hidden layers (64 then 32 units) over the
# indicator-wrapped feature columns, using RMSProp.
model2 = tf.estimator.DNNClassifier(
    hidden_units=[64, 32],
    feature_columns=feature_columns2,
    optimizer='RMSProp',
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\9029\\AppData\\Local\\Temp\\tmpgr70jex3', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [28]:
# Train the DNN on the same training input function used for the linear model.
model2.train(input_fn=train_input_fn)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\9029\AppData\Local\Temp\tmpgr70jex3\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 1.2851797, step = 0
INFO:tensorflow:global_step/sec: 493.802
INFO:tensorflow:loss = 0.57499516, step = 100 (0.203 sec)
INFO:tensorflow:Calling checkpoint listeners before saving checkpoin

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x255cf3bb820>

In [23]:
# Evaluate the DNN on the held-out set.
# NOTE(review): the saved output below this cell restores a checkpoint from a
# different temp directory than the one model2 was last created in, and this
# cell's execution count precedes the model2 creation/training cells. The stored
# metrics therefore belong to an earlier model2; re-run the notebook top to
# bottom to refresh them.
result2 = model2.evaluate(input_fn=eval_input_fn)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-08-04T15:19:34Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\9029\AppData\Local\Temp\tmp8lo5rq49\model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.56442s
INFO:tensorflow:Finished evaluation at 2021-08-04-15:19:34
INFO:tensorflow:Saving dict for global step 200: accuracy = 0.68560606, accuracy_baseline = 0.625, auc = 0.80226505, auc_precision_recall = 0.72167003, average_loss = 0.62089276, global_step = 200, label/mean = 0.375, loss = 0.62139386, precis

In [24]:
# Display the metrics dict returned by model2.evaluate().
result2

{'accuracy': 0.68560606,
 'accuracy_baseline': 0.625,
 'auc': 0.80226505,
 'auc_precision_recall': 0.72167003,
 'average_loss': 0.62089276,
 'label/mean': 0.375,
 'loss': 0.62139386,
 'precision': 0.5506329,
 'prediction/mean': 0.57415354,
 'recall': 0.8787879,
 'global_step': 200}