In [14]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow.compat.v2.feature_column as fc

import tensorflow as tf

In [36]:
# load the Titanic dataset: one CSV for training, one for evaluation

# training split; 'survived' is the label, so pop it out of the feature frame
trainingData = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
trainResults = trainingData.pop('survived')

# evaluation split, handled the same way
evalData = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')
evalResults = evalData.pop('survived')

# print(trainingData.head())

In [35]:
# graph just for fun
# trainingData.age.hist(bins=100)

In [27]:
# build the feature columns; non-numeric features are described by their vocabulary

categoricalColumns = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']
numericColumns = ['age', 'fare']

# categorical features: the vocabulary is every distinct value seen in the training data
categoricalFeatureColumns = [
    tf.feature_column.categorical_column_with_vocabulary_list(name, trainingData[name].unique())
    for name in categoricalColumns
]

# numeric features are passed through as float32
numericFeatureColumns = [
    tf.feature_column.numeric_column(name, dtype=tf.float32)
    for name in numericColumns
]

# same ordering as before: categorical columns first, then numeric ones
featureColumns = categoricalFeatureColumns + numericFeatureColumns

print(featureColumns)

[VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='n_siblings_spouses', vocabulary_list=(1, 0, 3, 4, 2, 5, 8), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='parch', vocabulary_list=(0, 1, 2, 5, 3, 4), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Southampton', 'Cherbourg', 'Queenstown', 'unknown'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='alone', vocabulary_list=('n', 'y'), dtype=tf.string, def

In [29]:
# next we start training the model on the data

# we will feed the data into the model in batches of 32
# and we will pass over the whole dataset multiple times, in a different order each time; each full pass is called an epoch
# the number of epochs must not be too high, or the model just memorizes the training datapoints (overfitting), but too few epochs will leave it inaccurate

In [40]:
# we first make an input function, which tells the estimator how to read the data

def makeInputFn(data, labels, epochs=10, shuffle=True, batchSize=32, shuffleBufferSize=1000):
    """Build a zero-argument input function producing batched (features, labels) data.

    Args:
        data: mapping-like of feature name -> values (e.g. a DataFrame);
            it is converted with ``dict(data)`` before being sliced.
        labels: label values, sliced alongside ``data``.
        epochs: number of times the batched dataset is repeated.
        shuffle: when true, the examples are shuffled before batching.
        batchSize: number of examples per batch.
        shuffleBufferSize: buffer size passed to ``Dataset.shuffle``.
            Defaults to 1000, the value that used to be hard-coded.

    Returns:
        A function taking no arguments that builds and returns the
        ``tf.data.Dataset`` each time it is called.
    """
    def inputFunction():
        # build a tf.data.Dataset (not a DataFrame) of (feature dict, label) pairs
        dataset = tf.data.Dataset.from_tensor_slices((dict(data), labels))
        if shuffle:
            dataset = dataset.shuffle(shuffleBufferSize)
        # batch first, then repeat the batched data once per epoch
        dataset = dataset.batch(batchSize).repeat(epochs)
        return dataset
    return inputFunction

# training input: uses makeInputFn's defaults for epochs, shuffling and batch size
trainInputFunction = makeInputFn(trainingData, trainResults)
# evaluation input: a single unshuffled pass over the evaluation data
evalInputFunction = makeInputFn(evalData, evalResults, epochs=1, shuffle=False)

In [43]:
# create a linear classifier over the feature columns, train it, then evaluate it
linearEstimator = tf.estimator.LinearClassifier(feature_columns=featureColumns)
linearEstimator.train(trainInputFunction)
result = linearEstimator.evaluate(evalInputFunction)

# wipe the training log output, then show the accuracy followed by the full metrics dict
clear_output()
print(result['accuracy'], result, sep='\n')

0.7462121
{'accuracy': 0.7462121, 'accuracy_baseline': 0.625, 'auc': 0.83422107, 'auc_precision_recall': 0.79559267, 'average_loss': 0.47268212, 'label/mean': 0.375, 'loss': 0.46536317, 'precision': 0.6666667, 'prediction/mean': 0.38049263, 'recall': 0.64646465, 'global_step': 200}


In [48]:
# collect every prediction for the evaluation input into a list
results = [*linearEstimator.predict(evalInputFunction)]
# for result in results:
#     print (result['probabilities'])


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\owenm\AppData\Local\Temp\tmpj4p6ybt3\model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
[0.921097   0.07890306]
[0.6627439  0.33725616]
[0.28489915 0.7151009 ]
[0.3523236 0.6476764]
[0.7296085  0.27039152]
[0.25661772 0.7433823 ]
[0.434301 0.565699]
[0.89131635 0.10868357]
[0.3916335  0.60836655]
[0.2764681  0.72353184]
[0.3242148 0.6757852]
[0.89142805 0.10857191]
[0.15551491 0.84448504]
[0.626478 0.373522]
[0.16129273 0.8387073 ]
[0.47310045 0.5268996 ]
[0.8408533  0.15914668]
[0.17626677 0.82373327]
[0.62172145 0.37827852]
[0.9093746  0.09062541]
[0.88893825 0.11106174]
[0.9171823  0.08281767]
[0.35138455 0.6486154 ]
[0.92647207 0.07352798]
[0.34285513 0.65714484]
[0.43537036 0.5646297 ]
[0.7830283  0.21697173]
[0.71476763 0.28523234]
[0.7902318  0.20976822]
[0.17414916 0.825850