In [33]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow.compat.v2.feature_column as fc
import tensorflow as tf

In [34]:
# Building feature columns: describe every categorical and numeric data frame column as a feature column, so that all of them end up represented numerically

In [35]:
# Column groups (upper-case names mark values treated as constants).
CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']
NUMERIC_COLUMNS = ['age', 'fare']

# Load the training and evaluation CSVs, then split the 'survived' label
# column off each frame (pop removes it from the features and returns it).
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
y_train = dftrain.pop('survived')
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')
y_eval = dfeval.pop('survived')

feature_columns = []

# Categorical features: the vocabulary is the set of distinct values seen in
# the training frame for that column (e.g. 'male' / 'female' for 'sex').
for feature_name in CATEGORICAL_COLUMNS:
    vocabulary = dftrain[feature_name].unique()
    feature_columns.append(
        tf.feature_column.categorical_column_with_vocabulary_list(
            key=feature_name,
            vocabulary_list=vocabulary,
        )
    )

# Numeric features are passed through as float32 values.
for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(
        tf.feature_column.numeric_column(key=feature_name, dtype=tf.float32)
    )

print(feature_columns)

[VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='n_siblings_spouses', vocabulary_list=(1, 0, 3, 4, 2, 5, 8), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='parch', vocabulary_list=(0, 1, 2, 5, 3, 4), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Southampton', 'Cherbourg', 'Queenstown', 'unknown'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='alone', vocabulary_list=('n', 'y'), dtype=tf.string, def

In [36]:
# Inspect the distinct values stored in one data frame column. Because this is
# the cell's last expression, the resulting array is shown as the cell output.
dftrain['sex'].unique()    # distinct values of the 'sex' column, in order of first appearance
# To inspect a different column, swap the key, e.g.:
# dftrain['embark_town'].unique()

array(['male', 'female'], dtype=object)

In [37]:
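# Input function and model - the input function turns the data frames we loaded above into batched tf.data.Dataset objects that feed the model. The model itself is built from the feature columns, which bind the data to the values we have transformed from categorical to numeric plus the pure numeric ones, and is then trained on the training data. Our evaluation data is used to check the accuracy of the trained model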

In [38]:
# this input-function boilerplate is usually the same from one project to the next
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
    """Create a zero-argument input function that builds a tf.data.Dataset.

    Args:
        data_df: Feature data; converted with ``dict(data_df)`` so each column
            becomes one named entry of the dataset's feature dict.
        label_df: Labels paired element-wise with the rows of ``data_df``.
        num_epochs: How many times the batched dataset is repeated.
        shuffle: When true, shuffle the rows (buffer of 1000) before batching.
        batch_size: Number of rows per batch.

    Returns:
        A callable taking no arguments that returns the configured dataset of
        ``(features, label)`` batches each time it is called.
    """
    def _input_fn():
        # Pair the column-name -> values mapping with the labels, row by row.
        features = dict(data_df)
        dataset = tf.data.Dataset.from_tensor_slices((features, label_df))
        if shuffle:
            dataset = dataset.shuffle(1000)   # randomise row order before batching
        batched = dataset.batch(batch_size)
        return batched.repeat(num_epochs)

    return _input_fn

# Input functions: training keeps the defaults (shuffled, 10 epochs, batches
# of 32); evaluation makes a single, unshuffled pass over the eval data.
train_input_fn = make_input_fn(data_df=dftrain, label_df=y_train)
eval_input_fn = make_input_fn(data_df=dfeval, label_df=y_eval, num_epochs=1, shuffle=False)

# Linear model (LinearClassifier uses logistic regression), built on the
# feature columns defined earlier in the notebook.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)

# Fit on the training data, then collect metrics on the evaluation data.
linear_est.train(input_fn=train_input_fn)
result = linear_est.evaluate(input_fn=eval_input_fn)

# Drop the training log from the cell output before showing the accuracy metric.
clear_output()
print(result['accuracy'])

0.7651515


In [42]:
# Run the trained model over the evaluation data; list() collects everything
# predict() yields, giving one prediction dict per evaluation row.
result = list(linear_est.predict(eval_input_fn))

a = 5   # position of the evaluation row to inspect

# `result` is a plain list, so result[a] is positional. Use positional .iloc
# for the features and the label too, so all three lookups refer to the same
# row even if the data frame index is not the default 0..n-1 range.
# (eval_input_fn is built with shuffle=False, so predictions follow row order.)
print(dfeval.iloc[a])    # features of the selected person
print(y_eval.iloc[a])    # actual label: whether that person survived
print(result[a]['probabilities'][1])   # predicted probability for class 1 (survived)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\DZILAJ~1\AppData\Local\Temp\tmpubva8omo\model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
sex                       female
age                         15.0
n_siblings_spouses             0
parch                          0
fare                      8.0292
class                      Third
deck                     unknown
embark_town           Queenstown
alone                          y
Name: 5, dtype: object
1
0.68845195
