In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

In [2]:
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv') # training data
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv') # testing data

In [3]:
dftrain.head()

Unnamed: 0,survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,1,female,26.0,0,0,7.925,Third,unknown,Southampton,y
3,1,female,35.0,1,0,53.1,First,C,Southampton,n
4,0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y


In [7]:
dfeval.tail()

Unnamed: 0,survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
259,1,female,25.0,0,1,26.0,Second,unknown,Southampton,n
260,0,male,33.0,0,0,7.8958,Third,unknown,Southampton,y
261,0,female,39.0,0,5,29.125,Third,unknown,Queenstown,n
262,0,male,27.0,0,0,13.0,Second,unknown,Southampton,y
263,1,male,26.0,0,0,30.0,First,C,Cherbourg,y


In [10]:
y_train = dftrain.pop('survived')

In [11]:
y_eval = dfeval.pop('survived')

In [46]:
CATEGORICAL_COLUMNS = [
    'sex', 'class', 'deck', 'embark_town', 'alone'
]

NUMERIC_COLUMNS = ['age', 'n_siblings_spouses', 'parch', 'fare']


feature_cols = []
for feature_name in CATEGORICAL_COLUMNS:
    vocabulary = dftrain[feature_name].unique()
    feature_cols.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))
    
for feature_name in NUMERIC_COLUMNS:
    feature_cols.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))


In [39]:
def make_input_function(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
    def input_function():
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
        
        if shuffle:
            ds.shuffle(1000)
            
        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds
    
    return input_function

In [40]:
train_input_function = make_input_function(dftrain, y_train)
evel_input_function = make_input_function(dfeval, y_eval, num_epochs=1, shuffle=False)

In [41]:
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_cols)
clear_output()

In [42]:
linear_est.train(train_input_function)

INFO:tensorflow:Calling model_fn.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpidl_eugt/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 0.6931472, step = 0
INFO:tensorflow:global_step/sec: 357.816
INFO:tensorflow:loss = 0.5111773, step = 100 (0.280 sec)
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 200...
INFO:tensorflow:Saving checkpoints for 200 into /tmp/tmpidl_eugt/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 200...
INFO:tensorflow:Loss for final step: 0.34793538.


<tensorflow_estimator.python.estimator.canned.linear.LinearClassifierV2 at 0x7fa239bc37c0>

In [44]:
result = linear_est.evaluate(evel_input_function)
clear_output()
print(result['accuracy'])

0.7651515
