In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

In [None]:
# Show which TensorFlow build this notebook is running against.
tf.__version__

Loading dataset

In [None]:
# Training split: read the CSV, then pop the `survived` column out of the
# feature frame so it can serve as the label series.
train_df = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
y_train = train_df.pop('survived')

# Evaluation split: same treatment.
eval_df = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')
y_eval = eval_df.pop('survived')

In [None]:
# Peek at the first five rows of the training features.
train_df.head(n=5)

In [None]:
# (rows, columns) of the training features; the `survived` label was already popped out.
train_df.shape

In [None]:
# (rows, columns) of the evaluation features; the `survived` label was already popped out.
eval_df.shape

Data analysis

In [None]:
# Histogram of the `age` column in the training split, drawn on an explicit
# Axes so the figure carries its own title and axis labels.
fig, ax = plt.subplots()
train_df['age'].hist(bins=20, ax=ax)
# Trailing semicolon keeps the list returned by `ax.set` out of the cell output.
ax.set(title='Age distribution (train)', xlabel='age', ylabel='passengers');

In [None]:
# Row count per value of `sex` in the training split, as a labelled horizontal bar chart.
fig, ax = plt.subplots()
train_df['sex'].value_counts().plot(kind='barh', ax=ax)
# Trailing semicolon keeps the list returned by `ax.set` out of the cell output.
ax.set(title='Passengers by sex (train)', xlabel='passengers', ylabel='sex');

In [None]:
# Row count per value of `class` in the training split, as a labelled horizontal bar chart.
fig, ax = plt.subplots()
train_df['class'].value_counts().plot(kind='barh', ax=ax)
# Trailing semicolon keeps the list returned by `ax.set` out of the cell output.
ax.set(title='Passengers by class (train)', xlabel='passengers', ylabel='class');

In [None]:
# Mean of the `survived` label within each `sex` group. Grouping the label
# series directly by the aligned feature column avoids building a joined frame.
survival_by_sex = y_train.groupby(train_df['sex']).mean()

fig, ax = plt.subplots()
survival_by_sex.plot(kind='barh', ax=ax)
# The plotted value is an unscaled mean, not a percentage, so the axis is
# labelled as a rate. NOTE(review): assumes `survived` is a 0/1 indicator — confirm.
ax.set(title='Survival rate by sex (train)', xlabel='survival rate (fraction)', ylabel='sex');

In [None]:
# Show the first five feature rows again for reference before the feature
# columns are defined in the next cell.
train_df.head(n=5)

In [None]:
# Columns handled as categories (vocabulary taken from the values present in
# the training frame) versus columns passed through as float32 numbers.
categorical_cols = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']
numeric_cols = ['age', 'fare']

# Categorical columns come first, followed by the numeric ones.
# Comprehensions keep the per-column temporaries out of the notebook namespace.
feature_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        name, train_df[name].unique()
    )
    for name in categorical_cols
] + [
    tf.feature_column.numeric_column(name, dtype=tf.float32)
    for name in numeric_cols
]

feature_columns

In [None]:
# Distinct values found in the `sex` column of the training features.
pd.unique(train_df['sex'])

In [None]:
def make_input_fn(train_df, y_train, epoches=10, batch_size=32, shuffle=True,
                  shuffle_buffer_size=1000, seed=None):
    """Build a zero-argument function that returns a batched ``tf.data.Dataset``.

    Args:
        train_df: Feature table. It is converted with ``dict(...)`` so that
            each column becomes one named entry of the features mapping.
        y_train: Labels, sliced together with the features.
        epoches: How many times the batched dataset is repeated.
        batch_size: Number of examples per batch.
        shuffle: Whether to shuffle the examples before batching.
        shuffle_buffer_size: Buffer size passed to ``Dataset.shuffle``.
            Defaults to 1000, the value that used to be hard-coded.
        seed: Optional seed passed to ``Dataset.shuffle``. The default
            ``None`` leaves the shuffle unseeded.

    Returns:
        A callable taking no arguments. Each call builds and returns a new
        dataset of ``(features, labels)`` batches.
    """
    def input_function():
        ds = tf.data.Dataset.from_tensor_slices((dict(train_df), y_train))
        if shuffle:
            ds = ds.shuffle(shuffle_buffer_size, seed=seed)
        # Batch before repeating so batches never straddle two epochs.
        return ds.batch(batch_size).repeat(epoches)
    return input_function

In [None]:
# Training pipeline: uses make_input_fn's defaults for epochs, batch size and shuffling.
train_input_fn = make_input_fn(train_df, y_train)

# Evaluation pipeline: a single, unshuffled pass over the eval split.
eval_input_fn = make_input_fn(
    eval_df,
    y_eval,
    shuffle=False,
    epoches=1,
)

Training linear model

In [None]:
# Linear classifier over the categorical and numeric feature columns built earlier.
# NOTE(review): tf.estimator / tf.feature_column appear to be deprecated in recent
# TensorFlow 2.x releases — confirm the TF version this notebook is pinned to.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)

In [None]:
# Fit the classifier on the training pipeline (the one built with make_input_fn's defaults).
linear_est.train(train_input_fn)

In [None]:
# Score the trained classifier on the single-pass, unshuffled eval pipeline;
# the cell displays whatever `evaluate` returns.
linear_est.evaluate(eval_input_fn)