In [None]:
# The PyPI distribution is named "scikit-learn"; "sklearn" is only a deprecated
# alias. %pip (rather than !pip) installs into the running kernel's environment.
%pip install -q scikit-learn

In [None]:
# This magic is not required unless you are in a notebook.
# Keep this comment on its own line: a line magic receives the rest of its line
# as its argument, so a trailing comment would be passed to it as well.
%tensorflow_version 2.x

`%tensorflow_version` only switches the major version: 1.x or 2.x.
You set: `2.x  # this line is not required unless you are in a notebook`. This will be interpreted as: `2.x`.


TensorFlow is already loaded. Please restart the runtime to change versions.


In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow.compat.v2.feature_column as fc

import tensorflow as tf

In [None]:
# Fetch the Titanic training and evaluation (testing) splits as DataFrames.
dftrain = pd.read_csv(
    'https://storage.googleapis.com/tf-datasets/titanic/train.csv'
)
dfeval = pd.read_csv(
    'https://storage.googleapis.com/tf-datasets/titanic/eval.csv'
)
print(dftrain.head())

# pop() removes the 'survived' column from each frame and returns it, so the
# frames keep only the input features and the labels live in their own variables.
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')

# Optional inspection snippets (disabled):
#   print(dftrain.head())
#   print(dftrain.loc[0], y_train.loc[0])  # look up one specific row by its index label
#   print(dftrain["age"])

# Columns treated as categorical: each takes one of a fixed set of discrete
# values, which must be mapped to numbers before the model can use them.
CATEGORICAL_COLUMNS = [
    'sex',
    'n_siblings_spouses',
    'parch',
    'class',
    'deck',
    'embark_town',
    'alone',
]
# Columns that are fed to the model as plain numbers.
NUMERIC_COLUMNS = ['age', 'fare']

feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
  # Every distinct value found in the training data becomes a vocabulary entry,
  # e.g. 'sex' -> ('male', 'female').
  vocabulary = dftrain[feature_name].unique()
  feature_columns.append(
      tf.feature_column.categorical_column_with_vocabulary_list(
          key=feature_name, vocabulary_list=vocabulary))
for feature_name in NUMERIC_COLUMNS:
  feature_columns.append(
      tf.feature_column.numeric_column(key=feature_name, dtype=tf.float32))

print(feature_columns)


   survived     sex   age  ...     deck  embark_town  alone
0         0    male  22.0  ...  unknown  Southampton      n
1         1  female  38.0  ...        C    Cherbourg      n
2         1  female  26.0  ...  unknown  Southampton      y
3         1  female  35.0  ...        C  Southampton      n
4         0    male  28.0  ...  unknown   Queenstown      y

[5 rows x 10 columns]
[VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='n_siblings_spouses', vocabulary_list=(1, 0, 3, 4, 2, 5, 8), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='parch', vocabulary_list=(0, 1, 2, 5, 3, 4), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='deck', vocabu

In [None]:
# Only the last expression of a cell is displayed automatically, so the first
# result is printed explicitly; otherwise it would be computed and discarded.
print(dftrain["sex"].unique())  # all distinct values of the 'sex' column
dftrain["embark_town"].unique()  # all distinct values of the 'embark_town' column

In [None]:
# Preview the first rows of the training features.
dftrain.head()

In [None]:
# Summary statistics of the training feature columns.
dftrain.describe()

In [None]:
# First few training labels (the 'survived' values, 0 or 1).
y_train.head()

0    0
1    1
2    1
3    1
4    0
Name: survived, dtype: int64

In [None]:
# (rows, columns) of the training features.
dftrain.shape

(627, 9)

In [None]:
# Distribution of passenger ages in the training data, split into 20 bins.
dftrain['age'].hist(bins=20)

In [None]:
# Horizontal bar chart of how many passengers fall into each 'sex' value.
dftrain['sex'].value_counts().plot(kind='barh')

In [None]:
# Horizontal bar chart of how many passengers travelled in each class.
dftrain['class'].value_counts().plot(kind='barh')

In [None]:
# Survival rate per sex as a horizontal bar chart.
# NOTE(review): the plotted value is the mean of the 0/1 label, i.e. a fraction
# between 0 and 1, even though the axis label reads '% survive'.
(
    pd.concat([dftrain, y_train], axis=1)  # re-attach the label column to the features
    .groupby('sex')
    .survived.mean()
    .plot(kind='barh')
    .set_xlabel('% survive')
)

In [None]:
# (rows, columns) of the evaluation features.
dfeval.shape

(264, 9)

In [None]:
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32,
                  shuffle_buffer_size=1000):
  """Build an input function that produces a batched tf.data.Dataset.

  Args:
    data_df: DataFrame of features; it is converted to a dict of columns.
    label_df: labels belonging to the rows of data_df.
    num_epochs: how many times the batched dataset is repeated.
    shuffle: whether the examples are shuffled before batching.
    batch_size: number of examples per batch.
    shuffle_buffer_size: buffer size handed to Dataset.shuffle. The default
      keeps the previously hard-coded value of 1000; raise it for datasets
      with more rows than that if a full shuffle is wanted.

  Returns:
    A zero-argument function. Calling it builds and returns the dataset of
    (features, label) batches; nothing is built until it is called.
  """
  def input_function():  # inner function, this is what gets returned
    # Pair every feature row with its label.
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    if shuffle:
      ds = ds.shuffle(shuffle_buffer_size)  # randomize the order of the examples
    # Group the examples into batches, then repeat the batches num_epochs times.
    ds = ds.batch(batch_size).repeat(num_epochs)
    return ds  # the whole dataset object, not a single batch
  return input_function  # hand back the function itself for later use

# make_input_fn only builds and returns the input functions; nothing is called
# here. Calling train_input_fn() would run the input function and produce the
# dataset object that is fed to the model.
train_input_fn = make_input_fn(data_df=dftrain, label_df=y_train)
eval_input_fn = make_input_fn(data_df=dfeval, label_df=y_eval, num_epochs=1, shuffle=False)

# Create a linear estimator from the feature columns built earlier.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)

In [None]:
# Fit the model on the training input, then score it on the evaluation input.
linear_est.train(input_fn=train_input_fn)
# The metrics are kept in a variable so individual components such as
# 'accuracy' can be looked up afterwards.
result = linear_est.evaluate(input_fn=eval_input_fn)

clear_output()  # clears the console output produced so far
print(result['accuracy'])  # result is simply a dict of stats about the model
print(result)

0.75
{'accuracy': 0.75, 'accuracy_baseline': 0.625, 'auc': 0.8275482, 'auc_precision_recall': 0.7980995, 'average_loss': 0.50213933, 'label/mean': 0.375, 'loss': 0.4963032, 'precision': 0.64102566, 'prediction/mean': 0.4619135, 'recall': 0.75757575, 'global_step': 200}


In [None]:
# Just like training, prediction needs an input function. eval_input_fn is not
# shuffled, so prediction i belongs to row i of dfeval / y_eval.
result = list(linear_est.predict(eval_input_fn))

# 'probabilities' holds [chance of not surviving, chance of surviving].
print(result[0]['probabilities'])
print(result[0]['probabilities'][1])

print(dfeval.loc[0])  # features of the passenger this prediction was made for
# Index 1 is the survival probability, so the message must say "surviving".
print("Chance of surviving: " + str(result[0]['probabilities'][1]))

# Actual outcome of the same passenger (row 0), to compare with the prediction.
print(y_eval.loc[0])

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpehhw0a55/model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
[0.86744106 0.13255894]
0.13255894
sex                          male
age                            35
n_siblings_spouses              0
parch                           0
fare                         8.05
class                       Third
deck                      unknown
embark_town           Southampton
alone                           y
Name: 0, dtype: object
Chance of not surviving: 0.13255894
1
