<h1> TENSORFLOW MACHINE LEARNING </h1>

In [13]:
# CORE ALGORITHMS

# importing modules
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow.compat.v2.feature_column as fc

import tensorflow as tf

# Load the Titanic passenger data: one CSV for training, one for evaluation.
TRAIN_CSV_URL = 'https://storage.googleapis.com/tf-datasets/titanic/train.csv'
EVAL_CSV_URL = 'https://storage.googleapis.com/tf-datasets/titanic/eval.csv'

dftrain = pd.read_csv(TRAIN_CSV_URL)  # training data
dfeval = pd.read_csv(EVAL_CSV_URL)  # evaluation (testing) data

# Detach the 'survived' column from each frame so it can be used as the label;
# pop() removes the column from the feature frame in place and returns it.
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')

In [14]:
# Preview the training features: head() shows the first 5 rows by default.
# The 'survived' label column is absent because it was popped off when the data was loaded.
dftrain.head()

Unnamed: 0,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,female,26.0,0,0,7.925,Third,unknown,Southampton,y
3,female,35.0,1,0,53.1,First,C,Southampton,n
4,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y


In [15]:
# Building the feature columns (this only describes the model inputs; nothing is trained here)

CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',
                       'embark_town', 'alone']
NUMERIC_COLUMNS = ['age', 'fare']

# Categorical features: pair each column name with the distinct values that
# column takes in the training frame, which become its vocabulary.
feature_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        column_name, dftrain[column_name].unique())
    for column_name in CATEGORICAL_COLUMNS
]

# Numeric features are appended after the categorical ones and read as float32.
feature_columns += [
    tf.feature_column.numeric_column(column_name, dtype=tf.float32)
    for column_name in NUMERIC_COLUMNS
]


# Show the resulting column definitions.
print(feature_columns)

[VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='n_siblings_spouses', vocabulary_list=(1, 0, 3, 4, 2, 5, 8), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='parch', vocabulary_list=(0, 1, 2, 5, 3, 4), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Southampton', 'Cherbourg', 'Queenstown', 'unknown'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='alone', vocabulary_list=('n', 'y'), dtype=tf.string, def

In [16]:
# The training process - Input Function
def make_input_fn(data_df, label_df, num_epochs = 10, shuffle = True, batch_size = 32,
                  shuffle_buffer_size = 1000):
    """Build a zero-argument function that produces a batched tf.data.Dataset.

    The dataset is not created here; it is created each time the returned
    function is called.

    Args:
        data_df: Feature data; it is converted with dict(data_df), so it must
            be something dict() accepts (e.g. a pandas DataFrame, giving a
            column-name -> column mapping).
        label_df: Labels paired element-wise with the features.
        num_epochs: How many times the batched dataset is repeated.
        shuffle: If true, shuffle the elements before batching.
        batch_size: Number of elements per batch.
        shuffle_buffer_size: Buffer size handed to Dataset.shuffle() when
            `shuffle` is true. Defaults to 1000, the value that used to be
            hard-coded.

    Returns:
        A function taking no arguments that returns the dataset.
    """
    def input_function(): # inner function, this will be returned
        # Slice the (features, labels) pair into one dataset element per row.
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
        if shuffle:
            ds = ds.shuffle(shuffle_buffer_size) # randomize the element order
        # Batch first, then repeat the whole batched sequence num_epochs times.
        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds
    return input_function # hand back the function itself, not a dataset

# Training input function: relies on make_input_fn's defaults for epochs, shuffling and batch size.
train_input_fn = make_input_fn(data_df=dftrain, label_df=y_train)
# Evaluation input function: a single pass over the data, in its original order.
eval_input_fn = make_input_fn(data_df=dfeval, label_df=y_eval, shuffle=False, num_epochs=1)

In [21]:
# Build a linear classifier over the feature columns defined earlier.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)

# Fit on the training input function, then score on the evaluation one.
linear_est.train(input_fn=train_input_fn)
result = linear_est.evaluate(input_fn=eval_input_fn)  # metrics for the evaluation data

clear_output()  # wipe the training log output from the cell
print(result['accuracy'])  # report only the accuracy metric

0.77272725


In [36]:
# dictionary of one prediction
# Up to this point `result` holds the metrics returned by evaluate(), which
# cannot be indexed with [0]. Rebind it to the per-passenger predictions so this
# cell (and the ones after it that read `result[0]`) work on a fresh
# Restart & Run All. predict() is lazy, so list() materialises every prediction.
result = list(linear_est.predict(eval_input_fn))
clear_output() # clear the console of prediction log output
print(dfeval.loc[0]) # print the person
print(result[0]['probabilities'][1]) # chance of survival: index 1

sex                          male
age                          35.0
n_siblings_spouses              0
parch                           0
fare                         8.05
class                       Third
deck                      unknown
embark_town           Southampton
alone                           y
Name: 0, dtype: object
0.063969314


In [35]:
print(result[0]['probabilities'][0]) # chance of not surviving: index 0

0.9360307


In [37]:
print(y_eval.loc[0]) # actual 'survived' label for the same passenger (presumably 1 = survived, 0 = did not)

0


In [None]:
# Classification