### Setup and Imports

In [1]:
#!pip install tensorflow

In [2]:
#!pip install -q scikit-learn

In [3]:
#%tensorflow_version 2.x  # Colab-only magic; not needed (or available) in a local Jupyter environment

In [4]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow.compat.v2.feature_column as fc
import tensorflow as tf

In [5]:
#VocabularyListCategoricalColumn
#(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0)
#(key='n_siblings_spouses', vocabulary_list=(1, 0, 3, 4, 2, 5, 8), dtype=tf.int64, default_value=-1, num_oov_buckets=0), 
#(key='parch', vocabulary_list=(0, 1, 2, 5, 3, 4), dtype=tf.int64, default_value=-1, num_oov_buckets=0), 
#(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0), 
#(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0), 
#(key='embark_town', vocabulary_list=('Southampton', 'Cherbourg', 'Queenstown', 'unknown'), dtype=tf.string, default_value=-1, num_oov_buckets=0), 
#(key='alone', vocabulary_list=('n', 'y'), dtype=tf.string, default_value=-1, num_oov_buckets=0), 
#NumericColumn
#(key='age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 
#(key='fare', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [6]:
### Load dataset.

# Read the training split and the evaluation split from the local data directory.
dftrain, dfeval = (
    pd.read_csv(csv_path)
    for csv_path in ('data/Titanic_train.csv', 'data/Titanic_eval.csv')
)
# pop() removes the 'survived' column from each frame in place and returns it,
# so the frames keep only the feature columns and the labels live separately.
y_train, y_eval = dftrain.pop('survived'), dfeval.pop('survived')



### Feature Columns
CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']
NUMERIC_COLUMNS = ['age', 'fare']

# Categorical features: one vocabulary-list column per feature, where the
# vocabulary is the set of distinct values found in the training frame.
feature_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        column_name, dftrain[column_name].unique()
    )
    for column_name in CATEGORICAL_COLUMNS
]

# Numeric features follow the categorical ones, declared as float32 columns.
feature_columns.extend(
    tf.feature_column.numeric_column(column_name, dtype=tf.float32)
    for column_name in NUMERIC_COLUMNS
)

print(feature_columns)

Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.
Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.
[VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='n_siblings_spouses', vocabulary_list=(1, 0, 3, 4, 2, 5, 8), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='parch', vocabulary_list=(0, 1, 2, 5, 3, 4), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='class', 

In [7]:
### Input Function: by default the data is shuffled, split into mini-batches of 32, and repeated for 10 epochs.

def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32,
                  shuffle_buffer_size=1000):
    """Build a zero-argument input function that yields a batched tf.data.Dataset.

    Args:
        data_df: Mapping-like container of feature columns (converted with
            ``dict(data_df)``), e.g. a pandas DataFrame.
        label_df: Labels aligned with the rows of ``data_df``.
        num_epochs: Number of times the batched dataset is repeated.
        shuffle: Whether to shuffle the examples before batching.
        batch_size: Number of examples per batch.
        shuffle_buffer_size: Size of the buffer used by ``Dataset.shuffle``.
            The default (1000) matches the previous hard-coded value; pass a
            value at least as large as the number of rows when the data has
            more than 1000 examples, otherwise the shuffle is only partial.

    Returns:
        A function taking no arguments that constructs and returns the dataset.
    """

    def input_function():
        """Create the dataset of (feature dict, label) batches."""
        # Pair the feature columns with their labels, one element per row.
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))

        # Shuffle individual examples before they are grouped into batches.
        if shuffle:
            ds = ds.shuffle(shuffle_buffer_size)

        # Batch first, then repeat the whole batched sequence num_epochs times.
        return ds.batch(batch_size).repeat(num_epochs)

    # Return the function object itself; the caller decides when to invoke it.
    return input_function

# Build the input functions that the model will call to obtain its datasets.
# Training relies on make_input_fn's defaults.
train_input_fn = make_input_fn(data_df=dftrain, label_df=y_train)
# Evaluation is not training, so it makes a single pass and needs no shuffling.
eval_input_fn = make_input_fn(data_df=dfeval, label_df=y_eval, shuffle=False, num_epochs=1)



### Building the Model

# Linear classifier over the feature columns defined above.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)

# Training the Model: the estimator calls train_input_fn to get its data.
linear_est.train(input_fn=train_input_fn)

Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/rf/cjpyn_m574x3_dg2jvdc2dbw0000gn/T/tmplwi96vn4', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'wor

2023-11-06 01:40:03.760400: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/rf/cjpyn_m574x3_dg2jvdc2dbw0000gn/T/tmplwi96vn4/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
INFO:tensorflow:loss = 0.6931472, step = 0
INFO:tensorflow:global_step/sec: 616.394
INFO:tensorflow:loss = 0.6909045, step = 100 (0.163 sec)
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 200...
INFO:tensorflow:Saving checkpoints for 200 into /var/folders/rf/cjpyn_m574x3_dg2jvdc2dbw0000gn/T/tmplwi96vn4/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 200...
INFO:tensorflow:Loss for final step: 0.50155634.


<tensorflow_estimator.python.estimator.canned.linear.LinearClassifierV2 at 0x16c47f730>

In [8]:
# Evaluation
# Compute the model's metrics on the evaluation data.
result = linear_est.evaluate(input_fn=eval_input_fn)
clear_output()  # clears the console output
# Print every metric, then the accuracy on its own line.
print(result, result['accuracy'], sep='\n')

{'accuracy': 0.74242425, 'accuracy_baseline': 0.625, 'auc': 0.8330579, 'auc_precision_recall': 0.7884418, 'average_loss': 0.47518036, 'label/mean': 0.375, 'loss': 0.46720648, 'precision': 0.65048546, 'prediction/mean': 0.39985767, 'recall': 0.67676765, 'global_step': 200}
0.74242425


In [9]:
### Predictions

# NOTE(review): `result` is rebound here from the evaluation metrics dict to a
# list with one prediction dict per evaluation example; later cells index it.
result = [*linear_est.predict(input_fn=eval_input_fn)]

# Look at a single entry of the list to see what one prediction contains.
first_prediction = result[0]
print(first_prediction)
# Two class probabilities; presumably ordered [did not survive, survived],
# following class ids 0 and 1 - confirm against the label encoding.
print(first_prediction["probabilities"])

INFO:tensorflow:Calling model_fn.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/rf/cjpyn_m574x3_dg2jvdc2dbw0000gn/T/tmplwi96vn4/model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
{'logits': array([-2.5259154], dtype=float32), 'logistic': array([0.07406127], dtype=float32), 'probabilities': array([0.9259388 , 0.07406127], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object), 'all_class_ids': array([0, 1], dtype=int32), 'all_classes': array([b'0', b'1'], dtype=object)}
[0.9259388  0.07406127]


In [10]:
# Print one evaluation passenger's full feature row, followed by the model's
# probability for class 1 (presumably "survived" - confirm label encoding).
# In the saved output this is a male passenger with a probability of about 7%.
passenger_idx = 0
print(dfeval.loc[passenger_idx])
print(result[passenger_idx]["probabilities"][1])

sex                          male
age                          35.0
n_siblings_spouses              0
parch                           0
fare                         8.05
class                       Third
deck                      unknown
embark_town           Southampton
alone                           y
Name: 0, dtype: object
0.07406127


In [11]:
# Print one evaluation passenger's full feature row, followed by the model's
# probability for class 1 (presumably "survived" - confirm label encoding).
# In the saved output this is a 58-year-old female in First class with a
# probability of about 76%.
passenger_idx = 2
print(dfeval.loc[passenger_idx])
print(result[passenger_idx]["probabilities"][1])

sex                        female
age                          58.0
n_siblings_spouses              0
parch                           0
fare                        26.55
class                       First
deck                            C
embark_town           Southampton
alone                           y
Name: 2, dtype: object
0.758659


In [12]:
# Print one evaluation passenger's full feature row, followed by the model's
# probability for class 1 (presumably "survived" - confirm label encoding).
# In the saved output this is an 8-year-old female in Third class with a
# probability of about 59%.
passenger_idx = 6
print(dfeval.loc[passenger_idx])
print(result[passenger_idx]["probabilities"][1])

sex                        female
age                           8.0
n_siblings_spouses              3
parch                           1
fare                       21.075
class                       Third
deck                      unknown
embark_town           Southampton
alone                           n
Name: 6, dtype: object
0.585832
