In [47]:
import pandas as pd
import tensorflow as tf
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv') 
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')

In [48]:
dfeval.shape, dftrain.shape

((264, 9), (627, 9))

In [49]:
dftrain.dtypes

sex                    object
age                   float64
n_siblings_spouses      int64
parch                   int64
fare                  float64
class                  object
deck                   object
embark_town            object
alone                  object
dtype: object

In [50]:
dftrain["embark_town"].unique()

array(['Southampton', 'Cherbourg', 'Queenstown', 'unknown'], dtype=object)

In [51]:
CATEGORICAL_COLUMNS = ["sex","n_siblings_spouses","parch","class","deck",
                       "embark_town","alone"]
NUMERIC_COLUMNS = ["age","fare"]
feature_columns = []

In [52]:
for feature_name in CATEGORICAL_COLUMNS:
    vocabulary = dftrain[feature_name].unique()
    feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))
feature_columns

[VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0),
 VocabularyListCategoricalColumn(key='n_siblings_spouses', vocabulary_list=(np.int64(1), np.int64(0), np.int64(3), np.int64(4), np.int64(2), np.int64(5), np.int64(8)), dtype=tf.int64, default_value=-1, num_oov_buckets=0),
 VocabularyListCategoricalColumn(key='parch', vocabulary_list=(np.int64(0), np.int64(1), np.int64(2), np.int64(5), np.int64(3), np.int64(4)), dtype=tf.int64, default_value=-1, num_oov_buckets=0),
 VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0),
 VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0),
 VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Southampton', 'Cherbourg', 'Queenstown', 'unknown'), dtype=tf.stri

In [53]:
for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype = tf.float32))
feature_columns

[VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0),
 VocabularyListCategoricalColumn(key='n_siblings_spouses', vocabulary_list=(np.int64(1), np.int64(0), np.int64(3), np.int64(4), np.int64(2), np.int64(5), np.int64(8)), dtype=tf.int64, default_value=-1, num_oov_buckets=0),
 VocabularyListCategoricalColumn(key='parch', vocabulary_list=(np.int64(0), np.int64(1), np.int64(2), np.int64(5), np.int64(3), np.int64(4)), dtype=tf.int64, default_value=-1, num_oov_buckets=0),
 VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0),
 VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0),
 VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Southampton', 'Cherbourg', 'Queenstown', 'unknown'), dtype=tf.stri

In [54]:
BATCH = 32
EPOCHS = 20

In [55]:
dftrain_encoded = pd.get_dummies(dftrain)
dfeval_encoded = pd.get_dummies(dfeval)

dftrain_encoded, dfeval_encoded = dftrain_encoded.align(dfeval_encoded,
                                                        join="left",
                                                        axis=1,
                                                        fill_value=0)

In [56]:
def input_function(data_df, label_df, num_epochs = EPOCHS, shuffle = True, batch_size = BATCH):
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    if shuffle:
        ds = ds.shuffle(1000)
    ds = ds.batch(batch_size)
    return ds

train_ds = input_function(dftrain,y_train)
eval_ds = input_function(dfeval, y_eval, shuffle=False)

X_train = dftrain_encoded.to_numpy().astype("float32")
X_eval = dfeval_encoded.to_numpy("float32")
y_train_np = y_train.to_numpy("float32")
y_eval_np = y_eval.to_numpy("float32")


In [57]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1, activation = 'sigmoid', input_shape = (X_train.shape[1],))
])

model.compile(optimizer = "adam", 
              loss = "binary_crossentropy",
               metrics = ["accuracy"])
model.fit(X_train, y_train_np,
         epochs = EPOCHS, 
         batch_size = BATCH, 
          validation_data = (X_eval, y_eval_np))


Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.6415 - loss: 9.0442 - val_accuracy: 0.6250 - val_loss: 6.0507
Epoch 2/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6154 - loss: 7.9842 - val_accuracy: 0.6250 - val_loss: 5.5493
Epoch 3/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5962 - loss: 8.8585 - val_accuracy: 0.6136 - val_loss: 5.0637
Epoch 4/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5770 - loss: 6.6992 - val_accuracy: 0.6061 - val_loss: 4.6459
Epoch 5/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5716 - loss: 6.0631 - val_accuracy: 0.5682 - val_loss: 4.2873
Epoch 6/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5295 - loss: 5.5651 - val_accuracy: 0.5417 - val_loss: 3.9744
Epoch 7/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x298f62819a0>

In [58]:
result = model.evaluate(X_eval, y_eval_np)
result

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5196 - loss: 1.0357 


[1.1251825094223022, 0.4886363744735718]

In [59]:
result[1] #accuracy

0.4886363744735718

In [None]:
import numpy as np

result = model.predict(X_eval).flatten()

dfeval = dfeval.copy()
dfeval["survived"] = result
print(dfeval[['sex', 'age', 'class', 'survived']].head(10))


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
      sex   age   class  survived
0    male  35.0   Third  0.572147
1    male  54.0   First  0.329082
2  female  58.0   First  0.886741
3  female  55.0  Second  0.824631
4    male  34.0  Second  0.641993
5  female  15.0   Third  0.462792
6  female   8.0   Third  0.302254
7    male  21.0   Third  0.372083
8  female  18.0   Third  0.556858
9  female  19.0   Third  0.523437


In [61]:
y_pred_binary = (np.array(result) > 0.5).astype(int).flatten()
from sklearn.metrics import classification_report
print(classification_report(y_eval_np, y_pred_binary))  

              precision    recall  f1-score   support

         0.0       0.60      0.55      0.57       165
         1.0       0.34      0.38      0.36        99

    accuracy                           0.49       264
   macro avg       0.47      0.47      0.47       264
weighted avg       0.50      0.49      0.49       264

