In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D,MaxPool2D,Dense,BatchNormalization,Dropout,Input,Activation,Flatten
from tensorflow.keras.layers.experimental.preprocessing import Rescaling,RandomRotation,RandomZoom,RandomFlip,Resizing
from tensorflow.keras.models import Model
import tensorflow_datasets as tfds
import numpy as np 
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import re

In [2]:
#train_ds, train_info=tfds.load('titanic', split='train', with_info=True,shuffle_files=True, as_supervised=True,data_dir='./data')
train_ds,ds_info  = tfds.load('titanic', split='train',data_dir='./data',as_supervised=True, with_info=True)

In [3]:
next(iter(train_ds))

({'age': <tf.Tensor: shape=(), dtype=float32, numpy=30.0>,
  'boat': <tf.Tensor: shape=(), dtype=string, numpy=b'Unknown'>,
  'body': <tf.Tensor: shape=(), dtype=int32, numpy=-1>,
  'cabin': <tf.Tensor: shape=(), dtype=string, numpy=b'Unknown'>,
  'embarked': <tf.Tensor: shape=(), dtype=int64, numpy=2>,
  'fare': <tf.Tensor: shape=(), dtype=float32, numpy=13.0>,
  'home.dest': <tf.Tensor: shape=(), dtype=string, numpy=b'Sarnia, ON'>,
  'name': <tf.Tensor: shape=(), dtype=string, numpy=b'McCrie, Mr. James Matthew'>,
  'parch': <tf.Tensor: shape=(), dtype=int32, numpy=0>,
  'pclass': <tf.Tensor: shape=(), dtype=int64, numpy=1>,
  'sex': <tf.Tensor: shape=(), dtype=int64, numpy=0>,
  'sibsp': <tf.Tensor: shape=(), dtype=int32, numpy=0>,
  'ticket': <tf.Tensor: shape=(), dtype=string, numpy=b'233478'>},
 <tf.Tensor: shape=(), dtype=int64, numpy=0>)

In [4]:
df = tfds.as_dataframe(train_ds,ds_info)
for col, dtype in df.dtypes.items():
    if dtype == np.object:  # Only process byte object columns.
        df[col] = df[col].apply(lambda x: x.decode("utf-8"))
df.head(5)

Unnamed: 0,features/age,features/boat,features/body,features/cabin,features/embarked,features/fare,features/home.dest,features/name,features/parch,features/pclass,features/sex,features/sibsp,features/ticket,survived
0,30.0,Unknown,-1,Unknown,2,13.0,"Sarnia, ON","McCrie, Mr. James Matthew",0,1,0,0,233478,0
1,37.0,Unknown,98,Unknown,2,7.925,"Ruotsinphytaa, Finland New York, NY","Gustafsson, Mr. Anders Vilhelm",0,2,0,2,3101276,0
2,28.0,9,-1,Unknown,2,13.0,Spain,"Reynaldo, Ms. Encarnacion",0,1,1,0,230434,1
3,18.0,Unknown,-1,Unknown,2,73.5,"Lyndhurst, England","Davies, Mr. Charles Henry",0,1,0,0,S.O.C. 14879,0
4,-1.0,Unknown,-1,Unknown,0,7.8958,Unknown,"Gheorgheff, Mr. Stanio",0,2,0,0,349254,0


In [5]:
df['features/cabin'].unique()

array(['Unknown', 'B73', 'D26', 'D', 'B10', 'A5', 'C124', 'B96 B98',
       'E50', 'C99', 'B94', 'E68', 'C6', 'B57 B59 B63 B66', 'E8', 'E24',
       'D20', 'B71', 'E67', 'B22', 'C65', 'D7', 'B39', 'B51 B53 B55',
       'C103', 'C68', 'E46', 'B58 B60', 'G6', 'C126', 'B19', 'F G73',
       'B35', 'D47', 'A29', 'D28', 'D21', 'B41', 'C78', 'F33', 'C110',
       'D48', 'D45', 'D33', 'A23', 'D10 D12', 'E44', 'B20', 'B77', 'B101',
       'E52', 'E60', 'D19', 'E38', 'C101', 'C30', 'C23 C25 C27', 'C111',
       'F2', 'D15', 'E12', 'E101', 'A6', 'C50', 'B49', 'D22', 'A34',
       'D30', 'C55 C57', 'A24', 'C106', 'E33', 'F', 'C2', 'B69', 'C125',
       'C87', 'B42', 'B80', 'F G63', 'B28', 'E77', 'E31', 'D36', 'D17',
       'C92', 'C39', 'B78', 'B52 B54 B56', 'C80', 'C7', 'C104', 'B18',
       'D46', 'B30', 'E49', 'A31', 'F38', 'C132', 'E121', 'A14', 'B45',
       'E63', 'D35', 'B38', 'C118', 'E45', 'C91', 'C54', 'C22 C26', 'B50',
       'C93', 'C52', 'C85', 'D40', 'A7', 'A18', 'C89', 'C123', 'B82

In [6]:
categorical_columns=['features/boat','features/cabin','features/home.dest','features/name','features/ticket']
numeric_columns=['features/age', 'features/body','features/embarked','features/fare','features/parch','features/pclass','features/sex','features/sibsp','features/ticket']

In [7]:

one_hot_encoder=OneHotEncoder(drop='first',sparse=False)
one_hot_encoder.fit(df[categorical_columns])
train_categorical=one_hot_encoder.transform(df[categorical_columns]) 
#one_hot_columns=[x + '_' + str(y)  for y in one_hot_encoder.categories_[i] for i,x in enumerate(one_hot_encoder.categories_)]
col_ohe_nm=[]
for i1,x1 in enumerate(one_hot_encoder.categories_):     
    col_ohe_nm.extend([categorical_columns[i1] + '_' + str(y1)  for y1 in x1.tolist()[1:]])

print(train_categorical.shape)
categorical_df=pd.DataFrame(train_categorical,columns=col_ohe_nm)

numeric_df=df[numeric_columns]
categorical_df


(1309, 2816)


Unnamed: 0,features/boat_10,features/boat_11,features/boat_12,features/boat_13,features/boat_13 15,features/boat_13 15 B,features/boat_14,features/boat_15,features/boat_15 16,features/boat_16,...,features/ticket_W./C. 14258,features/ticket_W./C. 14260,features/ticket_W./C. 14263,features/ticket_W./C. 14266,features/ticket_W./C. 6607,features/ticket_W./C. 6608,features/ticket_W./C. 6609,features/ticket_W.E.P. 5734,features/ticket_W/C 14208,features/ticket_WE/P 5735
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1306,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1307,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
numeric_df['features/ticket']=numeric_df['features/ticket'].apply(lambda x: re.sub('[A-Za-z\\. /:]*','',x))
numeric_df['features/ticket']=numeric_df['features/ticket'].apply(lambda x: 0 if x=='' else int(x))

In [9]:
final_df=pd.concat([categorical_df,numeric_df],axis=1)
final_df

Unnamed: 0,features/boat_10,features/boat_11,features/boat_12,features/boat_13,features/boat_13 15,features/boat_13 15 B,features/boat_14,features/boat_15,features/boat_15 16,features/boat_16,...,features/ticket_WE/P 5735,features/age,features/body,features/embarked,features/fare,features/parch,features/pclass,features/sex,features/sibsp,features/ticket
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,30.0,-1,2,13.0000,0,1,0,0,233478
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,37.0,98,2,7.9250,0,2,0,2,3101276
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,28.0,-1,2,13.0000,0,1,1,0,230434
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,18.0,-1,2,73.5000,0,1,0,0,14879
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-1.0,-1,0,7.8958,0,2,0,0,349254
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,32.0,-1,1,7.7500,0,2,0,0,370376
1305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,19.0,-1,2,7.7750,0,2,0,0,347069
1306,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-1.0,-1,1,8.1375,0,2,1,0,330935
1307,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,31.0,-1,2,20.5250,1,2,1,1,363291


In [10]:
X_train, X_test, y_train, y_test = train_test_split(final_df,df['survived'], test_size=0.20, random_state=42)

In [11]:
inpt=Input(2825,name='features')
x=BatchNormalization()(inpt)
x=Dense(32)(x)
x=BatchNormalization()(x)
x=Activation(activation='swish')(x)
x=Dense(1,activation='sigmoid',name='survived')(x)
model=Model(inputs=inpt,outputs=x)
model.compile(loss="binary_crossentropy",
              optimizer='adam',
              metrics=[tf.keras.metrics.AUC(num_thresholds=200, curve='ROC'),tf.keras.metrics.PrecisionAtRecall(1, num_thresholds=200, name=None, dtype=None),'accuracy'])
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
features (InputLayer)        [(None, 2825)]            0         
_________________________________________________________________
batch_normalization (BatchNo (None, 2825)              11300     
_________________________________________________________________
dense (Dense)                (None, 32)                90432     
_________________________________________________________________
batch_normalization_1 (Batch (None, 32)                128       
_________________________________________________________________
activation (Activation)      (None, 32)                0         
_________________________________________________________________
survived (Dense)             (None, 1)                 33        
Total params: 101,893
Trainable params: 96,179
Non-trainable params: 5,714
____________________________________________________

In [12]:
X_train_ds=tf.data.Dataset.from_tensor_slices((tf.constant(X_train.astype('float32')),tf.constant(y_train.astype('float32')))).batch(100).prefetch(1)
X_test_ds=tf.data.Dataset.from_tensor_slices((tf.constant(X_test.astype('float32')),tf.constant(y_test.astype('float32')))).batch(100).prefetch(1)

In [13]:
model.fit(X_train_ds,
          validation_data=X_test_ds,epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1a21641b130>

In [15]:
!py -m pip install shap

Collecting shap
  Using cached shap-0.39.0-cp38-cp38-win_amd64.whl (414 kB)
Collecting numba
  Using cached numba-0.52.0-cp38-cp38-win_amd64.whl (2.3 MB)
Collecting slicer==0.0.7
  Using cached slicer-0.0.7-py3-none-any.whl (14 kB)
Collecting cloudpickle
  Using cached cloudpickle-1.6.0-py3-none-any.whl (23 kB)
Collecting llvmlite<0.36,>=0.35.0
  Using cached llvmlite-0.35.0-cp38-cp38-win_amd64.whl (16.0 MB)
Installing collected packages: llvmlite, slicer, numba, cloudpickle, shap
Successfully installed cloudpickle-1.6.0 llvmlite-0.35.0 numba-0.52.0 shap-0.39.0 slicer-0.0.7


In [24]:
X_train

Unnamed: 0,features/boat_10,features/boat_11,features/boat_12,features/boat_13,features/boat_13 15,features/boat_13 15 B,features/boat_14,features/boat_15,features/boat_15 16,features/boat_16,...,features/ticket_WE/P 5735,features/age,features/body,features/embarked,features/fare,features/parch,features/pclass,features/sex,features/sibsp,features/ticket
772,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,62.0,-1,3,80.000000,0,0,1,0,113572
543,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,24.0,-1,2,27.000000,1,1,1,2,243847
289,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,23.0,-1,2,7.550000,0,2,1,0,2314
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,20.0,-1,2,7.050000,0,2,0,0,3101307
147,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,21.0,-1,2,7.925000,0,2,0,0,23101280
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,56.0,-1,0,35.500000,0,0,0,0,13213
1130,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-1.0,-1,2,0.000000,0,0,0,0,112051
1294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-1.0,-1,0,7.229200,0,2,0,0,2629
860,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,23.0,-1,2,13.000000,0,1,0,0,233639
