In [1]:
import pandas as pd
import numpy as np

import seaborn as sns

In [2]:
data_train = pd.read_csv("./datasets/titanic/train.csv")
data_test = pd.read_csv("./datasets/titanic/test.csv")

In [3]:
def createTitle(df):
    df['Title'] = df['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)
    df['Title'] = df['Title'].replace(['Lady', 'Countess','Capt', 'Col', 'Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
    df['Title'] = df['Title'].replace('Mlle', 'Miss')
    df['Title'] = df['Title'].replace('Ms', 'Miss')
    df['Title'] = df['Title'].replace('Mme', 'Mrs')
    
    return df

In [4]:
def titleMapping(df):
    title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
    df['Title'] = df['Title'].map(title_mapping)
    df['Title'] = df['Title'].fillna(0)
    
    return df

In [5]:
def dropColumns(df):
    df = df.drop(['Name','PassengerId'], axis=1)
    df = df.drop(['Ticket', 'Cabin'], axis=1)
    return df

In [6]:
def sexMapping(df):
    df['Sex'] = df['Sex'].map( {'female': 1, 'male': 0} ).astype(int)
    
    return df

In [7]:
def setAge(df):
    df.dropna(subset=['Age'], inplace=True)
    df['Age'] = df['Age'].astype(int)

    df.loc[df['Age'] <= 16, 'Age'] = 0
    df['Age'] = df.loc[(df['Age'] > 16) & df['Age'] <= 32, 'Age'] = 1
    df['Age'] = df.loc[(df['Age'] > 32) & df['Age'] <= 48, 'Age'] = 2
    df['Age'] = df.loc[(df['Age'] > 48) & df['Age'] <= 64, 'Age'] = 3
    df['Age'] = df.loc[df['Age'] > 64, 'Age'] = 4
    
    return df

In [8]:
def createAgeClass(df):
    df['Age_Class'] = df['Age']*df['Pclass']
    
    return df

In [9]:
def setEmbarked(df):
    df['Embarked'] = df['Embarked'].fillna('S')
    df['Embarked'] = df['Embarked'].map( {'S': 0, 'C': 1, 'Q':2} ).astype(int)
    
    return df

In [10]:
def setFare(df):
    df['Fare'] = df['Fare'].dropna()
    df.loc[df['Fare'] <= 7.91, 'Fare'] = 0
    df['Fare'] = df.loc[(df['Fare'] > 7.91) & df['Fare'] <= 14.454, 'Fare'] = 1
    df['Fare'] = df.loc[(df['Fare'] > 14.454) & df['Fare'] <= 31, 'Fare'] = 2
    df['Fare'] = df.loc[df['Fare'] > 31, 'Fare'] = 3
    
    return df

In [11]:
def dataPipeline(df):
    df = createTitle(df)
    df = titleMapping(df)
    df = dropColumns(df)
    df = sexMapping(df)
    df = setAge(df)
    df = createAgeClass(df)
    df = setEmbarked(df)
    df = setFare(df)
    
    return df

In [12]:
data_train = dataPipeline(data_train)
data_test = dataPipeline(data_test)

In [19]:
data_train.head(10)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title,Age_Class
0,0,3,0,4,1,0,3,0,1,12
1,1,1,1,4,1,0,3,1,3,4
2,1,3,1,4,0,0,3,0,2,12
3,1,1,1,4,1,0,3,0,3,4
4,0,3,0,4,0,0,3,0,1,12
6,0,1,0,4,0,0,3,0,1,4
7,0,3,0,4,3,1,3,0,4,12
8,1,3,1,4,0,2,3,0,3,12
9,1,2,1,4,1,0,3,1,3,8
10,1,3,1,4,1,1,3,0,2,12


In [15]:
import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [16]:
train, test = train_test_split(data_train, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

456 train examples
115 validation examples
143 test examples


In [22]:
# A utility method to create a tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  dataframe = dataframe.copy()
  labels = dataframe.pop('Survived')
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds


In [23]:
train_ds = df_to_dataset(train)
val_ds = df_to_dataset(val, shuffle=False)
test_ds = df_to_dataset(test, shuffle=False)

In [25]:
data_train.columns.tolist()

['Survived',
 'Pclass',
 'Sex',
 'Age',
 'SibSp',
 'Parch',
 'Fare',
 'Embarked',
 'Title',
 'Age_Class']

In [27]:
feature_columns = []

# numeric cols
for header in ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Title', 'Age_Class']:
  feature_columns.append(feature_column.numeric_column(header))

In [28]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [56]:
batch_size = 32
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [60]:
model = tf.keras.Sequential([
  feature_layer,
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='rmsprop',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(train_ds,
          validation_data=val_ds,
          epochs=100)

Train for 15 steps, validate for 4 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100


Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x1e8ff833848>