In [1]:
import pandas as pd
import numpy as np

import seaborn as sns

In [2]:
data_train = pd.read_csv("./datasets/titanic/train.csv")
data_test = pd.read_csv("./datasets/titanic/test.csv")

In [3]:
def createTitle(df):
    df['Title'] = df['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)
    df['Title'] = df['Title'].replace(['Lady', 'Countess','Capt', 'Col', 'Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
    df['Title'] = df['Title'].replace('Mlle', 'Miss')
    df['Title'] = df['Title'].replace('Ms', 'Miss')
    df['Title'] = df['Title'].replace('Mme', 'Mrs')
    
    return df

In [4]:
def titleMapping(df):
    title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
    df['Title'] = df['Title'].map(title_mapping)
    df['Title'] = df['Title'].fillna(0)
    
    return df

In [5]:
def dropColumns(df):
    df = df.drop(['Name','PassengerId'], axis=1)
    df = df.drop(['Ticket', 'Cabin'], axis=1)
    return df

In [6]:
def sexMapping(df):
    df['Sex'] = df['Sex'].map( {'female': 1, 'male': 0} ).astype(int)
    
    return df

In [7]:
def setAge(df):
    df.dropna(subset=['Age'], inplace=True)
    df['Age'] = df['Age'].astype(int)

    df.loc[df['Age'] <= 16, 'Age'] = 0
    df['Age'] = df.loc[(df['Age'] > 16) & df['Age'] <= 32, 'Age'] = 1
    df['Age'] = df.loc[(df['Age'] > 32) & df['Age'] <= 48, 'Age'] = 2
    df['Age'] = df.loc[(df['Age'] > 48) & df['Age'] <= 64, 'Age'] = 3
    df['Age'] = df.loc[df['Age'] > 64, 'Age'] = 4
    
    return df

In [8]:
def createAgeClass(df):
    df['Age_Class'] = df['Age']*df['Pclass']
    
    return df

In [9]:
def setEmbarked(df):
    df['Embarked'] = df['Embarked'].fillna('S')
    df['Embarked'] = df['Embarked'].map( {'S': 0, 'C': 1, 'Q':2} ).astype(int)
    
    return df

In [10]:
def setFare(df):
    df['Fare'] = df['Fare'].dropna()
    df.loc[df['Fare'] <= 7.91, 'Fare'] = 0
    df['Fare'] = df.loc[(df['Fare'] > 7.91) & df['Fare'] <= 14.454, 'Fare'] = 1
    df['Fare'] = df.loc[(df['Fare'] > 14.454) & df['Fare'] <= 31, 'Fare'] = 2
    df['Fare'] = df.loc[df['Fare'] > 31, 'Fare'] = 3
    
    return df

In [11]:
def dataPipeline(df):
    df = createTitle(df)
    df = titleMapping(df)
    df = dropColumns(df)
    df = sexMapping(df)
    df = setAge(df)
    df = createAgeClass(df)
    df = setEmbarked(df)
    df = setFare(df)
    
    return df

In [12]:
data_train = dataPipeline(data_train)
data_test = dataPipeline(data_test)

In [13]:
data_train.head(5)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title,Age_Class
0,0,3,0,4,1,0,3,0,1,12
1,1,1,1,4,1,0,3,1,3,4
2,1,3,1,4,0,0,3,0,2,12
3,1,1,1,4,1,0,3,0,3,4
4,0,3,0,4,0,0,3,0,1,12


In [14]:
data_test.head(5)

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title,Age_Class
0,3,0,4,0,0,3,2,1,12
1,3,1,4,1,0,3,0,3,12
2,2,0,4,0,0,3,2,1,8
3,3,0,4,0,0,3,0,1,12
4,3,1,4,1,1,3,0,3,12


In [20]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

print(tf.__version__)

2.1.0


In [27]:
def build_model():
  model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=[len(data_test.keys())]),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
  ])

  optimizer = tf.keras.optimizers.RMSprop(0.001)

  model.compile(loss='mse',
                optimizer=optimizer,
                metrics=['mae', 'mse'])
  return model

In [28]:
model = build_model()

In [29]:
example_batch = data_test[:10]
example_result = model.predict(example_batch)
example_result

array([[1.2269244 ],
       [1.7444925 ],
       [0.83349746],
       [1.4412856 ],
       [1.7008339 ],
       [1.4412856 ],
       [1.3122013 ],
       [1.1414022 ],
       [1.4057767 ],
       [1.9465573 ]], dtype=float32)