# Titanic 

Take up the Titanic project again, and insert a deep-learning into it to see if you can improve your results!

https://www.kaggle.com/c/titanic

![](https://plus.lesoir.be/sites/default/files/dpistyles_v2/ena_16_9_extra_big/2019/04/12/node_218060/26251893/public/2019/04/12/B9719141564Z.1_20190412142710_000+GO7DBN7S7.1-0.jpg?itok=UrOcJg8T)

In [2]:
import pandas as pd
import numpy as np

In [4]:
titanic_train = pd.read_csv('train.csv')
print(titanic_train.head(1))

   PassengerId  Survived  Pclass                     Name   Sex   Age  SibSp  \
0            1         0       3  Braund, Mr. Owen Harris  male  22.0      1   

   Parch     Ticket  Fare Cabin Embarked  
0      0  A/5 21171  7.25   NaN        S  


In [5]:
titanic_test = pd.read_csv('test.csv')
print(titanic_test.head(1))

   PassengerId  Pclass              Name   Sex   Age  SibSp  Parch  Ticket  \
0          892       3  Kelly, Mr. James  male  34.5      0      0  330911   

     Fare Cabin Embarked  
0  7.8292   NaN        Q  


In [6]:
dataset = [titanic_train, titanic_test]

In [7]:
for data in dataset:
    data.drop(['Cabin', 'Ticket', 'PassengerId'], axis=1, inplace=True)

In [8]:
for data in dataset:
    data['Title'] = data.Name.str.extract(' ([A-Za-z]+)\.', expand=False)

    data['Title'] = data['Title'].replace(['Lady', 'Countess','Capt', 'Col',\
    'Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')

    data['Title'] = data['Title'].replace('Mlle', 'Miss')
    data['Title'] = data['Title'].replace('Ms', 'Miss')
    data['Title'] = data['Title'].replace('Mme', 'Mrs')

    title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
    data['Title'] = data['Title'].map(title_mapping)
    data['Title'] = data['Title'].fillna(0)
    
    data.drop(['Name'], axis=1, inplace=True)

In [9]:
from sklearn.preprocessing import LabelEncoder

# Set up the LabelEncoder object
enc = LabelEncoder()
for data in dataset:
    # Apply the encoding to the "Accessible" column
    data['Sex'] = enc.fit_transform(data['Sex'])

In [10]:
guess_ages = np.zeros((2,3))
for data in dataset:
    for i in range(0, 2):
        for j in range(0, 3):
            guess_df = data[(data['Sex'] == i) & \
                                  (data['Pclass'] == j+1)]['Age'].dropna()

            age_guess = guess_df.mean()

            # round float number
            guess_ages[i,j] = int( age_guess/0.5 + 0.5 ) * 0.5
            
    for i in range(0, 2):
        for j in range(0, 3):
            data.loc[ (data.Age.isnull()) & (data.Sex == i) & (data.Pclass == j+1),\
                    'Age'] = guess_ages[i,j]

    data['Age'] = data['Age'].astype(int)

In [11]:
for data in dataset:    
    data.loc[ data['Age'] <= 16, 'Age'] = 0
    data.loc[(data['Age'] > 16) & (data['Age'] <= 32), 'Age'] = 1
    data.loc[(data['Age'] > 32) & (data['Age'] <= 48), 'Age'] = 2
    data.loc[(data['Age'] > 48) & (data['Age'] <= 64), 'Age'] = 3
    data.loc[ data['Age'] > 64, 'Age']

In [12]:
for data in dataset:
    data['FamilySize'] = data['SibSp'] + data['Parch'] + 1

for data in dataset:
    data['IsAlone'] = 0
    data.loc[data['FamilySize'] == 1, 'IsAlone'] = 1
   
    data = data.drop(['Parch', 'SibSp', 'FamilySize'], axis=1, inplace=True)

In [13]:
for data in dataset:
    freq_port = data.Embarked.dropna().mode()[0]
    data['Embarked'] = data['Embarked'].fillna(freq_port)
    data['Embarked'] = data['Embarked'].map( {'S': 0, 'C': 1, 'Q': 2} ).astype(int)

In [14]:
for data in dataset:
    data['Fare'].fillna(data['Fare'].dropna().median(), inplace=True)
        
    data.loc[ data['Fare'] <= 7.91, 'Fare'] = 0
    data.loc[(data['Fare'] > 7.91) & (data['Fare'] <= 14.454), 'Fare'] = 1
    data.loc[(data['Fare'] > 14.454) & (data['Fare'] <= 31), 'Fare']   = 2
    data.loc[ data['Fare'] > 31, 'Fare'] = 3
    data['Fare'] = data['Fare'].astype(int)

In [25]:
from keras.utils import to_categorical

X_train = titanic_train.drop("Survived", axis=1).values
y_train = to_categorical(titanic_train["Survived"])
X_test  = titanic_test.values

# Save the number of columns in predictors: n_cols
n_cols = X_train.shape[1]
n_cats = y_train.shape[1]

In [82]:
# Import necessary modules
import keras
from keras.layers import Dense
from keras.models import Sequential

# Create the model: model
model = Sequential()

# Add the first hidden layer
model.add(Dense(100, activation='relu', input_shape=(n_cols,)))

# Add the second hidden layer
model.add(Dense(100, activation='relu'))
model.add(Dense(100, activation='relu'))

# Add the output layer
model.add(Dense(n_cats, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Fit the model
model.fit(X_train, y_train, epochs=60)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<tensorflow.python.keras.callbacks.History at 0x7ffa05cacc50>

In [78]:
# Calculate predictions: predictions
predictions = model.predict_classes(X_test)

In [79]:
titanic_test = pd.read_csv('test.csv')

In [80]:
result = pd.DataFrame(predictions, index=titanic_test.PassengerId, columns=['survived'])
result.head()

Unnamed: 0_level_0,survived
PassengerId,Unnamed: 1_level_1
892,0
893,0
894,0
895,0
896,1


In [81]:
result.to_csv('./results7.csv')

In [44]:
ynew = model.predict_classes(X_test)

In [49]:
ynew = (model.predict(X_test) > 0.5).astype("int32")

In [50]:
ynew

array([[1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 1],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [1, 0],
       [1,