In [323]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/titanic/train.csv
/kaggle/input/titanic/test.csv
/kaggle/input/titanic/gender_submission.csv


In [324]:
train_raw = pd.read_csv('/kaggle/input/titanic/train.csv')

train_raw.head(5)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [325]:
import category_encoders as ce

def make_nice_features(raw_data):
    data = raw_data.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])
    data['Sex'] = data['Sex'].map({'female': 1, 'male': 0})
    age_median = data['Age'].median()
    data['Age'] = data['Age'].fillna(value=age_median)
    data['Age'] = data['Age']/100
    data['Fare'] = data['Fare'].fillna(value=50/data['Pclass'])
    data['Fare'] = data['Fare']/100
    data['SibSp'] = data['SibSp']/10
    data['Parch'] = data['Parch']/10
    data['Embarked'] = data['Embarked'].fillna('S')
    data['Pclass'] = 'class ' + data['Pclass'].astype(str)
    encoder = ce.OneHotEncoder(cols=['Pclass', 'Embarked'])
    data = encoder.fit_transform(data)
    return data

cleaned_features = make_nice_features(train_raw)
print(cleaned_features.head(5))

   Survived  Pclass_1  Pclass_2  Pclass_3  Sex   Age  SibSp  Parch      Fare  \
0         0         1         0         0    0  0.22    0.1    0.0  0.072500   
1         1         0         1         0    1  0.38    0.1    0.0  0.712833   
2         1         1         0         0    1  0.26    0.0    0.0  0.079250   
3         1         0         1         0    1  0.35    0.1    0.0  0.531000   
4         0         1         0         0    0  0.35    0.0    0.0  0.080500   

   Embarked_1  Embarked_2  Embarked_3  
0           1           0           0  
1           0           1           0  
2           1           0           0  
3           1           0           0  
4           1           0           0  


In [326]:
X = cleaned_features.drop(columns=['Survived'])
y = cleaned_features['Survived']

In [327]:
from sklearn.model_selection import train_test_split
train_x, test_x, train_y, test_y = train_test_split(
    X, y, test_size=0.25, random_state=125
)

In [328]:
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop

model = keras.Sequential([
    Dense(80, activation="sigmoid", name="layer1"),
    Dropout(0.05),
    Dense(55, activation='relu'),
    Dropout(0.05),
    Dense(25, activation='relu'),
    Dense(1, name="layer2", activation='sigmoid')
])

model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

In [329]:
history = model.fit(train_x, train_y,
                    batch_size=4,
                    epochs=30,
                    verbose=2,
                    validation_data=(test_x, test_y))

Epoch 1/30
167/167 - 2s - 12ms/step - accuracy: 0.6287 - loss: 0.6525 - val_accuracy: 0.6996 - val_loss: 0.6069
Epoch 2/30
167/167 - 0s - 2ms/step - accuracy: 0.7051 - loss: 0.5804 - val_accuracy: 0.7534 - val_loss: 0.5686
Epoch 3/30
167/167 - 0s - 3ms/step - accuracy: 0.7485 - loss: 0.5328 - val_accuracy: 0.7892 - val_loss: 0.4919
Epoch 4/30
167/167 - 1s - 3ms/step - accuracy: 0.7575 - loss: 0.5143 - val_accuracy: 0.7578 - val_loss: 0.5294
Epoch 5/30
167/167 - 0s - 3ms/step - accuracy: 0.7814 - loss: 0.4861 - val_accuracy: 0.7713 - val_loss: 0.4804
Epoch 6/30
167/167 - 0s - 3ms/step - accuracy: 0.7590 - loss: 0.4947 - val_accuracy: 0.7892 - val_loss: 0.4744
Epoch 7/30
167/167 - 0s - 2ms/step - accuracy: 0.7889 - loss: 0.4814 - val_accuracy: 0.7758 - val_loss: 0.4863
Epoch 8/30
167/167 - 0s - 2ms/step - accuracy: 0.7859 - loss: 0.4751 - val_accuracy: 0.7848 - val_loss: 0.4658
Epoch 9/30
167/167 - 0s - 3ms/step - accuracy: 0.7964 - loss: 0.4614 - val_accuracy: 0.7713 - val_loss: 0.5029


Try with competition data

In [330]:
test_raw = pd.read_csv('/kaggle/input/titanic/test.csv')

test_passenger_id = test_raw['PassengerId']

test_cleaned = make_nice_features(test_raw)

In [333]:
survived = model.predict(test_cleaned, batch_size=4).round().astype(int)

result = pd.DataFrame({'PassengerId': test_passenger_id, 'Survived': survived.flatten()})


result.to_csv('/kaggle/working/submission.csv', index=False)

[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
