# Titanic Competition

## Preparation

In [1]:
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from sklearn import preprocessing
import math

In [211]:
import numpy as np

## Data cleaning
### Import data

In [177]:
titanic = pd.read_csv('data/train.csv', index_col=0)

In [178]:
titanic.dtypes

Survived      int64
Pclass        int64
Name         object
Sex          object
Age         float64
SibSp         int64
Parch         int64
Ticket       object
Fare        float64
Cabin        object
Embarked     object
dtype: object

In [180]:
titanic['Sex'] = (titanic['Sex'] == 'male').astype('int')
# female = 0, male = 1

In [255]:
titanic.isna().sum()

Survived      0
Pclass        0
Name          0
Sex           0
Age           0
SibSp         0
Parch         0
Ticket        0
Fare          0
Cabin       687
Embarked      2
dtype: int64

In [256]:
# titanic.loc[titanic['Age'].isna(), 'Age'] = titanic['Age'].mean()
titanic['Age'].fillna(titanic['Age'].mean(), inplace=True)
titanic['Age'] = titanic['Age'] / 100

In [284]:
x_feature = ['Pclass', 'Sex', 'Age']
y_feature = 'Survived'

data_len = len(titanic)
train_len = math.floor(data_len * 0.8)

In [285]:
train_x = titanic.iloc[train_len:][x_feature]
train_y = titanic.iloc[train_len:][y_feature]

test_x = titanic.iloc[:train_len][x_feature]
test_y = titanic.iloc[:train_len][y_feature]

In [280]:
test_x

Unnamed: 0_level_0,Sex,Age
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,0.000022
2,0,0.000038
3,0,0.000026
4,0,0.000035
5,1,0.000035
...,...,...
708,1,0.000042
709,0,0.000022
710,1,0.000030
711,0,0.000024


In [287]:
model = keras.Sequential([
    layers.Dense(1, activation='sigmoid', input_shape=[3]),
    # layers.Dropout(0.2),
    # layers.BatchNormalization(),
    # layers.Dropout(0.2),
    # layers.BatchNormalization(),
    # layers.Dense(1, activation='softmax')
])
model.summary()

Model: "sequential_59"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_123 (Dense)            (None, 1)                 4         
Total params: 4
Trainable params: 4
Non-trainable params: 0
_________________________________________________________________


In [288]:
model.compile(
    optimizer='adam',
    loss='mae',
    metrics='accuracy'
)

In [289]:
model.fit(
    train_x, train_y,
    validation_data=(test_x, test_y),
    batch_size=100,
    epochs=12
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<tensorflow.python.keras.callbacks.History at 0x7fe0cf09de50>

In [248]:
submission = pd.read_csv('data/train.csv')
submission['Sex'] = (submission['Sex'] == 'male').astype('int')
submission['Age'].fillna(submission['Age'].mean(), inplace=True)
submission['Age'] = submission['Age'] / 100

In [249]:
submission.head()
submission.isna().sum()

PassengerId      0
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             1
Cabin          327
Embarked         0
dtype: int64

In [250]:
submission_y = model.predict(submission[x_feature])

In [251]:
def get_first_value (x):
    return (x[0] > 0.5).astype('int')
submission_y_clean = np.array([get_first_value(x) for x in submission_y])

In [252]:
submission_df = pd.DataFrame({
    'PassengerId': submission.PassengerId,
    'Survived': submission_y_clean
}).set_index('PassengerId')

In [253]:
submission_df.to_csv('data/sigmoid1_submission.csv')