## ライブラリのインポート

In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from numpy.random import seed

## データの読み込み

In [None]:
# Location of the Kaggle Titanic CSV files, relative to the notebook.
parent_dir = "./titanic"

# Resolve both file paths first, then read each CSV into its own DataFrame.
train_csv_path = os.path.join(parent_dir, "train.csv")
test_csv_path = os.path.join(parent_dir, "test.csv")

titanic_train = pd.read_csv(train_csv_path)
titanic_test = pd.read_csv(test_csv_path)

In [None]:
# Preview the first three rows of the raw training data.
titanic_train.head(3)

In [None]:
# Preview the first three rows of the raw test data.
titanic_test.head(3)

In [None]:
# Convert the data to numeric form: encode "Sex" as a 0/1 indicator column.
# Values other than "male"/"female" would map to NaN.
sex_to_is_male = {"male": 1, "female": 0}
titanic_train["isMale"] = titanic_train["Sex"].map(sex_to_is_male)
titanic_test["isMale"] = titanic_test["Sex"].map(sex_to_is_male)

# Feature columns fed to the model.
cols = ["Pclass", "isMale", "Age", "SibSp", "Parch", "Fare"]

# Take explicit copies: the feature frames are modified column by column in
# later cells, and assigning into a selection of another DataFrame triggers
# pandas' SettingWithCopyWarning.
train_X = titanic_train[cols].copy()
train_Y = titanic_train["Survived"]

test_X = titanic_test[cols].copy()

## 欠損値の補完

In [None]:
# Show, per training feature column, whether it contains any missing value.
train_X.isnull().any(axis=0)

In [None]:
# Replace missing values in every training feature with that column's mean.
# Columns without missing values are left unchanged. Rebinding train_X to the
# frame returned by fillna (instead of assigning column by column) avoids
# pandas' chained-assignment warning.
train_X = train_X.fillna(train_X.mean())

In [None]:
# Show, per test feature column, whether it contains any missing value.
test_X.isnull().any(axis=0)

In [None]:
# Fill missing test values with the per-column means of the TRAINING
# features, so that train and test go through the same preprocessing and no
# statistic is derived from the test set. Mean-imputing the training frame
# beforehand does not change its column means.
test_X = test_X.fillna(train_X.mean())

In [None]:
# Mean-fill any remaining missing "Age" values in the training features.
# NOTE(review): an earlier cell already mean-fills every training column that
# has missing values, so this looks like a no-op — confirm and consider
# removing it.
train_X["Age"] = train_X["Age"].fillna(train_X["Age"].mean())

## データの再確認

In [None]:
# Preview the first three rows of the training features.
train_X.head(3)

In [None]:
# Preview the first three training labels ("Survived").
train_Y.head(3)

In [None]:
# Preview the first three rows of the test features.
test_X.head(3)

## ニューラルネットワークを用いた学習

In [None]:
# Standardize every feature column to zero mean and unit variance.
#
# The scaler is fitted on the training features only, and the SAME fitted
# statistics are then applied to the test features. Re-fitting on the test set
# would put the two sets on different scales than the one the network is
# trained on.
scaler = StandardScaler()
feature_cols = list(train_X.columns)

# fit_transform learns the per-column mean/std from the training data.
train_scaled = scaler.fit_transform(train_X[feature_cols].astype('float64'))
train_X = pd.DataFrame(train_scaled, columns=feature_cols, index=train_X.index)

# transform reuses the training statistics; it does not re-fit.
test_scaled = scaler.transform(test_X[feature_cols].astype('float64'))
test_X = pd.DataFrame(test_scaled, columns=feature_cols, index=test_X.index)

In [None]:
def create_model():
    """Build and compile the binary classifier used for survival prediction.

    The input width is taken from the module-level ``train_X`` frame (one
    input per feature column).

    Returns:
        A compiled ``Sequential`` model with two ReLU hidden layers and a
        single sigmoid output unit, using binary cross-entropy loss, the Adam
        optimizer and accuracy as the reported metric.
    """
    n_features = train_X.shape[1]

    layers = [
        # First hidden layer: as many units as there are input features.
        Dense(n_features, input_dim=n_features, activation='relu'),
        # NOTE(review): 16384 units is very wide for this input size —
        # confirm that this is intentional.
        Dense(16384, activation='relu'),
        # Output: a single sigmoid unit.
        Dense(1, activation='sigmoid'),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)

    model.compile(
        loss='binary_crossentropy',
        optimizer='Adam',
        metrics=['accuracy'],
    )
    return model

In [None]:
# Build the network and show its layer-by-layer summary.
model = create_model()
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit an extra "None" line.
model.summary()

In [None]:
# Stop training once the validation loss has not improved for 10 epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# validation loss; without it the model would keep the weights from the last
# epoch run, i.e. 10 epochs past the best one.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Train with 20% of the training data held out for validation.
history = model.fit(
    train_X,
    train_Y,
    epochs=100,
    batch_size=10,
    validation_split=0.2,
    verbose=1,
    callbacks=[early_stopping],
)

In [None]:
import seaborn as sns

# Plot training/validation accuracy and loss per epoch.
#
# The bundled seaborn style sheets were renamed in matplotlib 3.6
# ("seaborn-dark" -> "seaborn-v0_8-dark") and the old names were later
# removed, so pick whichever name this matplotlib version provides instead of
# failing on an unknown style.
for style_name in ("seaborn-v0_8-dark", "seaborn-dark"):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

plt.figure(figsize=(8, 5))

epoch = history.epoch
train_acc = history.history["accuracy"]
train_loss = history.history["loss"]
val_acc = history.history["val_accuracy"]
val_loss = history.history["val_loss"]

# Accuracy in orange: line for training, markers for validation.
plt.plot(epoch, train_acc, label="train acc", c="#ff7f0e")
plt.plot(epoch, val_acc, marker="o", lw=0, label="val acc", c="#ff7f0e")

# Loss in blue: line for training, markers for validation.
plt.plot(epoch, train_loss, label="train loss", c="#1f77b4")
plt.plot(epoch, val_loss, marker="o", lw=0, label="val loss", c="#1f77b4")

# Both metrics share one axis; loss values above 1 fall outside the view.
plt.ylim(0, 1)
plt.legend()
plt.grid()

plt.xlabel("Epochs")
plt.ylabel("Accuracy & Loss")
plt.show()

## 生存予測と提出ファイル出力

In [None]:
# Predict on the test features and round each sigmoid output to the nearest
# integer to obtain a 0/1 "Survived" label.
survival_prob = model.predict(test_X)
p = pd.DataFrame(survival_prob, columns=["Survived"])
p["Survived"] = p["Survived"].round(0).astype('int')
p

In [None]:
# Assemble the submission table: passenger ids from the test file next to the
# predicted labels.
submission_columns = {
    "PassengerId": titanic_test["PassengerId"],
    "Survived": p["Survived"],
}
df_out = pd.DataFrame(submission_columns)

# Make sure the ids are stored as integers.
df_out["PassengerId"] = df_out["PassengerId"].astype('int')
df_out.head()

In [None]:
# Write the submission table to "submission.csv" without the index column.
df_out.to_csv("submission.csv", index=False)