In [1]:
# -*- coding: utf-8 -*-
# Tensorflow 2.x

import random
import numpy as np
import pandas as pd
import tensorflow as tf


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
from sklearn import preprocessing

In [3]:
train = pd.read_csv('./data/train.csv')
test = pd.read_csv('./data/test.csv')

def extract_cabin_type(x):
  cabin = x['Cabin']
  if isinstance(cabin, str) and cabin[0] != 'T':
    return cabin[0]
  else:
    return np.nan   
train['CabinType'] = train.apply(extract_cabin_type, axis=1)
test['CabinType'] = test.apply(extract_cabin_type, axis=1)

def male_female_child(x):
  age = x['Age']
  sex = x['Sex']
  if age <= 15:
    return 'child'
  else:
    return sex
train['PersonType'] = train.apply(male_female_child,axis=1)
test['PersonType'] = test.apply(male_female_child,axis=1)


#データ整形 train
train["Embarked"] = train["Embarked"].replace("C", 0).replace("Q", 1).replace("S", 2)
train["CabinType"] = train["CabinType"].replace("A", 0).replace("B", 1).replace("C", 2).replace("D", 3).replace("E", 4).replace("F", 5).replace("G", 6)
train["Sex"] = train["Sex"].replace("male", 0).replace("female", 1)
train["PersonType"] = train["PersonType"].replace("male", 0).replace("female", 1).replace("child", 2)

#データ整形 test
test["Embarked"] = test["Embarked"].replace("C", 0).replace("Q", 1).replace("S", 2)
test["CabinType"] = test["CabinType"].replace("A", 0).replace("B", 1).replace("C", 2).replace("D", 3).replace("E", 4).replace("F", 5).replace("G", 6)
test["Sex"] = test["Sex"].replace("male", 0).replace("female", 1)
test["PersonType"] = test["PersonType"].replace("male", 0).replace("female", 1).replace("child", 2)

#データ補完
train["Embarked"] = train["Embarked"].fillna(2)
train["CabinType"] = train["CabinType"].fillna(-1)
age_mean = pd.concat([train["Age"], test["Age"]]).mean()
fare_mean = pd.concat([train["Fare"], test["Fare"]]).mean()
train["Age"] = train["Age"].fillna(age_mean)
train["Fare"] = train["Fare"].fillna(fare_mean)

test["Embarked"] = test["Embarked"].fillna(2)
test["CabinType"] = test["CabinType"].fillna(-1)
test["Age"] = test["Age"].fillna(age_mean)
test["Fare"] = test["Fare"].fillna(fare_mean)

# Cabin は使わない。
print('訓練データの欠損値の個数\n', train.isnull().sum())
print('-' * 40)
print('テストデータの欠損値の個数\n', test.isnull().sum())

訓練データの欠損値の個数
 PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         0
CabinType        0
PersonType       0
dtype: int64
----------------------------------------
テストデータの欠損値の個数
 PassengerId      0
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          327
Embarked         0
CabinType        0
PersonType       0
dtype: int64


In [4]:
# トレーニングデータ
x_spl = train.loc[:, ['Age', 'Pclass', 'PersonType', 'SibSp', 'Parch', 'Fare', 'CabinType', 'Embarked']].values
x_t = train.loc[:, ['Age', 'Pclass', 'PersonType', 'SibSp', 'Parch', 'Fare', 'CabinType', 'Embarked']].values
y_spl = train.loc[:, ['Survived']].values

x_train, x_test, y_train, y_test = train_test_split(x_spl, y_spl, test_size=0.25,random_state=32)
# print(x_train)
# print(y_train)

In [5]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.utils import np_utils
from keras.layers.core import Dropout
from keras.layers.normalization import BatchNormalization


# Network Parameters
n_hidden_1 = 64      # 隠れ層1のユニットの数
n_hidden_2 = 64      # 隠れ層2のユニットの数
n_input = x_train[0:1].size          # 与える変数の数
n_classes = 2        # 分類するクラスの数 今回は生き残ったか否かなので2
dropout=0.5
act="relu"
opt="adam"

model = Sequential()

# 隠れ層1
model.add(Dense(input_dim=n_input, units=n_hidden_1))
model.add(BatchNormalization())
model.add(Activation(act))
model.add(Dropout(dropout))
# 隠れ層2
model.add(Dense(units=n_hidden_２))
model.add(BatchNormalization())
model.add(Activation(act))
model.add(Dropout(dropout))
# 出力層
model.add(Dense(units=1))
model.add(Activation("sigmoid"))
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

In [6]:
# 学習
fit = model.fit(x_train.astype('float32'), y_train.astype('float32'), epochs=25, batch_size=16, verbose=2)


Epoch 1/25
42/42 - 1s - loss: 0.7249 - accuracy: 0.5868
Epoch 2/25
42/42 - 0s - loss: 0.6770 - accuracy: 0.6362
Epoch 3/25
42/42 - 0s - loss: 0.6936 - accuracy: 0.6497
Epoch 4/25
42/42 - 0s - loss: 0.6774 - accuracy: 0.6632
Epoch 5/25
42/42 - 0s - loss: 0.6710 - accuracy: 0.6557
Epoch 6/25
42/42 - 0s - loss: 0.6457 - accuracy: 0.6841
Epoch 7/25
42/42 - 0s - loss: 0.6331 - accuracy: 0.6632
Epoch 8/25
42/42 - 0s - loss: 0.6264 - accuracy: 0.6901
Epoch 9/25
42/42 - 0s - loss: 0.5855 - accuracy: 0.6961
Epoch 10/25
42/42 - 0s - loss: 0.5846 - accuracy: 0.6931
Epoch 11/25
42/42 - 0s - loss: 0.5813 - accuracy: 0.7081
Epoch 12/25
42/42 - 0s - loss: 0.5744 - accuracy: 0.7036
Epoch 13/25
42/42 - 0s - loss: 0.5535 - accuracy: 0.7216
Epoch 14/25
42/42 - 0s - loss: 0.5768 - accuracy: 0.7111
Epoch 15/25
42/42 - 0s - loss: 0.5515 - accuracy: 0.7246
Epoch 16/25
42/42 - 0s - loss: 0.5541 - accuracy: 0.7111
Epoch 17/25
42/42 - 0s - loss: 0.5656 - accuracy: 0.7470
Epoch 18/25
42/42 - 0s - loss: 0.5280 - 

In [7]:
y_test_proba = model.predict(x_test)
y_tmp = np.round(y_test_proba).astype(int)

df_output = pd.concat([y_test, pd.DataFrame(y_tmp, columns=['Survived'])], axis=1)

df_output.to_csv('titanic_res.csv', index=False)

TypeError: cannot concatenate object of type '<class 'numpy.ndarray'>'; only Series and DataFrame objs are valid