In [3]:
# -*- coding: utf-8 -*-
# Tensorflow 2.x

import random
import numpy as np
import pandas as pd
import tensorflow as tf

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
from sklearn import preprocessing

In [5]:
# Parameters
learning_rate = 0.01 # 学習率 （※高いとcostの収束が早まる）
training_epochs = 10 # 世代数（※学習全体をこのエポック数で区切り、区切りごとにcostを表示する）
batch_size = 100     # 訓練単位（※学習1回ごと( sess.run()ごと )に訓練データをいくつ利用するか）
display_step = 1     # 1なら毎エポックごとにcostを表示
train_size = 800     # 全データの中でいくつ訓練データに回すか
step_size = 1000     # 何ステップ学習するか

# Network Parameters
n_hidden_1 = 64      # 隠れ層1のユニットの数
n_hidden_2 = 64      # 隠れ層2のユニットの数
n_input = 8          # 与える変数の数
n_classes = 2        # 分類するクラスの数 今回は生き残ったか否かなので2



In [8]:
train = pd.read_csv('./data/train.csv')
test = pd.read_csv('./data/test.csv')

def extract_cabin_type(x):
  cabin = x['Cabin']
  if isinstance(cabin, str) and cabin[0] != 'T':
    return cabin[0]
  else:
    return np.nan   
train['CabinType'] = train.apply(extract_cabin_type, axis=1)
test['CabinType'] = test.apply(extract_cabin_type, axis=1)

def male_female_child(x):
  age = x['Age']
  sex = x['Sex']
  if age <= 15:
    return 'child'
  else:
    return sex
train['PersonType'] = train.apply(male_female_child,axis=1)
test['PersonType'] = test.apply(male_female_child,axis=1)


#データ整形 train
train["Embarked"] = train["Embarked"].replace("C", 0).replace("Q", 1).replace("S", 2)
train["CabinType"] = train["CabinType"].replace("A", 0).replace("B", 1).replace("C", 2).replace("D", 3).replace("E", 4).replace("F", 5).replace("G", 6)
train["Sex"] = train["Sex"].replace("male", 0).replace("female", 1)
train["PersonType"] = train["PersonType"].replace("male", 0).replace("female", 1).replace("child", 2)

#データ整形 test
test["Embarked"] = test["Embarked"].replace("C", 0).replace("Q", 1).replace("S", 2)
test["CabinType"] = test["CabinType"].replace("A", 0).replace("B", 1).replace("C", 2).replace("D", 3).replace("E", 4).replace("F", 5).replace("G", 6)
test["Sex"] = test["Sex"].replace("male", 0).replace("female", 1)
test["PersonType"] = test["PersonType"].replace("male", 0).replace("female", 1).replace("child", 2)

#データ補完
train["Embarked"] = train["Embarked"].fillna(2)
train["CabinType"] = train["CabinType"].fillna(-1)
age_mean = pd.concat([train["Age"], test["Age"]]).mean()
fare_mean = pd.concat([train["Fare"], test["Fare"]]).mean()
train["Age"] = train["Age"].fillna(age_mean)
train["Fare"] = train["Fare"].fillna(fare_mean)

test["Embarked"] = test["Embarked"].fillna(2)
test["CabinType"] = test["CabinType"].fillna(-1)
test["Age"] = test["Age"].fillna(age_mean)
test["Fare"] = test["Fare"].fillna(fare_mean)

# Cabin は使わない。
print('訓練データの欠損値の個数\n', train.isnull().sum())
print('-' * 40)
print('テストデータの欠損値の個数\n', test.isnull().sum())

訓練データの欠損値の個数
 PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         0
CabinType        0
PersonType       0
dtype: int64
----------------------------------------
テストデータの欠損値の個数
 PassengerId      0
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          327
Embarked         0
CabinType        0
PersonType       0
dtype: int64


In [17]:
# トレーニングデータ
x_train = train.loc[:, ['Age', 'Pclass', 'Sex', 'SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']].values
y_train = train.loc[:, ['Survived']].values

# print(x_train)
# print(y_train)

[[22.0 3 0 ... 7.25 nan 2.0]
 [38.0 1 1 ... 71.2833 'C85' 0.0]
 [26.0 3 1 ... 7.925 nan 2.0]
 ...
 [29.881137667304014 3 1 ... 23.45 nan 2.0]
 [26.0 1 0 ... 30.0 'C148' 0.0]
 [32.0 3 0 ... 7.75 nan 1.0]]
[[0]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 