# ファイル読み込み

In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's global random number generator.
np.random.seed(666)

# Titanic competition data, taken from https://www.kaggle.com/c/titanic/data
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("./data/titanic/train.csv", "./data/titanic/test.csv")
)


# データ確認

In [2]:
# Column dtypes and non-null counts of the training frame.
train.info()

# Per-column missing-value counts for the training and test frames,
# each framed by a divider line.
divider = "-------------------------------------------"
print(divider, train.isnull().sum(), divider, test.isnull().sum(), divider, sep="\n")

# Last expression of the cell, so the first rows are rendered as its output.
train.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB
-------------------------------------------
PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64
-------------------------------------------
PassengerId      0
Pclass           0
Name             0
Sex              0
Age       

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


# 不要データ削除 欠損値補完

In [3]:

# Columns removed from both frames before modelling.
UNUSED_COLUMNS = ["PassengerId", "Name", "Ticket", "Cabin", "Embarked"]
# Integer encoding for the Sex column.
SEX_CODES = {"male": 0, "female": 1}


def prepare_features(frame, age_fill, fare_fill):
    """Return a cleaned copy of a Titanic frame; the input is not modified.

    Parameters
    ----------
    frame : pandas.DataFrame
        Raw frame containing at least UNUSED_COLUMNS, "Sex", "Age" and "Fare".
    age_fill : float
        Value substituted for missing Age entries.
    fare_fill : float
        Value substituted for missing Fare entries.

    Returns
    -------
    pandas.DataFrame
        New frame without UNUSED_COLUMNS, with Sex mapped through SEX_CODES
        and missing Age / Fare values filled. Column order is otherwise kept.

    Raises
    ------
    KeyError
        If any of UNUSED_COLUMNS is absent, e.g. when called on a frame that
        has already been processed.
    """
    return (
        frame
        .drop(columns=UNUSED_COLUMNS)
        .assign(
            # Explicit mapping instead of Series.replace, which relies on
            # implicit object -> int downcasting. Any value outside
            # SEX_CODES becomes NaN rather than passing through unnoticed.
            Sex=lambda df: df["Sex"].map(SEX_CODES),
            Age=lambda df: df["Age"].fillna(age_fill),
            Fare=lambda df: df["Fare"].fillna(fare_fill),
        )
    )


# Fill values are computed from the training set only and reused for the
# test set, so both frames are preprocessed with the same statistics.
age_median = train["Age"].median()
fare_median = train["Fare"].median()

train = prepare_features(train, age_median, fare_median)
test = prepare_features(test, age_median, fare_median)
train.head()


Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare
0,0,3,0,22.0,1,0,7.25
1,1,1,1,38.0,1,0,71.2833
2,1,3,1,26.0,0,0,7.925
3,1,1,1,35.0,1,0,53.1
4,0,3,0,35.0,0,0,8.05


In [4]:
import keras
from keras.utils.np_utils import to_categorical
# Names of the columns used as model inputs, in input order.
COLUMNS = ["Pclass", "Sex", "Age", "SibSp", "Parch","Fare"]

# Separate the explanatory variables from the target variable and convert
# them to NumPy arrays.
x_train = train.loc[:, COLUMNS].values
x_test = test.loc[:, COLUMNS].values
y_train = train.Survived.values



Using TensorFlow backend.


# モデル作成

In [5]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Build the network. (Original author's note: the architecture was chosen
# fairly arbitrarily.)
# Two ReLU hidden layers, each followed by dropout, then one sigmoid unit.
model = Sequential([
    Dense(32, input_shape=(len(COLUMNS),), activation='relu'),
    Dropout(0.25),
    Dense(16, activation='relu'),
    Dropout(0.25),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# Kept as the last expression so the returned History object is the cell output.
model.fit(x_train, y_train, batch_size=1, epochs=30, verbose=1)



Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0xb290c77b8>

# Submission用のデータ出力

In [6]:
# Outputs of the network's final sigmoid unit for the test features.
predictions = model.predict(x_test)

# Re-read the raw test file to recover PassengerId (it was deleted from
# `test` during preprocessing) and attach the predicted class as a new column.
df_out = pd.read_csv("./data/titanic/test.csv")
# Round to 0/1 and cast with the builtin int: the np.int alias was deprecated
# in NumPy 1.20 and removed in 1.24. ravel() flattens the result to 1-D
# before it is assigned as a column (predict presumably returns one column
# per output unit - confirm against the Keras version in use).
df_out["Survived"] = np.round(predictions).astype(int).ravel()

# Write the submission file into the current working directory.
df_out[["PassengerId","Survived"]].to_csv("submission.csv",index=False)