In [1]:
#khn0206@gmail.com

import tensorflow as tf
import pandas as pd
import numpy as np
import math

# Load the raw CSV files. The test file is read a second time, restricted to
# PassengerId, so the ids remain available in result_df after that column is
# dropped from the feature frames below.
train_df = pd.read_csv('data/train.csv')
test_df = pd.read_csv('data/test.csv')
result_df = pd.read_csv('data/test.csv', usecols=['PassengerId'])

# PassengerId, Name and Ticket are removed from both frames.
train_df, test_df = (
    frame.drop(['PassengerId', 'Name', 'Ticket'], axis=1)
    for frame in (train_df, test_df)
)

In [2]:
def convertDataToNumber(df, namelist):
    """Replace the values of the given columns with integer codes, in place.

    Each distinct value of a column is assigned its position in
    ``df[name].unique()`` (order of first appearance): the first value seen
    becomes 0, the second 1, and so on.

    Args:
        df: DataFrame whose columns are re-encoded. It is modified in place.
        namelist: iterable of column names to encode.

    Returns:
        The same ``df`` object, with every listed column replaced by its codes.
    """
    for name in namelist:
        codes = {value: code for code, value in enumerate(df[name].unique())}
        # Translate the whole column in a single pass. Replacing one key at a
        # time lets a code that was just written be rewritten again when a
        # later key equals it (a column of [1, 0] would collapse to one value).
        df[name] = df[name].map(codes)
    return df

def preprocessingData(df):
    """Impute, encode and scale a passenger frame.

    Steps, in order:
      1. missing ``Age``      -> truncated mean age of passengers of the same sex
      2. missing ``Embarked`` -> 'S'
      3. ``Cabin``            -> its first character, 'N' when missing
      4. ``Sex``, ``Embarked``, ``Cabin`` -> integer codes (convertDataToNumber)
      5. any value still missing -> median of its column
      6. every column         -> (x - mean) / (max - min)

    All statistics and category codes are derived from ``df`` alone, and every
    column present in ``df`` is scaled.
    NOTE(review): two frames processed separately therefore do not necessarily
    share category codes or scaling -- confirm this is acceptable for callers
    that preprocess train and test data with separate calls.

    Args:
        df: DataFrame containing at least ``Sex``, ``Age``, ``Embarked`` and
            ``Cabin``. Every other column must be numeric.

    Returns:
        A new, all-float DataFrame with the same columns. The input frame is
        not modified.
    """
    # Work on a copy so calling this twice on the same raw frame is repeatable.
    df = df.copy()

    # Fill missing ages per sex using boolean masks instead of a row loop.
    for sex in ('male', 'female'):
        same_sex = df['Sex'] == sex
        needs_age = same_sex & df['Age'].isnull()
        if not needs_age.any():
            continue
        mean_age = df.loc[same_sex, 'Age'].mean()
        # The mean is NaN when no age is known for this sex; int(NaN) would
        # raise, so such rows are left for the median fill further down.
        if pd.notnull(mean_age):
            df.loc[needs_age, 'Age'] = int(mean_age)

    df['Embarked'] = df['Embarked'].fillna('S')

    # Keep only the leading character of the cabin; 'N' marks "no cabin given".
    df['Cabin'] = df['Cabin'].str[0].fillna('N')

    df = convertDataToNumber(df, ['Sex', 'Embarked', 'Cabin'])
    # Make sure the arithmetic below runs on numeric data whatever dtype the
    # encoding step produced.
    df = df.astype('float64')

    # A NaN left in any column would pass through the scaling unchanged and be
    # handed to the caller, so fill what is left with the column median.
    df = df.fillna(df.median())

    # A constant column has max == min; dividing by 1 instead of 0 maps it to
    # 0.0 rather than NaN.
    value_range = (df.max() - df.min()).replace(0, 1)

    return (df - df.mean()) / value_range
    
def getBatch(df, index=None, batch_size=50):
    """Draw a random mini-batch of features and one-hot labels.

    Args:
        df: frame holding the feature columns and a ``Survived`` column.
        index: unused; kept so existing calls keep working.
        batch_size: number of rows sampled without replacement (default 50).

    Returns:
        (batch_x, batch_y):
            batch_x -- DataFrame with nine columns ('Age' appears twice).
            batch_y -- float32 array of shape (batch_size, 2); column 0 is
                       "did not survive", column 1 is "survived".

    Raises:
        ValueError: from ``DataFrame.sample`` when ``df`` has fewer than
            ``batch_size`` rows.
    """
    batch = df.sample(batch_size)

    # 'Age' is listed twice -- presumably to pad the eight features to the nine
    # values the network input expects (TODO confirm); left unchanged.
    batch_x = batch[['Pclass', 'Sex', 'Age', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']]

    # Build both label columns explicitly so the width is always 2, even when
    # the sampled rows all belong to one class (get_dummies would emit a
    # single column then). Labels may arrive raw (0/1) or mean-centred by
    # preprocessingData; in both encodings only survivors are > 0, assuming
    # both classes were present in the frame the labels were scaled from.
    survived = (batch['Survived'] > 0).values
    batch_y = np.column_stack((~survived, survived)).astype(np.float32)

    return batch_x, batch_y

In [3]:
# Impute, encode and scale both frames. Each call works only from the frame it
# is given, so the training and test sets are processed independently.
train_df, test_df = preprocessingData(train_df), preprocessingData(test_df)



In [4]:
# Inputs: nine feature values per row, viewed as a 3x3 single-channel image;
# labels are one-hot over two classes.
X = tf.placeholder('float32', shape=[None, 9])
X_2d = tf.reshape(X, [-1, 3, 3, 1])
Y = tf.placeholder('float32', shape=[None, 2])

# Layer 1: 2x2 convolution (1 -> 32 channels), ReLU, then a 2x2 max-pool with
# stride 1. 'VALID' pooling shrinks the 3x3 map to 2x2.
W1 = tf.get_variable("W1", shape=[2, 2, 1, 32],
                     initializer=tf.contrib.layers.xavier_initializer())
L1 = tf.nn.max_pool(
    tf.nn.relu(tf.nn.conv2d(X_2d, W1, strides=[1, 1, 1, 1], padding='SAME')),
    ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='VALID')

# Layer 2: 1x1 convolution (32 -> 64 channels), ReLU, then a 1x1 pool that
# leaves the 2x2 map unchanged; the result is flattened to 2*2*64 values.
W2 = tf.get_variable("W2", shape=[1, 1, 32, 64],
                     initializer=tf.contrib.layers.xavier_initializer())
L2 = tf.nn.max_pool(
    tf.nn.relu(tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME')),
    ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], padding='SAME')
L2 = tf.reshape(L2, [-1, 2*2*64])

# Output layer: a single matrix product (no bias) producing two raw logits.
W3 = tf.get_variable("W3", shape=[2*2*64, 2],
                     initializer=tf.contrib.layers.xavier_initializer())
hypothesis = tf.matmul(L2, W3)

# The loss op applies softmax itself, so `hypothesis` stays un-normalised.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y)
)

optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

In [5]:
# The session is left open on purpose: the prediction cell below reuses it.
sess = tf.Session()

sess.run(tf.global_variables_initializer())

training_epochs = 100
total_batch = 100

# Every getBatch call draws a fresh random sample, so an "epoch" here means
# `total_batch` random batches rather than one full pass over the data.
for epoch in range(training_epochs):
    avg_cost = 0
    for i in range(total_batch):
        batch_x, batch_y = getBatch(train_df)
        feed_dict = {X: batch_x, Y: batch_y}
        # One optimisation step; `c` is the loss on this batch.
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')



Epoch: 0001 cost = 0.603250706
Epoch: 0002 cost = 0.501237480
Epoch: 0003 cost = 0.443857658
Epoch: 0004 cost = 0.442735936
Epoch: 0005 cost = 0.411601383
Epoch: 0006 cost = 0.410644464
Epoch: 0007 cost = 0.388074892
Epoch: 0008 cost = 0.410872310
Epoch: 0009 cost = 0.398013740
Epoch: 0010 cost = 0.377872762
Epoch: 0011 cost = 0.382161790
Epoch: 0012 cost = 0.377055575
Epoch: 0013 cost = 0.381564520
Epoch: 0014 cost = 0.378272261
Epoch: 0015 cost = 0.385138526
Epoch: 0016 cost = 0.376472894
Epoch: 0017 cost = 0.369250470
Epoch: 0018 cost = 0.382431559
Epoch: 0019 cost = 0.369130192
Epoch: 0020 cost = 0.380826934
Epoch: 0021 cost = 0.373642359
Epoch: 0022 cost = 0.363722053
Epoch: 0023 cost = 0.373648473
Epoch: 0024 cost = 0.372212067
Epoch: 0025 cost = 0.375910466
Epoch: 0026 cost = 0.370814288
Epoch: 0027 cost = 0.363812254
Epoch: 0028 cost = 0.355453513
Epoch: 0029 cost = 0.375562041
Epoch: 0030 cost = 0.374713204
Epoch: 0031 cost = 0.354504902
Epoch: 0032 cost = 0.360932247
Epoch: 0

In [6]:
# Same nine-column layout as the training batches ('Age' listed twice). The
# selection gets its own name: re-selecting from a frame that already holds two
# 'Age' columns would multiply them, so test_df is left untouched and this cell
# can be re-run safely.
test_features = test_df[['Pclass', 'Sex', 'Age', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']]

prediction = sess.run(hypothesis, feed_dict={X: test_features})

# `prediction` is already a NumPy array of logits, so take the arg-max with
# NumPy. This avoids adding a new op to the graph on every run and the
# deprecated `dimension=` argument of tf.argmax.
refined_prediction = np.argmax(prediction, axis=1)

# Column index of the larger logit is the predicted class (0 or 1).
result_df.loc[:, 'Survived'] = refined_prediction

result_df.to_csv("my_solution.csv", index=False)

Instructions for updating:
Use the `axis` argument instead
