In [3]:
import io
import pandas as pd
import numpy as np
import tensorflow as tf

In [4]:
# Load the training and test tables from CSV files in the working directory.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

In [5]:
# Detach the 'Survived' column from the training frame; it is passed to
# input_in as the label when the training input function is built.
train_df_survived_col = train_df.pop('Survived')

# Remove 'Cabin' from both frames (train first, then test), keeping the
# removed columns in their own variables.
train_df_cabin_col, test_df_cabin_col = [frame.pop('Cabin') for frame in (train_df, test_df)]

def fixData(target, cols):
  """Fill missing values in the given columns of ``target`` in place.

  Numeric (non-boolean) columns are filled with the column mean; every other
  column is filled with its most frequent value (the first mode). Columns
  without missing values, and columns with no non-missing value to derive a
  mode from, are left unchanged.

  Args:
    target: pandas DataFrame to modify in place.
    cols: iterable of column names present in ``target``.

  Returns:
    None. ``target`` is mutated.

  Raises:
    KeyError: if a name in ``cols`` is not a column of ``target``.
  """
  for col in cols:
    column = target[col]
    if not column.isna().any():
      continue

    is_numeric = (pd.api.types.is_numeric_dtype(column)
                  and not pd.api.types.is_bool_dtype(column))
    if is_numeric:
      fill_value = column.mean()
    else:
      modes = column.mode()
      if modes.empty:
        # Entirely missing column: there is nothing to impute from.
        continue
      fill_value = modes.iloc[0]

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on the selection, which pandas' copy-on-write mode does not propagate
    # to the parent frame.
    target[col] = column.fillna(fill_value)


In [None]:
catagorical_columns = ['Pclass', 'Sex', 'SibSp', 'Parch', 'Embarked']
numeric_columns = ['Age', 'Fare']

# Impute missing values with fixData (defined in an earlier cell): the
# training frame first, then the test frame, categorical columns before
# numeric ones.
for frame in (train_df, test_df):
  for column_group in (catagorical_columns, numeric_columns):
    fixData(frame, column_group)

# Each categorical feature takes its vocabulary from the distinct values
# found in the (already imputed) training frame.
feature_columns = [
  tf.feature_column.categorical_column_with_vocabulary_list(name, train_df[name].unique())
  for name in catagorical_columns
]

# Numeric features are appended after the categorical ones, as float32.
feature_columns.extend(
  tf.feature_column.numeric_column(name, dtype=tf.float32)
  for name in numeric_columns
)

In [7]:
def input_in(data, lable, epoch=10, shuffle=True, batch_size=32, shuffle_buffer=1000):
  """Create a zero-argument input function that yields (features, label) batches.

  Args:
    data: mapping-like object accepted by ``dict()`` (e.g. a DataFrame);
      converted to a dict of per-column values for the features.
    lable: labels sliced in step with ``data`` (parameter name kept as
      spelled for backward compatibility).
    epoch: passed to ``Dataset.repeat`` after batching.
    shuffle: when true, the dataset is shuffled before batching.
    batch_size: passed to ``Dataset.batch``.
    shuffle_buffer: buffer size passed to ``Dataset.shuffle``; defaults to
      the previously hard-coded 1000.

  Returns:
    A callable taking no arguments that builds and returns the dataset.
  """
  def inp():
    ds = tf.data.Dataset.from_tensor_slices((dict(data), lable))
    if shuffle:
      ds = ds.shuffle(shuffle_buffer)
    return ds.batch(batch_size).repeat(epoch)
  return inp

# Training input function: features from train_df, labels from the popped
# 'Survived' column, with input_in's defaults (epoch=10, shuffle=True, batch_size=32).
train_data_fn = input_in(train_df, train_df_survived_col)

def input_in_no_lable(data, batch_size=32):
  """Create a zero-argument input function that yields feature-only batches.

  Args:
    data: mapping-like object accepted by ``dict()`` (e.g. a DataFrame).
    batch_size: passed to ``Dataset.batch``.

  Returns:
    A callable taking no arguments that builds the dataset: the features are
    sliced, batched, and repeated exactly once (a single pass).
  """
  def _make_dataset():
    features = dict(data)
    sliced = tf.data.Dataset.from_tensor_slices(features)
    batched = sliced.batch(batch_size)
    return batched.repeat(1)
  return _make_dataset

# Prediction input function over the unlabeled test frame
# (default batch_size=32, a single pass over the data).
test_data_fn = input_in_no_lable(test_df)

In [None]:
# Build a linear classifier over the feature columns assembled above,
# train it with the training input function, then request predictions
# for the test input function.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(input_fn=train_data_fn)

# `result` is consumed later via list(result); each element is indexed
# with the "probabilities" key when the submission is built.
result = linear_est.predict(input_fn=test_data_fn)

In [None]:
# Materialize the predictions into a list so they can be indexed by row position.
l = list(result)


In [10]:
# Build the submission table: one row per test passenger, pairing the
# passenger id with the predicted class.
# Ids are read positionally from the column, so the pairing does not depend
# on test_df having a default 0..n-1 index and avoids materializing a whole
# row per passenger.
passenger_ids = test_df['PassengerId'].to_numpy()

if len(l) != len(passenger_ids):
  raise ValueError(
    'prediction count (%d) does not match test row count (%d)'
    % (len(l), len(passenger_ids))
  )

# Class 0 is chosen only when its probability is strictly greater than
# class 1's; ties go to class 1.
results = [
  {
    'PassengerId': passenger_id,
    "Survived": 0 if prediction["probabilities"][0] > prediction["probabilities"][1] else 1,
  }
  for passenger_id, prediction in zip(passenger_ids, l)
]

# Explicit columns keep the header in the CSV even when there are no rows.
results_df = pd.DataFrame(results, columns=['PassengerId', 'Survived'])

results_df.to_csv('submission.csv', index=False)