In [1]:
import pandas as pd
import tensorflow as tf

In [2]:
data = pd.read_csv('train.csv')

In [3]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
data['Sex'].value_counts()

male      577
female    314
Name: Sex, dtype: int64

In [5]:
data.count()

PassengerId    891
Survived       891
Pclass         891
Name           891
Sex            891
Age            714
SibSp          891
Parch          891
Ticket         891
Fare           891
Cabin          204
Embarked       889
dtype: int64

In [6]:
data.corr()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
PassengerId,1.0,-0.005007,-0.035144,0.036847,-0.057527,-0.001652,0.012658
Survived,-0.005007,1.0,-0.338481,-0.077221,-0.035322,0.081629,0.257307
Pclass,-0.035144,-0.338481,1.0,-0.369226,0.083081,0.018443,-0.5495
Age,0.036847,-0.077221,-0.369226,1.0,-0.308247,-0.189119,0.096067
SibSp,-0.057527,-0.035322,0.083081,-0.308247,1.0,0.414838,0.159651
Parch,-0.001652,0.081629,0.018443,-0.189119,0.414838,1.0,0.216225
Fare,0.012658,0.257307,-0.5495,0.096067,0.159651,0.216225,1.0


# Prepare the data

In [7]:
#custom transformer for gender

from sklearn.base import BaseEstimator, TransformerMixin

class GenderChange(BaseEstimator, TransformerMixin):
    def fit(self, data):
        return self
    def transform(self, data):
        data.loc[data['Sex'] == 'male', 'Sex'] = 0
        data.loc[data['Sex'] == 'female', "Sex"] = 1
        return data

In [8]:
class FeatureSelector(BaseEstimator, TransformerMixin):
    def __init__(self, attributes):
        self.attributes = attributes
    def fit(self, data):
        return self
    def transform(self, data):
        return data[self.attributes]

In [9]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

titanic_pipeline = Pipeline([
    ('feature select', FeatureSelector(["Age", "SibSp", "Parch", "Fare", "Sex"])),
    ('change gender', GenderChange()),
    ('imputer', SimpleImputer(strategy="median")),
])

In [10]:
prep_data = titanic_pipeline.fit_transform(data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [11]:
prep_data.shape

(891, 5)

In [12]:
labels = data['Survived']

In [13]:
labels_reshaped = labels.to_numpy().reshape(891, 1)
labels_reshaped.shape

(891, 1)

# Construction phase of NN

In [14]:
import numpy as np
n_inputs = 5
n_hidden1 = 100
n_hidden2 = 50
n_outputs = 1

In [15]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs))
Y = tf.placeholder(tf.float32, shape=(None))

In [16]:
def layer(X, n_neurons, activation=None):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else: 
            return Z

In [27]:
hidden1 = layer(X, n_hidden1, activation=tf.nn.relu)
hidden2 = layer(hidden1, n_hidden2, activation=tf.nn.relu)
logits = layer(hidden2, n_outputs)

In [28]:
with tf.name_scope('loss'):
#     labels_cast = tf.cast(Y, tf.float32)
    log_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits)
    
    loss = tf.reduce_mean(log_loss)

In [29]:
with tf.name_scope('train'):
    learning_rate = 0.01
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [30]:
with tf.name_scope("eval"):
    predictions = tf.nn.sigmoid(logits)
    correct_pred = tf.equal(tf.round(predictions), Y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
#     correct = tf.nn.in_top_k(logits, Y, 1)
#     accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# Execution phase

In [31]:
n_epochs = 1000

init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    for i in range(0, n_epochs):
        sess.run(training_op, feed_dict={X: prep_data, Y: labels})
        if i % 100 == 0:
            acc = accuracy.eval(feed_dict={X: prep_data, Y: labels})
            print(acc)
            

0.52177215
0.61616164
0.61616164
0.61616164
0.61616164
0.61616164
0.61616164
0.61616164
0.61616164
0.61616164
