In [8]:
# Logistic regression example in TF using Kaggle's Titanic Dataset.
# Download train.csv from https://www.kaggle.com/c/titanic/data

import tensorflow as tf
import os

# Trainable model parameters, both initialised to zero.
# The weights form a 5x1 column: one weight per feature column that
# inputs() stacks together (three class indicators, gender, age).
W = tf.Variable(tf.zeros(shape=[5, 1]), name="weights")
# A single scalar bias added to every example's weighted sum.
b = tf.Variable(0.0, name="bias")


def combine_inputs(X):
    """Return the linear combination ``X . W + b`` (the logits).

    This is the affine part of the model; ``inference`` applies the
    sigmoid on top of it and ``loss`` consumes it directly as logits.
    """
    weighted_sum = tf.matmul(X, W)
    return weighted_sum + b


def inference(X):
    """Return the sigmoid of the combined inputs for the examples in X."""
    logits = combine_inputs(X)
    return tf.sigmoid(logits)


def loss(X, Y):
    """Return the mean sigmoid cross-entropy between the logits of X and Y.

    The raw logits from ``combine_inputs`` are passed (not the sigmoid
    output), because ``sigmoid_cross_entropy_with_logits`` applies the
    sigmoid itself.

    API note: the r0.12 signature was
    ``sigmoid_cross_entropy_with_logits(logits, targets, name=None)``
    (https://www.tensorflow.org/versions/r0.12/api_docs/python/nn/classification#sigmoid_cross_entropy_with_logits),
    while r1.2 uses
    ``sigmoid_cross_entropy_with_logits(_sentinel=None, labels=None,
    logits=None, name=None)``
    (https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits),
    hence the keyword arguments below.
    """
    logits = combine_inputs(X)
    per_example = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y)
    return tf.reduce_mean(per_example)


def read_csv(batch_size, file_name, record_defaults):
    """Build a shuffled batch pipeline over the rows of a CSV file.

    Args:
        batch_size: number of rows per returned batch.
        file_name: CSV file name, resolved against the current working
            directory.
        record_defaults: one default per CSV column, passed to
            ``tf.decode_csv``; each default also fixes that column's dtype.

    Returns:
        The result of ``tf.train.shuffle_batch`` over the decoded columns
        (one batched tensor per CSV column).
    """
    csv_path = os.path.join(os.getcwd(), file_name)
    filename_queue = tf.train.string_input_producer([csv_path])

    # The first line of the file is the column header, so skip it.
    line_reader = tf.TextLineReader(skip_header_lines=1)
    _, line = line_reader.read(filename_queue)

    # Turn the text line (a string tensor) into a tuple of column tensors;
    # the defaults fill in empty fields and determine each column's dtype.
    columns = tf.decode_csv(line, record_defaults=record_defaults)

    # The batching op is what actually pulls rows from the file, grouping
    # "batch_size" of them into a single tensor per column.
    return tf.train.shuffle_batch(
        columns,
        batch_size=batch_size,
        capacity=batch_size * 50,
        min_after_dequeue=batch_size,
    )


def inputs(batch_size=100):
    """Build the feature matrix and label column from the Titanic training set.

    Download train.csv from https://www.kaggle.com/c/titanic/data and place
    it in the current working directory. The file looks like:

    PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
    1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S
    2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C

    Args:
        batch_size: number of passengers per batch. Defaults to 100, the
            value that was previously hard-coded in two places.

    Returns:
        A ``(features, survived)`` pair. ``features`` has one row per
        passenger and five columns (is_first_class, is_second_class,
        is_third_class, is_female, age); ``survived`` is reshaped to
        ``[batch_size, 1]`` so it lines up with the model output.
    """
    # One default per CSV column; the default also sets the column dtype.
    # Empty numeric fields (e.g. a missing Age) therefore decode to 0.
    record_defaults = [
        [0.0], [0.0], [0], [""], [""], [0.0],
        [0.0], [0.0], [""], [0.0], [""], [""],
    ]
    # Only survived, pclass, sex and age are used by the model; the other
    # columns still have to be unpacked because the whole row is decoded.
    (_passenger_id, survived, pclass, _name, sex, age,
     _sibsp, _parch, _ticket, _fare, _cabin, _embarked) = \
        read_csv(batch_size, "train.csv", record_defaults)

    # Convert categorical data: one 0/1 indicator per passenger class.
    # tf.cast(..., tf.float32) replaces the deprecated tf.to_float.
    is_first_class = tf.cast(tf.equal(pclass, [1]), tf.float32)
    is_second_class = tf.cast(tf.equal(pclass, [2]), tf.float32)
    is_third_class = tf.cast(tf.equal(pclass, [3]), tf.float32)

    gender = tf.cast(tf.equal(sex, ["female"]), tf.float32)

    # Stack the features into a single matrix, then transpose so that
    # there is one example per row and one feature per column.
    features = tf.transpose(
        tf.stack([is_first_class, is_second_class, is_third_class, gender, age]))
    # Use the same batch_size as the reader so the two cannot drift apart.
    survived = tf.reshape(survived, [batch_size, 1])

    return features, survived


def train(total_loss):
    """Return an op that takes one gradient-descent step on total_loss.

    Uses plain gradient descent with a fixed learning rate of 0.01.
    """
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    return optimizer.minimize(total_loss)


def evaluate(sess, X, Y):
    """Print the fraction of examples in X whose prediction matches Y.

    A prediction counts as 1.0 when the inferred value is above 0.5 and
    as 0.0 otherwise; the printed number is the mean of the matches.
    """
    predicted = tf.cast(inference(X) > 0.5, tf.float32)
    matches = tf.cast(tf.equal(predicted, Y), tf.float32)
    accuracy = tf.reduce_mean(matches)

    print (sess.run(accuracy))

# Launch the graph in a session, setup boilerplate
with tf.Session() as sess:

    # Build the whole graph first ...
    X, Y = inputs()

    total_loss = loss(X, Y)
    train_op = train(total_loss)

    # ... then initialise, so that any variable created while building the
    # graph is covered by the initializer.
    sess.run(tf.global_variables_initializer())

    # The input pipeline is fed by background queue-runner threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        # actual training loop
        training_steps = 1000
        for step in range(training_steps):
            # Fetch the loss in the same run as the training op: a separate
            # sess.run would dequeue another batch, so the reported loss
            # would belong to a batch that was never trained on.
            _, loss_value = sess.run([train_op, total_loss])
            # for debugging and learning purposes, see how the loss gets
            # decremented thru training steps
            if step % 10 == 0:
                print ("loss: ", [loss_value])

        evaluate(sess, X, Y)

        # NOTE(review): the purpose of this pause is not evident from the
        # script; kept as in the original -- confirm whether it is needed.
        import time
        time.sleep(5)
    finally:
        # Always stop and join the queue-runner threads, even when training
        # or evaluation raised. The enclosing "with" closes the session, so
        # no explicit sess.close() is needed.
        coord.request_stop()
        coord.join(threads)

loss:  [0.6857698]
loss:  [0.68023866]
loss:  [0.71859926]
loss:  [0.70661646]
loss:  [0.63169461]
loss:  [0.74736029]
loss:  [0.68234819]
loss:  [0.7007103]
loss:  [0.68386006]
loss:  [0.66306257]
loss:  [0.68127668]
loss:  [0.72555137]
loss:  [0.68212616]
loss:  [1.0859654]
loss:  [0.64965332]
loss:  [0.68079656]
loss:  [0.68100387]
loss:  [0.7896477]
loss:  [0.64102387]
loss:  [0.66400725]
loss:  [0.705841]
loss:  [0.58889341]
loss:  [0.68924272]
loss:  [0.66839635]
loss:  [0.73202324]
loss:  [0.67480469]
loss:  [0.63285089]
loss:  [0.69808853]
loss:  [0.63049859]
loss:  [0.58562064]
loss:  [0.58393049]
loss:  [0.82518232]
loss:  [0.62124264]
loss:  [0.65973192]
loss:  [0.71335542]
loss:  [0.63013113]
loss:  [0.59793222]
loss:  [0.64591157]
loss:  [0.59271604]
loss:  [0.58625269]
loss:  [0.65081495]
loss:  [0.92214394]
loss:  [0.63958752]
loss:  [0.64558244]
loss:  [0.56279212]
loss:  [0.56727159]
loss:  [0.60606974]
loss:  [0.60070342]
loss:  [0.59954375]
loss:  [0.59509444]
loss: 