In [2]:
import random
import pandas
import numpy as np
from sklearn import metrics, cross_validation

import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib import learn


## Read Data

- Read CSV file and print out 5 random examples

In [3]:
# Seed Python's RNG so the same five sample rows are drawn on every run.
random.seed(42)

data = pandas.read_csv('titanic_dataset.csv')

# Python 3's random.sample() only accepts a real sequence as its population,
# and a pandas Index does not qualify, so pass a plain list of index labels.
rows = random.sample(list(data.index), 5)

# `rows` holds index labels, so select with the label-based `.loc` indexer
# (`.ix` has been removed from pandas). Kept as the last expression so the
# notebook renders the sampled rows.
data.loc[rows]

Unnamed: 0,survived,pclass,name,sex,age,sibsp,parch,ticket,fare
837,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37.0,2,0,3101276,7.925
32,1,1,"Bonnell, Miss. Caroline",female,30.0,0,0,36928,164.8667
360,1,2,"Caldwell, Mr. Albert Francis",male,26.0,1,1,248738,29.0
292,1,1,"Taylor, Mr. Elmer Zebley",male,48.0,1,0,19996,52.0
964,0,3,"Lester, Mr. James",male,39.0,0,0,A/4 48871,24.15


## Preprocess Data

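- Remove the name and ticket ID columns, as they won't help much in prediction
- Drop `survived` from the features (it is the label) and encode `sex` as 1.0 for female, 0.0 otherwise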

In [4]:
def preprocess(data):
    """Build the feature matrix from the raw passenger frame.

    Drops the 'survived', 'name' and 'ticket' columns, then replaces every
    value in the 'sex' column with 1.0 for 'female' and 0.0 for anything else.

    Args:
        data: pandas DataFrame containing at least the 'survived', 'name',
            'ticket' and 'sex' columns. It is not modified.

    Returns:
        A 2-D numpy array with one row per row of `data`; the columns keep
        the order they have in `data` once the dropped columns are removed.

    Raises:
        KeyError: if `data` has no 'sex' column.
    """
    features = data.drop(['survived', 'name', 'ticket'], axis=1)

    # Look the column up by name instead of assuming it sits at position 1,
    # so a different column order cannot silently clobber another feature.
    sex_col = features.columns.get_loc('sex')

    train = features.values
    # Each `row` is a view into `train`, so the assignment updates it in place.
    for row in train:
        row[sex_col] = 1. if row[sex_col] == 'female' else 0.

    return train

In [5]:
# Prepare training data: the labels come straight from the 'survived' column,
# the features are whatever preprocess() builds from the same frame.
y_train = data['survived']
x_train = preprocess(data)

## Define and train the model

In [6]:
# Let tf.contrib.learn derive the feature columns from the training matrix.
feature_columns = learn.infer_real_valued_columns_from_input(x_train)
# Gradient descent with a fixed learning rate of 0.05.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.05)

# Two-class linear model.
# NOTE(review): model_dir='model' is presumably where training state is kept
# between runs -- confirm whether a re-run resumes from an earlier fit.
classifier = learn.LinearClassifier(
    n_classes=2,
    feature_columns=feature_columns,
    optimizer=optimizer,
    model_dir='model',
)

# Kept as the last expression so the notebook shows the fitted estimator.
classifier.fit(x_train, y_train, batch_size=128, steps=500)




LinearClassifier()

## Let's Predict the Survival Chances of Our Beloved Rose & Jack!

**Jack**, **19** years old, is **male**, a **3rd** class ticket holder, and let's say his ticket fare is **$5**.
He has no siblings, spouse, or parents aboard.

**Rose**, **17** years old, is **female**, a 1st class passenger who holds an expensive **$100** ticket.
Rose's parents and fiance are aboard.

Let's predict the survival of Cal too!
**Cal**, Rose's **fiance**, is **30** years old, **male**, a **1st class** passenger, and holds a **$100** ticket.

In [7]:
# Feature order for each passenger:
# class, gender (1.0 = female, 0.0 = male), age, sibling/spouse, parents, fare
Jack = [3, 0.0, 19, 0, 0, 5.0000]
Rose = [1, 1.0, 17, 1, 2, 100.0000]
Cal = [1, 0.0, 30, 1, 0, 100.0]

# One row per passenger, in the order Jack, Rose, Cal.
test = np.array([Jack, Rose, Cal])
pred = classifier.predict(test)
prob = classifier.predict_proba(test)

# Predicted class 0 -> 'No', 1 -> 'Yes'.
answer = ['No', 'Yes']

verdict_templates = [
    "Will Jack Survive? %s",
    "Will Rose Survive? %s",
    "Will Cal Survive? %s",
]
for row, template in enumerate(verdict_templates):
    print(template % answer[pred[row]])

# Column 1 of `prob` is read as the probability of the 'Yes' class; the first
# template starts with a newline to separate the two groups of output.
chance_templates = [
    "\nJack's Surviving Chance: %f%%",
    "Rose's Surviving Chance: %f%%",
    "Cal's Surviving Chance: %f%%",
]
for row, template in enumerate(chance_templates):
    print(template % (prob[row][1]*100))

Will Jack Survive? No
Will Rose Survive? Yes
Will Cal Survive? Yes

Jack's Surviving Chance: 0.003317%
Rose's Surviving Chance: 100.000000%
Cal's Surviving Chance: 99.974340%
