# Import libraries

In [120]:
import pandas as pd
import numpy as np
import tflearn
from tflearn.data_utils import load_csv
from tflearn.datasets import titanic
from sklearn.model_selection import train_test_split 

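# Dataset
Each passenger record has the following features (example values in parentheses):
 - passenger class (1)
 - name (Allen, Miss. Elisabeth Walton)
 - gender (female)
 - age (29)
 - number of siblings/spouses aboard (0)
 - number of parents/children aboard (0)
 - ticket number (24160)
 - passenger fare (211.3375)

The first CSV column is the label (survived or not); it is loaded separately from these features.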



In [128]:
# Fetch the Titanic CSV through tflearn's dataset helper
DATASET_PATH = 'titanic_dataset.csv'
titanic.download_dataset(DATASET_PATH)

# Column 0 of the CSV is the label; it is returned separately from the
# feature rows and encoded over two classes.
data, labels = load_csv(
    DATASET_PATH,
    target_column=0,
    categorical_labels=True,
    n_classes=2,
)

# Wrap the feature rows in a DataFrame (positional integer column labels)
df = pd.DataFrame(data)

# Hold out 33% of the rows for testing; the fixed seed keeps the split stable
X_train, X_test, y_train, y_test = train_test_split(
    df,
    labels,
    test_size=0.33,
    random_state=42,
)

# Transformations
- Drop column 1 (name)
- Recode columns 2 (gender) and 6 (ticket number) as category codes
- Cast all columns to float32

In [129]:
# Peek at the first raw training rows (this runs before any preprocessing)
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7
376,2,"Collett, Mr. Sidney C Stuart",male,24,0,0,28034,10.5
578,2,"Ware, Mr. John James",male,30,1,0,CA 31352,21.0
494,2,"Mallet, Mrs. Albert (Antoinette Magnin)",female,24,1,1,S.C./PARIS 2079,37.0042
314,1,"Widener, Mrs. George Dunton (Eleanor Elkins)",female,50,1,1,113503,211.5
1098,3,"Palsson, Miss. Stina Viola",female,3,3,1,349909,21.075


In [130]:
def preprocess(r):
    """Turn raw Titanic passenger rows into a float32 feature matrix.

    Parameters
    ----------
    r : pandas.DataFrame
        Raw rows with positional integer column labels. Column 1 (name) is
        dropped if present, column 2 (gender) and column 6 (ticket number)
        are replaced by integer codes, and every remaining column must be
        convertible to float.

    Returns
    -------
    numpy.ndarray
        float32 array with one row per input row and the name column removed.

    Notes
    -----
    The caller's frame is left untouched: ``drop`` returns a new frame and
    all later assignments go to that copy.
    """
    features = r.drop([1], axis=1, errors='ignore')

    # Pin the gender categories so the codes do not depend on which values
    # happen to be present: female -> 0, male -> 1 in every frame (a frame
    # containing only 'male' used to be encoded as 0). Any other value
    # becomes -1.
    features[2] = pd.Categorical(features[2], categories=['female', 'male']).codes

    # Ticket codes are derived from the values in this frame only, so the same
    # ticket can receive different codes in different frames (e.g. train vs.
    # test). Kept as-is so the output width stays unchanged.
    features[6] = features[6].astype('category').cat.codes

    # One cast for the whole frame instead of a per-column loop
    return features.astype(np.float32).values

In [131]:
# Replace the raw training DataFrame with its float32 feature matrix.
# NOTE: because X_train is rebound, this cell cannot be re-run on its own:
# preprocess() returns a NumPy array, and a second call on that array fails
# at r.drop(...). Re-run the train/test split cell first.
X_train = preprocess(X_train)
# Wrap the array in a DataFrame only to get the tabular display
pd.DataFrame(X_train).head()

Unnamed: 0,0,1,2,3,4,5,6
0,2.0,1.0,24.0,0.0,0.0,224.0,10.5
1,2.0,1.0,30.0,1.0,0.0,561.0,21.0
2,2.0,0.0,24.0,1.0,1.0,610.0,37.0042
3,1.0,0.0,50.0,1.0,1.0,27.0,211.5
4,3.0,0.0,3.0,3.0,1.0,388.0,21.075001


In [132]:
# (rows, feature columns) of the preprocessed training matrix
X_train.shape

(877, 7)

In [133]:
# (rows, classes) of the training labels; the row count should match X_train
y_train.shape

(877, 2)

# Build the neural network

In [134]:
# Clear TensorFlow's default graph before (re)building the network
import tensorflow as tf
tf.reset_default_graph()

# Take the input width from the preprocessed training matrix instead of
# hard-coding it, so the network stays in sync with preprocess().
n_features = X_train.shape[1]

# Build neural network: input -> two 32-unit dense layers -> 2-way softmax
net = tflearn.input_data(shape=[None, n_features])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
# Attach the training/regression layer with tflearn's default settings
net = tflearn.regression(net)

# Train it

In [135]:
# Define model
model = tflearn.DNN(net)
# Start training on the whole training matrix: 20 epochs, mini-batches of 32
model.fit(X_train, y_train, n_epoch=20, batch_size=32, show_metric=True)
# Second model with a hold-out validation split: validation_set=0.1 sets aside
# 10% of the training rows for validation (this is not k-fold cross-validation).
# NOTE(review): model2 wraps the same `net` object as `model`; the two DNNs may
# interfere through the shared graph - TODO confirm, and build a separate
# network per model if so.
model2 = tflearn.DNN(net)
model2.fit(X_train, y_train, n_epoch=20, batch_size=32, show_metric=True, validation_set=0.1) 

Training Step: 499  | time: 0.055s
| Adam | epoch: 020 | loss: 0.00000 - acc: 0.0000 -- iter: 768/789
Training Step: 500  | time: 1.061s
| Adam | epoch: 020 | loss: 0.00000 - acc: 0.0000 | val_loss: 0.57327 - val_acc: 0.7159 -- iter: 789/789
--


# Evaluate it

In [136]:
# Convert the held-out rows with the same preprocess() used for training.
# NOTE: preprocess() derives the ticket-number codes from the frame it is
# given, so the codes computed here are independent of the ones the models
# were trained on. Rebinding X_test also means this cell cannot be re-run
# without redoing the train/test split.
X_test = preprocess(X_test)
# Score both models on the training data and on the held-out test data
metric_train = model.evaluate(X_train, y_train)
metric_test = model.evaluate(X_test, y_test)
metric_train_1 = model2.evaluate(X_train, y_train)
metric_test_1 = model2.evaluate(X_test, y_test)

In [137]:
# Report the first entry of each evaluation result, one line per model/split
accuracy_report = [
    ('Model 1 Accuracy on train set: %.9f', metric_train),
    ("Model 1 Accuracy on test set: %.9f", metric_test),
    ('Model 2 Accuracy on train set: %.9f', metric_train_1),
    ("Model 2 Accuracy on test set: %.9f", metric_test_1),
]
for template, metric in accuracy_report:
    print(template % metric[0])

Model 1 Accuracy on train set: 0.722919043
Model 1 Accuracy on test set: 0.685185185
Model 2 Accuracy on train set: 0.722919043
Model 2 Accuracy on test set: 0.659722222


# Let's create some fake data for Leo and Kate

In [88]:
# Two made-up passengers, laid out in the raw column order:
# class, name, gender, age, siblings/spouses, parents/children, ticket, fare
dicaprio = [3, 'Jack Dawson', 'male', 19, 0, 0, 'N/A', 5.0000]
winslet = [1, 'Rose DeWitt Bukater', 'female', 17, 1, 2, 'N/A', 100.0000]

# Run both rows through the shared preprocessing, then split the result
# back into one feature vector per passenger
movie_rows = preprocess(pd.DataFrame([dicaprio, winslet]))
dicaprio, winslet = movie_rows

# Column 1 of each prediction is the class-1 (survived) probability
pred = model.predict([dicaprio, winslet])
captions = ("DiCaprio Surviving Rate:", "Winslet Surviving Rate:")
for caption, class_probs in zip(captions, pred):
    print(caption, class_probs[1])

DiCaprio Surviving Rate: 0.128768
Winslet Surviving Rate: 0.903721
