# TFLearn Kaggle Titanic

## Get data

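Download `train.csv` from the [Kaggle Titanic competition data page](https://www.kaggle.com/c/titanic/data) and save it as `data/train.csv`, which is the path this notebook reads.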

In [1]:
import shutil

# Remove TensorBoard logs left over from earlier runs so this run starts clean.
# ignore_errors=True mirrors `rm -rf`: a missing `logs` directory is not an error.
shutil.rmtree('logs', ignore_errors=True)

In [2]:
import pandas as pd
import numpy as np

# Load the training CSV into a DataFrame.
TRAIN_CSV = 'data/train.csv'
df = pd.read_csv(TRAIN_CSV)

In [3]:
# Preview the first rows of the table as loaded from the CSV.
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## Cleaning data

In [4]:
# Drop the columns that are not used as model features.
df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)

# Fill missing ages with the mean age.
age_mean = df['Age'].mean()
df['Age'] = df['Age'].fillna(age_mean)

from scipy.stats import mode  # not used by the active code in this cell

# NOTE(review): missing embarkation ports are filled with the hard-coded value "C".
# A mode-based fill was drafted here earlier but never enabled - confirm which is intended.
df['Embarked'] = df['Embarked'].fillna("C")

# Encode sex as an integer column: female -> 0, male -> 1.
df['Gender'] = df['Sex'].map({'female': 0, 'male': 1}).astype(int)

# One-hot encode the embarkation port. The dtype returned by get_dummies differs
# between pandas versions (float, uint8 or bool); casting to float keeps
# `df.values` a purely numeric array and yields 0.0 / 1.0 indicator values.
embarked_dummies = pd.get_dummies(df['Embarked'], prefix='Embarked').astype(float)
df = pd.concat([df, embarked_dummies], axis=1)

# The original string columns are now redundant.
df = df.drop(['Sex', 'Embarked'], axis=1)

# Swap the first two columns so the second column (the label) leads the table.
cols = df.columns.tolist()
cols = cols[1:2] + cols[:1] + cols[2:]
df = df[cols]

train_data = df.values

In [5]:
# Preview the first rows of the cleaned table.
df.head()

Unnamed: 0,Survived,PassengerId,Pclass,Age,SibSp,Parch,Fare,Gender,Embarked_C,Embarked_Q,Embarked_S
0,0,1,3,22.0,1,0,7.25,1,0.0,0.0,1.0
1,1,2,1,38.0,1,0,71.2833,0,1.0,0.0,0.0
2,1,3,3,26.0,0,0,7.925,0,0.0,0.0,1.0
3,1,4,1,35.0,1,0,53.1,0,0.0,0.0,1.0
4,0,5,3,35.0,0,0,8.05,1,0.0,0.0,1.0


In [6]:
# (rows, columns) of the array that the training cells slice.
train_data.shape

(891, 11)

## Training

In [7]:
from tflearn.data_utils import to_categorical

# Rows before N_TRAIN form the training set; the remaining rows are held out
# for validation.
N_TRAIN = 700

# Column 0 holds the label, column 1 is skipped, columns 2+ are the features.
X, Xtest = train_data[:N_TRAIN, 2:], train_data[N_TRAIN:, 2:]

# Labels are passed as a 1-D vector and expanded to 2 one-hot classes.
Y = to_categorical(train_data[:N_TRAIN, 0], 2)
# Validation labels must come from the same held-out rows as Xtest
# (previously they were taken from the training rows).
Ytest = to_categorical(train_data[N_TRAIN:, 0], 2)

In [None]:
import tensorflow as tf
import tflearn
from tflearn.data_preprocessing import DataPreprocessing
from tflearn.layers import input_data, fully_connected, dropout

# Input preprocessing: featurewise zero-centering followed by featurewise std normalization.
prep = DataPreprocessing()
prep.add_featurewise_zero_center()
prep.add_featurewise_stdnorm()

# 9 inputs per sample.
net = input_data(shape=[None, 9], data_preprocessing=prep)

# Two identical hidden stages (FC-1/Dropout-1, FC-2/Dropout-2):
# a 256-unit tanh layer with L2 regularization, then dropout.
for stage in (1, 2):
    net = fully_connected(
        net, 256,
        activation='tanh',
        regularizer='L2',
        weight_decay=0.001,
        name='FC-%d' % stage,
    )
    net = dropout(net, 0.5, name='Dropout-%d' % stage)

# Output layer: softmax over the 2 classes.
net = fully_connected(net, 2, activation='softmax', name='FC-3')

opt = tflearn.Adam()
acc = tflearn.Accuracy()
net = tflearn.regression(
    net,
    loss='categorical_crossentropy',
    optimizer=opt,
    metric=acc,
)

# Train for 300 epochs, writing TensorBoard summaries to `logs`.
model = tflearn.DNN(net, tensorboard_dir='logs', tensorboard_verbose=3)
model.fit(
    X, Y,
    validation_set=(Xtest, Ytest),
    n_epoch=300,
    batch_size=128,
    show_metric=True,
    run_id="dense_model-test",
)

In [None]:
# Start TensorBoard on port 6010, reading from the `logs` directory.
# NOTE(review): `!` runs the command in the foreground, so this cell presumably
# keeps running until it is interrupted - confirm this is intended.
!tensorboard --logdir=logs --port=6010