# Section 3-1 - Deep Learning

For detailed steps on extracting and cleaning data, please review Sections 1-0 to 1-2.

## Pandas - Extracting data

In [4]:
import pandas as pd
import numpy as np

df = pd.read_csv('../data/train.csv')

## Pandas - Cleaning data

In [5]:
df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)

age_mean = df['Age'].mean()
df['Age'] = df['Age'].fillna(age_mean)

from scipy.stats import mode

mode_embarked = mode(df['Embarked'])[0][0]
df['Embarked'] = df['Embarked'].fillna(mode_embarked)

df['Gender'] = df['Sex'].map({'female': 0, 'male': 1}).astype(int)

pd.get_dummies(df['Embarked'], prefix='Embarked').head(10)
df = pd.concat([df, pd.get_dummies(df['Embarked'], prefix='Embarked')], axis=1)

df = df.drop(['Sex', 'Embarked'], axis=1)

cols = df.columns.tolist()
cols = [cols[1]] + cols[0:1] + cols[2:]

df = df[cols]

train_data = df.values

## TensorFlow - Training the model

In [6]:
import skflow

model = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=2, steps=200)
model.fit(train_data[:800, 2:], train_data[:800, 0])

Step #1, avg. loss: 1.01257
Step #21, avg. loss: 1.36267
Step #41, avg. loss: 0.62276
Step #61, avg. loss: 0.64010
Step #81, avg. loss: 0.61333
Step #101, avg. loss: 0.62791
Step #121, avg. loss: 0.63716
Step #141, avg. loss: 0.61182
Step #161, avg. loss: 0.63481
Step #181, avg. loss: 0.64032


TensorFlowDNNClassifier(batch_size=32, continue_training=False,
            hidden_units=None, learning_rate=0.1, n_classes=2,
            optimizer='SGD', steps=200, tf_master='', tf_random_seed=42)

## TensorFlow - Making predictions

In [7]:
y_test = train_data[800:, 0]
y_prediction = model.predict(train_data[800:, 2:])

print "prediction accuracy:", np.sum(y_test == y_prediction)*1./len(y_test)

prediction accuracy: 0.747252747253


In [9]:
def dnn_tanh(X, y):
    layers = skflow.ops.dnn(X, [10, 20, 10], tf.tanh)
    return skflow.ops.logistic_classifier(layers, y)

In [10]:
classifier = skflow.TensorFlowEstimator(model_fn=dnn_tanh, n_classes=2, batch_size=128, steps=500, learning_rate=0.05)

In [11]:
classifier.fit(train_data[:800, 2:], train_data[:800, 0])

NameError: global name 'tf' is not defined

## Appendix: Installation

For Mac:

In [None]:
pip install https://storage.googleapis.com/tensorflow/mac/tensorflow-0.5.0-py2-none-any.whl
pip install git+git://github.com/google/skflow.git

For Ubuntu:

In [None]:
pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.5.0-cp27-none-linux_x86_64.whl
pip install git+git://github.com/google/skflow.git