# Make decision tree from iris data

Taken from Google's [Visualizing a Decision Tree - Machine Learning Recipes #2](https://www.youtube.com/watch?v=tNa99PG8hR8)

In [2]:
import tensorflow.contrib.learn as skflow
from sklearn.datasets import load_iris
from sklearn import metrics

In [58]:
# Load the iris dataset bundled with scikit-learn (see the load_iris import above).
iris = load_iris()

In [59]:
# Inspect which fields the loaded dataset object exposes.
iris.keys()

dict_keys(['DESCR', 'data', 'target_names', 'target', 'feature_names'])

In [60]:
# Names of the four measured features.
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [61]:
# Names of the three target classes.
iris.target_names

array(['setosa', 'versicolor', 'virginica'], 
      dtype='<U10')

In [62]:
# numpy is imported in this cell because the notebook's only other numpy import
# sits in a much later cell; without it this cell raises NameError on a fresh
# kernel (Restart & Run All).
import numpy as np

# Withhold 3 for testing: one sample per class (see the test_target values).
test_idx = [0, 50, 100]

# Training split = every row except the withheld ones.
train_data = np.delete(iris.data, test_idx, axis=0)
train_target = np.delete(iris.target, test_idx)

In [63]:
# Labels of the withheld rows (the indices listed in test_idx).
test_target = iris.target[test_idx]  # array([0, 1, 2])

In [64]:
# Feature rows of the withheld samples.
# NOTE: the name test_data is rebound later by the RNN example cell.
test_data = iris.data[test_idx]  # array([[ 5.1,  3.5,  1.4,  0.2], [ 7. ,  3.2,  4.7,  1.4], ...])

# TensorFlow

Examples from <http://terrytangyuan.github.io/2016/03/14/scikit-flow-intro/>

## Deep neural network

A three-layer deep neural network with 10, 20, and 10 hidden units in the first, second, and third layers, respectively.

In [19]:
# DNN classifier with three hidden layers (10, 20 and 10 units) and 3 output classes.
classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
# NOTE(review): this fits on the full dataset (iris.data), not on the
# train_data/train_target hold-out split built earlier, so the withheld rows
# are seen during training.
classifier.fit(iris.data, iris.target)

Step #100, epoch #20, avg. train loss: 0.54416
Step #200, epoch #40, avg. train loss: 0.14521


TensorFlowDNNClassifier(batch_size=32, class_weight=None, clip_gradients=5.0,
            config=None, continue_training=False, dropout=None,
            hidden_units=[10, 20, 10], learning_rate=0.1, n_classes=3,
            optimizer='Adagrad', steps=200, verbose=1)

In [20]:
# Accuracy is computed on the same iris.data used for fitting, so this is a
# training accuracy rather than a held-out estimate.
metrics.accuracy_score(iris.target, classifier.predict(iris.data))

0.93999999999999995

## Custom model with TensorFlowEstimator()

In [29]:
def my_model(X, y):
    """Build a DNN with hidden layers of 10, 20 and 10 units feeding a logistic-regression output.

    No dropout is applied: per the inline comment below, passing
    keep_prob=0.5 to the dnn op raised an error, so it was left out.

    Args:
        X: input features (presumably a tensor supplied by the estimator -- confirm).
        y: targets (presumably a tensor supplied by the estimator -- confirm).

    Returns:
        Whatever skflow.models.logistic_regression returns for the final
        hidden layer and y.
    """
    layers = skflow.ops.dnn(X, [10, 20, 10])  # keep_prob=0.5 causes error
    return skflow.models.logistic_regression(layers, y)

In [30]:
# Wrap the custom my_model function in a generic estimator with 3 output classes.
# This rebinds `classifier`, replacing the DNN classifier from the previous section.
classifier = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=3)
# Fitted on the full dataset (iris.data), not on the hold-out training split.
classifier.fit(iris.data, iris.target)

Step #100, epoch #20, avg. train loss: 0.54416
Step #200, epoch #40, avg. train loss: 0.14521


TensorFlowEstimator(batch_size=32, class_weight=None, clip_gradients=5.0,
          config=None, continue_training=False, learning_rate=0.1,
          model_fn=<function my_model at 0x10fa567b8>, n_classes=3,
          optimizer='Adagrad', steps=200, verbose=1)

In [32]:
# Accuracy on the same iris.data the estimator was fitted on (training accuracy).
metrics.accuracy_score(iris.target, classifier.predict(iris.data))

0.93999999999999995

# Recurrent neural network

See <http://terrytangyuan.github.io/2016/03/14/scikit-flow-intro/#recurrent-neural-network>.

In [49]:
# NOTE(review): n_classes=15 does not match the three iris classes listed in
# iris.target_names, and no input_op_fn is supplied, unlike the working RNN
# example further down -- confirm intent.
classifier = skflow.TensorFlowRNNClassifier(rnn_size=2, n_classes=15)

In [50]:
# Fitting this RNN classifier directly on the iris arrays fails with the
# TypeError shown in the output below.
classifier.fit(iris.data, iris.target)

TypeError: inputs must be a list

The following example is adapted from the TensorFlow test suite: <https://github.com/tensorflow/tensorflow/blob/17dcc5a176d152caec570452d28fb94920cceb8c/tensorflow/contrib/learn/python/learn/tests/test_nonlinear.py>

In [56]:
import numpy as np
from tensorflow.contrib.learn.python import learn
import tensorflow as tf

# Seed NumPy's global random generator.
np.random.seed(42)

# Toy inputs: four samples with five features each, stored as float32.
data = np.array(
    [
        [2, 1, 2, 2, 3],
        [2, 2, 3, 4, 5],
        [3, 3, 1, 2, 1],
        [2, 4, 5, 4, 1],
    ],
    dtype=np.float32,
)

# One binary class label per sample (used for classification).
labels = np.array([1, 0, 1, 0], dtype=np.float32)

# One continuous target per sample (for regression).
targets = np.array([10, 16, 10, 16], dtype=np.float32)

# Two extra samples to predict on; no dtype is given, so NumPy infers an
# integer dtype. NOTE: this rebinds test_data, which an earlier cell set to
# the withheld iris rows.
test_data = np.array(
    [
        [1, 3, 3, 2, 1],
        [2, 3, 4, 5, 6],
    ]
)

def input_fn(X):
  """Split X into 5 pieces along dimension 1 and return the result of tf.split.

  Passed as input_op_fn to the RNN classifiers in this cell.

  NOTE(review): relies on the legacy tf.split(split_dim, num_split, value)
  argument order -- confirm against the installed TensorFlow version.
  """
  column_tensors = tf.split(1, 5, X)
  return column_tensors

# --- Classification with an LSTM cell ---
classifier = learn.TensorFlowRNNClassifier(
    rnn_size=2,
    cell_type="lstm",
    n_classes=2,
    input_op_fn=input_fn,
)
classifier.fit(data, labels)
# Access the fitted parameters; nothing is displayed because these are not the
# last expression of the cell.
classifier.weights_
classifier.bias_
predictions = classifier.predict(test_data)
# Check left disabled from the upstream test:
#assertAllClose(predictions, np.array([1, 0]))

# --- Same task with cell_type="rnn" and two layers ---
classifier = learn.TensorFlowRNNClassifier(
    rnn_size=2,
    cell_type="rnn",
    n_classes=2,
    input_op_fn=input_fn,
    num_layers=2,
)
classifier.fit(data, labels)

TensorFlowRNNClassifier(batch_size=32, bidirectional=False, cell_type='rnn',
            class_weight=None, clip_gradients=5.0, config=None,
            continue_training=False, initial_state=None,
            input_op_fn=<function input_fn at 0x1100fe2f0>,
            learning_rate=0.1, n_classes=2, num_layers=2,
            optimizer='Adagrad', rnn_size=2, sequence_length=None,
            steps=50, verbose=1)

In [66]:
# `classifier` here is the RNN fitted on the 5-column toy `data`, while
# iris.data has 4 feature columns, hence the shape ValueError shown below.
classifier.predict(iris.data)

ValueError: Cannot feed value of shape (32, 4) for Tensor 'input:0', which has shape '(?, 5)'