In [1]:
from pandas import DataFrame
from tensorflow.contrib.learn import DNNClassifier, DNNRegressor, LinearClassifier, LinearRegressor, RunConfig
from tensorflow.contrib.layers import one_hot_column, real_valued_column, sparse_column_with_keys
from tensorflow.contrib.layers.python.layers.feature_column import _OneHotColumn, _RealValuedColumn, _SparseColumnKeys
from tensorflow.contrib.learn.python.learn.utils.input_fn_utils import InputFnOps

import numpy
import os
import pandas
import shutil
import tempfile
import tensorflow as tf

# Log at INFO level so TensorFlow's training progress messages (loss, step,
# checkpoint notices) show up in the cell output.
tf.logging.set_verbosity(tf.logging.INFO)

In [41]:
# Run configuration shared by every estimator built in this notebook:
# a single core and a fixed TensorFlow random seed.
estimator_conf = RunConfig(
    num_cores = 1,
    tf_random_seed = 42
)

def load_csv(name):
    """Read the CSV file at ``name`` into a pandas DataFrame.

    The file is parsed with the ``pandas.read_csv`` defaults, so its first
    line is taken as the header row.

    The previous body also called ``df.reset_index()`` and threw the result
    away. ``reset_index`` returns a new frame instead of modifying ``df``, so
    that statement had no effect; it has been dropped and the returned frame
    is exactly what it was before.

    :param name: path (or any other source accepted by ``pandas.read_csv``).
    :return: the parsed DataFrame.
    """
    return pandas.read_csv(name)

def store_csv(df, name):
    """Write ``df`` to the CSV file ``name``, leaving out the index column."""
    write_index = False
    df.to_csv(name, index = write_index)

def store_savedmodel(estimator, serving_input_fn, name):
    """Export ``estimator`` as a text-format SavedModel into ``savedmodel/<name>``.

    The model is first exported into a scratch directory and then moved to
    its final location, replacing any directory that is already there.

    :param estimator: object providing ``export_savedmodel(export_dir_base,
        serving_input_fn = ..., as_text = ...)`` that returns the path of the
        directory it wrote.
    :param serving_input_fn: forwarded unchanged to ``export_savedmodel``.
    :param name: name of the sub-directory to create below ``savedmodel/``.
    """
    export_base = tempfile.mkdtemp()
    try:
        exported_dir = estimator.export_savedmodel(export_base, serving_input_fn = serving_input_fn, as_text = True)
        # The path may come back as bytes; only decode in that case so a
        # text path does not raise on ``.decode``.
        if isinstance(exported_dir, bytes):
            exported_dir = exported_dir.decode("UTF-8")

        target_dir = "savedmodel/" + name
        if os.path.isdir(target_dir):
            shutil.rmtree(target_dir)
        shutil.move(exported_dir, target_dir)
    finally:
        # Remove the scratch directory created above so repeated exports do
        # not leave empty temp directories behind. ignore_errors covers the
        # case where the scratch directory itself was what got moved.
        shutil.rmtree(export_base, ignore_errors = True)

def _dnn_feature_columns(feature_columns):
    """Return ``feature_columns`` as a list prepared for the DNN estimators.

    Every ``_SparseColumnKeys`` entry is wrapped with ``one_hot_column``;
    all other entries are passed through untouched, in the original order.
    """
    dnn_columns = []
    for column in feature_columns:
        if isinstance(column, _SparseColumnKeys):
            column = one_hot_column(column)
        dnn_columns.append(column)
    return dnn_columns

def _input_fn(df, cont_feature_columns, cat_feature_columns, label_column):
    """Turn columns of ``df`` into constant tensors for training/prediction.

    :param df: frame holding the feature and label columns.
    :param cont_feature_columns: names of columns exposed as ``tf.float64``.
    :param cat_feature_columns: names of columns exposed as ``tf.string``.
    :param label_column: name of the column used as the label.
    :return: ``(features, label)`` where ``features`` maps each column name
        to a constant of shape ``[rows, 1]`` and ``label`` is a constant of
        the same shape whose dtype is left for ``tf.constant`` to infer.
    """
    def _as_column_tensor(column, dtype):
        return tf.constant(df[column].values, dtype = dtype, shape = [df[column].size, 1])

    features = {}
    for column in cont_feature_columns:
        features[column] = _as_column_tensor(column, tf.float64)
    # Categorical columns are added second, so on a name clash they replace
    # the continuous entry.
    for column in cat_feature_columns:
        features[column] = _as_column_tensor(column, tf.string)

    label_values = df[label_column].values
    label = tf.constant(label_values, shape = [df[label_column].size, 1])
    return features, label

def _serving_input_fn(cont_feature_columns, cat_feature_columns):
    """Build the ``InputFnOps`` used when exporting a model for serving.

    One placeholder of shape ``[None, 1]`` is created per column, named after
    the column: ``tf.float64`` for the continuous names and ``tf.string`` for
    the categorical ones. The same tensors are handed over both as the model
    features and as the default inputs; no label is supplied.
    """
    feature_placeholders = {}
    for column in cont_feature_columns:
        feature_placeholders[column] = tf.placeholder(dtype = tf.float64, shape = [None, 1], name = column)
    # Categorical placeholders are added second, so on a name clash they
    # replace the continuous entry.
    for column in cat_feature_columns:
        feature_placeholders[column] = tf.placeholder(dtype = tf.string, shape = [None, 1], name = column)

    # A separate dict object that refers to the very same placeholder tensors.
    features = dict(feature_placeholders)
    return InputFnOps(features, None, feature_placeholders)

In [39]:
# Names of the four numeric measurement columns, in file order.
iris_cont_columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]

# NOTE(review): this path points into one user's home directory; adjust it
# when running the notebook elsewhere.
data_path = "~/Codecookies/machine-learning-workspace/datasets/iris/iris.data"

# NOTE(review): load_csv relies on the pandas.read_csv defaults, which treat
# the first line of the file as a header. If iris.data has no header line,
# its first sample ends up as column names and is then overwritten below --
# confirm against the file.
iris_df = load_csv(data_path)
iris_df.columns = iris_cont_columns + ["Species"]

# Encode the species names as the integer class ids 0, 1 and 2.
species_codes = {"Iris-setosa": "0", "Iris-versicolor": "1", "Iris-virginica": "2"}
iris_df["Species"] = iris_df["Species"].replace(species_codes).astype(int)

# One float64 real-valued feature column per measurement.
iris_feature_columns = [
    real_valued_column(column, dtype = tf.float64)
    for column in iris_cont_columns
]

In [44]:
# (rows, columns) of the prepared frame. DataFrame.shape reports this
# directly; the former as_matrix() call is deprecated in pandas and also
# copied the whole frame into an array just to read its shape.
iris_df.shape

(149, 6)

In [40]:
def iris_input_fn():
    """Return the ``(features, label)`` tensors built from ``iris_df``.

    All features are the continuous Iris columns (no categorical ones) and
    the label is the ``Species`` column.
    """
    return _input_fn(
        df = iris_df,
        cont_feature_columns = iris_cont_columns,
        cat_feature_columns = [],
        label_column = "Species"
    )

def iris_serving_input_fn():
    """Return the serving input ops for the continuous Iris columns.

    No categorical columns are passed.
    """
    return _serving_input_fn(
        cont_feature_columns = iris_cont_columns,
        cat_feature_columns = []
    )

def build_iris(classifier, max_steps, name, with_proba = True):
    """Train ``classifier`` on the Iris data and export it as SavedModel ``name``.

    :param classifier: estimator providing ``fit`` and ``predict``.
    :param max_steps: passed to ``fit`` as ``max_steps``.
    :param name: name handed to ``store_savedmodel`` for the exported model.
    :param with_proba: currently has no effect -- the only code that reads it
        is the commented-out CSV export below.
    """
    classifier.fit(input_fn = iris_input_fn, max_steps = max_steps)

    # Predictions over the training inputs. The resulting frame is only
    # consumed by the disabled CSV export that follows.
    predicted_targets = classifier.predict(input_fn = iris_input_fn, as_iterable = False)
    species = DataFrame(predicted_targets, columns = ["_target"])
#     if(with_proba):
#         species_proba = DataFrame(classifier.predict_proba(input_fn = iris_input_fn, as_iterable = False), columns = ["probability(0)", "probability(1)", "probability(2)"])
#         species = pandas.concat((species, species_proba), axis = 1)
#     store_csv(species, name + ".csv")

    store_savedmodel(classifier, iris_serving_input_fn, name)

# DNN classifier with two hidden layers (12 and 6 units), trained for 100 steps.
# The optimizer is passed as the AdamOptimizer class itself, not an instance.
dnn_classifier = DNNClassifier(
    hidden_units = [4 * 3, 2 * 3],
    feature_columns = _dnn_feature_columns(iris_feature_columns),
    n_classes = 3,
    optimizer = tf.train.AdamOptimizer,
    config = estimator_conf
)
build_iris(dnn_classifier, 100, "DNNClassificationIris")

# Linear classifier on the same feature columns, trained for 5000 steps.
linear_classifier = LinearClassifier(
    feature_columns = iris_feature_columns,
    n_classes = 3,
    optimizer = tf.train.AdamOptimizer,
    config = estimator_conf
)
build_iris(linear_classifier, 5000, "LinearClassificationIris")


INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a1a8e5890>, '_model_dir': '/var/folders/1_/4_crrbzx2llc_qzmrngr4vx00000gn/T/tmpFn7P2b', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_tf_random_seed': 42, '_save_summary_steps': 100, '_environment': 'local', '_num_worker_replicas': 0, '_task_id': 0, '_log_step_count_steps': 100, '_tf_config': intra_op_parallelism_threads: 1
inter_op_parallelism_threads: 1
gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_evaluation_master': '', '_master': ''}
Instructions for updating:
Please switch to tf.train.get_global_step
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/1_/4_crrbzx2llc_qzmrngr4vx00000gn/T/tmpFn7P2b/model.ckpt.
INFO:tensorflow:loss = 1.1375

INFO:tensorflow:loss = 0.167743, step = 3401 (0.162 sec)
INFO:tensorflow:global_step/sec: 401.947
INFO:tensorflow:loss = 0.162824, step = 3501 (0.247 sec)
INFO:tensorflow:global_step/sec: 888.597
INFO:tensorflow:loss = 0.158122, step = 3601 (0.113 sec)
INFO:tensorflow:global_step/sec: 990.627
INFO:tensorflow:loss = 0.153625, step = 3701 (0.100 sec)
INFO:tensorflow:global_step/sec: 1017.7
INFO:tensorflow:loss = 0.149323, step = 3801 (0.099 sec)
INFO:tensorflow:global_step/sec: 970.252
INFO:tensorflow:loss = 0.145205, step = 3901 (0.102 sec)
INFO:tensorflow:global_step/sec: 487.845
INFO:tensorflow:loss = 0.141264, step = 4001 (0.206 sec)
INFO:tensorflow:global_step/sec: 592.182
INFO:tensorflow:loss = 0.13749, step = 4101 (0.171 sec)
INFO:tensorflow:global_step/sec: 448.435
INFO:tensorflow:loss = 0.133874, step = 4201 (0.220 sec)
INFO:tensorflow:global_step/sec: 643.96
INFO:tensorflow:loss = 0.13041, step = 4301 (0.154 sec)
INFO:tensorflow:global_step/sec: 500.857
INFO:tensorflow:loss = 0

In [None]:
# java -cp ../../jpmml-tensorflow/target/converter-executable-1.0-SNAPSHOT.jar org.jpmml.tensorflow.Main --tf-input ./pmml-models/tensorflow/savedmodel/LinearClassificationIris/ --pmml-output ./pmml-models/tensorflow/linear_classifier.pmml