In [1]:
import os
import sys
import logging
import numpy as np
import pandas as pd
import shutil
import tempfile
import unittest

import tensorflow as tf
from tensorflow.contrib.learn import LinearClassifier
from tensorflow.contrib.learn import DNNClassifier
from tensorflow.contrib.learn import RunConfig
from tensorflow.contrib.layers import one_hot_column
from tensorflow.contrib.layers import real_valued_column
from tensorflow.contrib.layers import sparse_column_with_keys
from tensorflow.contrib.layers.python.layers.feature_column import _OneHotColumn
from tensorflow.contrib.layers.python.layers.feature_column import _RealValuedColumn
from tensorflow.contrib.layers.python.layers.feature_column import _SparseColumnKeys
from tensorflow.contrib.learn.python.learn.utils.input_fn_utils import InputFnOps

from sklearn.datasets import load_iris

import ppmml

# Show INFO-level TensorFlow log messages (training progress, checkpoints)
tf.logging.set_verbosity(tf.logging.INFO)

# Train a TensorFlow model on the iris data and export it

In [2]:
def _export_tf_model(estimator, serving_input_fn, model_output_path):
    """ export tensorflow model

    The estimator exports into a scratch directory first; the resulting
    export folder is then moved to ``model_output_path``, replacing any
    directory that already exists there. The scratch directory is always
    removed afterwards, even when the export or the move fails.

    Args:
        estimator: trained estimator exposing ``export_savedmodel``
        serving_input_fn: callable handed over to ``export_savedmodel``
        model_output_path: string, final directory of the exported model
    """
    # The export is expected to land in a sub-folder of this base directory
    # (see the export_savedmodel API docs), so the base itself would be left
    # behind unless it is removed explicitly.
    export_base = tempfile.mkdtemp()
    try:
        savemodel_dir = estimator.export_savedmodel(
            export_base, serving_input_fn=serving_input_fn, as_text=True)
        # The export path may come back as bytes; only decode when it does,
        # so an already-decoded text path does not raise.
        if isinstance(savemodel_dir, bytes):
            savemodel_dir = savemodel_dir.decode("UTF-8")

        if os.path.isdir(model_output_path):
            shutil.rmtree(model_output_path)
        logging.info("moving model path from {} to {}".format(savemodel_dir, model_output_path))
        shutil.move(savemodel_dir, model_output_path)
    finally:
        # ignore_errors: the base may already be gone if it was moved itself
        shutil.rmtree(export_base, ignore_errors=True)


def _dnn_feature_columns(feature_columns):
    """ generate dnn feature columns

    Args:
        feature_columns: iterable whose items are either column names
            (turned into float64 real-valued columns) or sparse keyed
            columns (wrapped into one-hot columns)

    Returns:
        list of feature columns, in the order of ``feature_columns``
    """
    dnn_columns = []
    for col in feature_columns:
        if isinstance(col, _SparseColumnKeys):
            # The sparse column itself is what gets one-hot encoded; it must
            # not be routed through real_valued_column first.
            dnn_columns.append(one_hot_column(col))
        else:
            dnn_columns.append(real_valued_column(col, dtype=tf.float64))
    return dnn_columns


def _input_fn(df, cont_feature_columns, cat_feature_columns, label_column):
    """ tensorflow estimator input function

    Turns the requested dataframe columns into constant tensors holding one
    row per record.

    Args:
        df: pandas dataframe
        cont_feature_columns: list of string, numeric column names
        cat_feature_columns: list of string, category column names
        label_column: string, name of the label column

    Returns:
        (features, label): dict mapping column name to tensor, label tensor
    """
    def _column_tensor(name, **tensor_kwargs):
        # one constant tensor per column, shaped [number of rows, 1]
        series = df[name]
        return tf.constant(series.values, shape=[series.size, 1], **tensor_kwargs)

    features = {}
    # numeric columns go in first; a category column sharing a name replaces it
    for names, dtype in ((cont_feature_columns, tf.float64),
                         (cat_feature_columns, tf.string)):
        for name in names:
            features[name] = _column_tensor(name, dtype=dtype)

    # the label keeps whatever dtype tf.constant derives from the values
    label = _column_tensor(label_column)
    return features, label

def _serving_input_fn(cont_feature_columns, cat_feature_columns):
    """ tensorflow estimator serving input function

    Creates one placeholder per feature column, named after the column.

    Args:
        cont_feature_columns: list of string, numeric column names
        cat_feature_columns: list of string, category column names

    Returns:
        InputFnOps built from the feature dict, a ``None`` label and the
        placeholder dict
    """
    feature_placeholders = {}
    # numeric placeholders are registered first, then the category ones
    for names, dtype in ((cont_feature_columns, tf.float64),
                         (cat_feature_columns, tf.string)):
        for name in names:
            feature_placeholders[name] = tf.placeholder(
                dtype=dtype, shape=[None, 1], name=name)

    # features is a separate dict object holding the very same tensors
    features = dict(feature_placeholders)
    return InputFnOps(features, None, feature_placeholders)

In [3]:
# Shared estimator run configuration: one core and a fixed random seed
estimator_conf = RunConfig(num_cores=1, tf_random_seed=42)

In [4]:
# Load the iris measurements and labels into a single dataframe
features = ['x1', 'x2', 'x3', 'x4']
X, y = load_iris(return_X_y=True)
iris_df = pd.DataFrame(X, columns=features)
iris_df['y'] = y
iris_df.head(5)

Unnamed: 0,x1,x2,x3,x4,y
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [5]:
def _generate_tf_model(iris_df, features, estimator, model_output_path,
                       label_column="y", max_steps=10):
    """ fit the estimator on the dataframe and export the trained model

    Args:
        iris_df: pandas dataframe holding the feature and label columns
        features: list of string, numeric feature column names
        estimator: estimator exposing ``fit`` (and whatever
            ``_export_tf_model`` needs to export it)
        model_output_path: string, directory the exported model ends up in
        label_column: string, name of the label column in ``iris_df``
        max_steps: int, step limit handed to ``estimator.fit``
    """
    # All features are treated as numeric; no category columns are passed.
    def __iris_input_fn():
        return _input_fn(iris_df, features, [], label_column=label_column)

    def __iris_serving_input_fn():
        return _serving_input_fn(features, [])

    estimator.fit(input_fn=__iris_input_fn, max_steps=max_steps)
    _export_tf_model(estimator, __iris_serving_input_fn, model_output_path)

In [6]:
def _iris_dnn_features(features):
    """ build the dnn feature columns for the iris feature names
    """
    iris_columns = _dnn_feature_columns(features)
    return iris_columns

In [7]:
# Train a DNNClassifier on the iris dataframe and export the fitted model
algorithm_name = "dnn_classifier"
base_path = "/tmp/pmml-models/tensorflow/"
model_output = os.path.join(base_path, algorithm_name)
classifier = DNNClassifier(
    hidden_units=[4 * 3, 2 * 3],
    feature_columns=_iris_dnn_features(features),
    n_classes=3,
    optimizer=tf.train.AdamOptimizer,
    config=estimator_conf,
)
_generate_tf_model(iris_df, features, classifier, model_output)





INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1c1476ec50>, '_model_dir': '/var/folders/1_/4_crrbzx2llc_qzmrngr4vx00000gn/T/tmp_Mktml', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_tf_random_seed': 42, '_save_summary_steps': 100, '_environment': 'local', '_num_worker_replicas': 0, '_task_id': 0, '_log_step_count_steps': 100, '_tf_config': intra_op_parallelism_threads: 1
inter_op_parallelism_threads: 1
gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_evaluation_master': '', '_master': ''}
INFO: 17-12-28 16:48:32: tf_logging.py:82 * 140735235661824 Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterS

Instructions for updating:
Please switch to tf.train.get_global_step


INFO:tensorflow:Create CheckpointSaverHook.
INFO: 17-12-28 16:48:33: tf_logging.py:82 * 140735235661824 Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/1_/4_crrbzx2llc_qzmrngr4vx00000gn/T/tmp_Mktml/model.ckpt.
INFO: 17-12-28 16:48:36: tf_logging.py:82 * 140735235661824 Saving checkpoints for 1 into /var/folders/1_/4_crrbzx2llc_qzmrngr4vx00000gn/T/tmp_Mktml/model.ckpt.
INFO:tensorflow:loss = 1.2028, step = 1
INFO: 17-12-28 16:48:37: tf_logging.py:82 * 140735235661824 loss = 1.2028, step = 1
INFO:tensorflow:Saving checkpoints for 10 into /var/folders/1_/4_crrbzx2llc_qzmrngr4vx00000gn/T/tmp_Mktml/model.ckpt.
INFO: 17-12-28 16:48:37: tf_logging.py:82 * 140735235661824 Saving checkpoints for 10 into /var/folders/1_/4_crrbzx2llc_qzmrngr4vx00000gn/T/tmp_Mktml/model.ckpt.
INFO:tensorflow:Loss for final step: 1.16411.
INFO: 17-12-28 16:48:37: tf_logging.py:82 * 140735235661824 Loss for final step: 1.16411.
INFO:tensorflow:Restoring parameters from /var/fol

# Export PMML File

In [8]:
# Convert the exported TensorFlow model directory into a PMML file that is
# written under the same base path, named after the algorithm.
pmml_output = os.path.join(base_path, "{}.pmml".format(algorithm_name))
ppmml.to_pmml(
    model_input=model_output,
    pmml_output=pmml_output,
    model_type='tensorflow')

INFO: 17-12-28 16:48:39: base_converter.py:89 * 140735235661824 Starting to convert model file /tmp/pmml-models/tensorflow/dnn_classifier to pmml file
INFO: 17-12-28 16:48:42: base_converter.py:96 * 140735235661824 Successfully generate pmml file: /tmp/pmml-models/tensorflow/dnn_classifier.pmml


# Predict with PMML File

In [9]:
# Prepare test data: dump the full iris dataframe (label column included)
# to a CSV file with a header row and without the index column.
test_data_input = os.path.join(base_path, "test.csv")
iris_df.to_csv(test_data_input, index=False, header=True)

In [10]:
# Score the test CSV with the generated PMML file; the predictions are
# written to a CSV named after the algorithm under the base path.
data_output = os.path.join(base_path, "{}.csv".format(algorithm_name))
ppmml.predict(pmml_output, test_data_input, data_output)

INFO: 17-12-28 16:48:42: evaluator.py:62 * 140735235661824 Starting to make predictions of pmml file: /tmp/pmml-models/tensorflow/dnn_classifier.pmml, data_input: /tmp/pmml-models/tensorflow/test.csv, data_output: /tmp/pmml-models/tensorflow/dnn_classifier.csv
INFO: 17-12-28 16:48:43: evaluator.py:80 * 140735235661824 Successfully generate predictions to path: /tmp/pmml-models/tensorflow/dnn_classifier.csv


In [11]:
# Preview the first five rows of the prediction output
pd.read_csv(data_output).head(5)

Unnamed: 0,x1,x2,x3,x4,y,_target,probability(0),probability(1),probability(2)
0,5.1,3.5,1.4,0.2,0,1,0.269455,0.4017,0.328845
1,4.9,3.0,1.4,0.2,0,1,0.285334,0.384116,0.33055
2,4.7,3.2,1.3,0.2,0,1,0.276925,0.393353,0.329722
3,4.6,3.1,1.5,0.2,0,1,0.270846,0.400135,0.329019
4,5.0,3.6,1.4,0.2,0,1,0.263683,0.408243,0.328074
