In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_california_housing
import tensorflow.compat.v1 as tf
import numpy as np
tf.disable_eager_execution()

from sklearn.datasets import make_moons
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd

In [34]:
def train_input_fn(features,labels,batch_size):
    display('train_input_fn features ',type(features))
    display('train_input_fn labels',type(labels))
    dataset = tf.data.Dataset.from_tensor_slices((dict(features),labels))
    return dataset.shuffle(1000,seed=42).repeat().batch(batch_size)


In [35]:
def eval_input_fn(features,labels,batch_size):
    features = dict(features)
    if labels is None:
        inputs = features
    else:
        inputs = (features,labels)
    
    dataset = tf.data.Dataset.from_tensor_slices(inputs)
    
    return dataset.batch(batch_size)
    

In [36]:
# create input function
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
CSV_COLUMN_NAMES = ['SepalLength','SepalWidth',
                   'PetalLength','PetalWidth','Species']
SPECIES = ['Setosa','Versocolor','Virginica']

def load_data():
    train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1],TRAIN_URL)
    test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1],TEST_URL)
    
    train = pd.read_csv(train_path,names=CSV_COLUMN_NAMES,header=0)
    X_train, y_train = train,train.pop('Species')
    
    test = pd.read_csv(test_path,names=CSV_COLUMN_NAMES,header=0)
    X_test, y_test = test,test.pop('Species')
    
    return (X_train,y_train), (X_test,y_test)

(X_train,y_train), (X_test,y_test) = load_data()
print (X_train)

     SepalLength  SepalWidth  PetalLength  PetalWidth
0            6.4         2.8          5.6         2.2
1            5.0         2.3          3.3         1.0
2            4.9         2.5          4.5         1.7
3            4.9         3.1          1.5         0.1
4            5.7         3.8          1.7         0.3
..           ...         ...          ...         ...
115          5.5         2.6          4.4         1.2
116          5.7         3.0          4.2         1.2
117          4.4         2.9          1.4         0.2
118          4.8         3.0          1.4         0.1
119          5.5         2.4          3.7         1.0

[120 rows x 4 columns]


In [37]:
def my_model(features,labels,mode,params):
        """DNN with 2 hidden layers , and dropout of 0.1 probability"""
        input_layer = tf.feature_column.input_layer(features,
                                                   params['feature_columns'])

        hidden1 = tf.layers.dense(input_layer,units=params['n_hidden1'],
                                 activation=tf.nn.relu)
        dropout1 = tf.layers.dropout(inputs=hidden1,rate=0.1, 
                                     training= mode == tf.estimator.ModeKeys.TRAIN)
        hidden2 = tf.layers.dense(dropout1,units=params['n_hidden2'],
                                 activation=tf.nn.relu)
        dropout2 = tf.layers.dropout(inputs=hidden2,rate=0.1, 
                                     training= mode == tf.estimator.ModeKeys.TRAIN)
        
        #compute logits (one per class)
        logits = tf.layers.dense(dropout2,params['n_classes'])
        
        #compute predictions
        predicted_classes = tf.argmax(logits,axis=1)
        
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                'class_ids' : predicted_classes,
                'probabilities': tf.nn.softmax(logits),
                'logits':logits
            }

            return tf.estimator.EstimatorSpec(mode,predictions=predictions)
    
        
        #compute loss
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                      logits=logits)
        
        #compute evaluation metrics
        accuracy = tf.metrics.accuracy(labels=labels,
                                      predictions = predicted_classes,
                                      name='acc_op')
        
        metrics = {'accuracy':accuracy}
        tf.summary.scalar('accuracy',accuracy[1])
        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,loss = loss,
                                             eval_metric_ops=metrics)
        
        optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
        train_op = optimizer.minimize(loss,
                                     global_step = tf.train.get_global_step())
        
        return tf.estimator.EstimatorSpec(mode,loss=loss,train_op=train_op)
    
    
    

In [38]:
config = tf.estimator.RunConfig(tf_random_seed=42)

feature_columns = []
for key in X_train.columns:
    feature_columns.append(tf.feature_column.numeric_column(key=key))

display(feature_columns)
classifier = tf.estimator.Estimator(
    model_fn=my_model,
    params={
        'feature_columns':feature_columns,
        'n_hidden1': 10,
        'n_hidden2': 10,
        'n_classes': 3
    },
    model_dir = 'my_model',
    config=config
)

[NumericColumn(key='SepalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='SepalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='PetalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='PetalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

INFO:tensorflow:Using config: {'_model_dir': 'my_model', '_tf_random_seed': 42, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000020B67785848>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [39]:
batch_size = 100
train_step = 1000

classifier.train(
    input_fn=lambda:train_input_fn(X_train,y_train,batch_size),
    steps=train_step)


'train_input_fn features '

pandas.core.frame.DataFrame

'train_input_fn labels'

pandas.core.series.Series

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from my_model\model.ckpt-5500
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 5500 into my_model\model.ckpt.
INFO:tensorflow:loss = 0.21818577, step = 5500
INFO:tensorflow:global_step/sec: 493.977
INFO:tensorflow:loss = 0.22843137, step = 5600 (0.202 sec)
INFO:tensorflow:global_step/sec: 1033.69
INFO:tensorflow:loss = 0.15421426, step = 5700 (0.097 sec)
INFO:tensorflow:global_step/sec: 1023.13
INFO:tensorflow:loss = 0.2791499, step = 5800 (0.098 sec)
INFO:tensorflow:global_step/sec: 1066.67
INFO:tensorflow:loss = 0.390869, step = 5900 (0.094 sec)
INFO:tensorflow:global_step/sec: 887.321
INFO:tensorflow:loss = 0.302419, step = 6000 (0.114 sec)
INFO:tensorflow:global_step/sec: 954.935
INFO:tensorflow:loss = 0.229864, step = 610

<tensorflow_estimator.python.estimator.estimator.Estimator at 0x20b676f3f08>

In [23]:
eval_result = classifier.evaluate(
    input_fn=lambda:eval_input_fn(X_test,y_test,batch_size))

print('eval_result',eval_result)
print('eval_result',eval_result['accuracy'])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-02-09T11:13:04Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from my_model\model.ckpt-3000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2020-02-09-11:13:04
INFO:tensorflow:Saving dict for global step 3000: accuracy = 0.96666664, global_step = 3000, loss = 0.14257695
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 3000: my_model\model.ckpt-3000
eval_result {'accuracy': 0.96666664, 'loss': 0.14257695, 'global_step': 3000}
eval_result 0.96666664


In [24]:
#generate predictions from the model
X_pred = {'SepalLength': [5.1,5.9,6.9],
          'SepalWidth': [3.3,3.0,3.1],
          'PetalLength': [1.7,4.2,5.4],
          'PetalWidth': [0.5,1.5,2.1]}

expected = ['Setosa','Versocolor','Virginica']

predictions = classifier.predict(
    input_fn=lambda:eval_input_fn(X_pred,
                                 labels = None,
                                 batch_size = batch_size))

template = ( 'Prediction is "{}" ({:.1f}%), expected "{}"')
            
for pred_dict,label in zip(predictions,expected):
    class_id = pred_dict['class_ids']
    prob = pred_dict['probabilities'][class_id]
    
    print(template.format(SPECIES[class_id],
                         100 * prob , label))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from my_model\model.ckpt-3000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Prediction is "Setosa" (100.0%), expected "Setosa"
Prediction is "Versocolor" (95.1%), expected "Versocolor"
Prediction is "Virginica" (99.8%), expected "Virginica"
