In [1]:
# Classification

# Regression was used to predict a numeric value;
# classification is used to separate data points
# into classes with different labels.

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt


In [3]:
# Example from TensorFlow tutorials: Classify Flowers

#DataSet

# 3 flower classes (species): Setosa, Versicolor, Virginica
# 4 features for each flower: Sepal Length, Sepal Width, Petal Length, Petal Width

In [4]:
# Column names applied to both CSV files: four measurements, then the label.
FL_UTILS = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
    'Species',
]
# Species names; looked up by class id when a prediction is printed.
FL_CLASS = ['Setosa', 'Versicolor', 'Virginica']

# From remote origin, use following:
#trainData = tf.keras.utils.get_file('iris_training.csv', "https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv")
#testData = tf.keras.utils.get_file('iris_test.csv', "https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv")

# header=0 together with names= replaces the header row found in each file
# with the FL_UTILS column names.
rawData = pd.read_csv(
    'assets/iris_training.csv',
    header=0,
    names=FL_UTILS,
)
testData = pd.read_csv(
    'assets/iris_test.csv',
    header=0,
    names=FL_UTILS,
)

In [5]:
rawData.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Species
0,6.4,2.8,5.6,2.2,2
1,5.0,2.3,3.3,1.0,1
2,4.9,2.5,4.5,1.7,2
3,4.9,3.1,1.5,0.1,0
4,5.7,3.8,1.7,0.3,0


In [6]:
# DataFrame.pop removes the "Species" column from each frame in place and
# returns it, leaving rawData/testData with only the feature columns.
# NOTE(review): this cell is not idempotent - running it a second time
# without reloading the CSVs raises KeyError because "Species" is gone.
trainMe = rawData.pop("Species")
testMe = testData.pop("Species")


In [7]:
rawData.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth
0,6.4,2.8,5.6,2.2
1,5.0,2.3,3.3,1.0
2,4.9,2.5,4.5,1.7
3,4.9,3.1,1.5,0.1
4,5.7,3.8,1.7,0.3


In [8]:
rawData.shape

(120, 4)

In [9]:
# Input Function

def inputFunction(features, labels, training = True, batchSize = 256 ):
    """Build a batched tf.data.Dataset of (feature dict, label) pairs.

    Args:
        features: mapping-like object (e.g. a DataFrame) convertible with
            dict(); each key becomes a named feature tensor.
        labels: label values sliced in step with the features.
        training: when true, the dataset is shuffled with a buffer of 1000
            elements and repeated without a count before batching.
        batchSize: number of elements per batch.

    Returns:
        The batched dataset.
    """
    featureMap = dict(features)
    slices = tf.data.Dataset.from_tensor_slices((featureMap, labels))

    # Evaluation / prediction path: keep the original order, single pass.
    if not training:
        return slices.batch(batchSize)

    shuffled = slices.shuffle(1000)
    return shuffled.repeat().batch(batchSize)
     

In [10]:
# Column-feature mappings

# Every remaining column in rawData is numeric, so each one is described
# by a plain numeric feature column keyed by its column name.
mapped = [
    tf.feature_column.numeric_column(key=columnName)
    for columnName in rawData.keys()
]
print(mapped)

[NumericColumn(key='SepalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='SepalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]


In [11]:
# The Model

# Using DNNClassifier (LinearClassifier can be used as well)
# DNNClassifier doc: https://www.tensorflow.org/api_docs/python/tf/estimator/DNNClassifier

classifierWorker = tf.estimator.DNNClassifier(
    feature_columns=mapped,
    # Two hidden layers: 60 nodes, then 20 nodes.
    hidden_units=[60, 20],
    # 3 output classes (there are 3 flower species).
    n_classes=3,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\oezen\\AppData\\Local\\Temp\\tmpw51ycj4s', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [12]:
# input_fn is passed as a zero-argument callable, so the call to
# inputFunction is wrapped in a lambda that binds the training data
# (instead of nesting an inner function inside inputFunction).
classifierWorker.train(
    steps=5000,
    input_fn=lambda: inputFunction(rawData, trainMe, training=True),
)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\oezen\AppData\Local\Temp\tmpw51ycj4s\model.ckpt.
INFO:tensorflow:C:\Users\oezen\AppData\Local\Temp\tmpw51ycj4s\model.ckpt-0.data-00000-of-00001
INFO:tensorflow:0
INFO:tensorflow:C:\Users\oezen\AppData\Local\Temp\tmpw51ycj4s\model.ckpt-0.index
INFO:tensorflow:0
INFO:tensorflow:C:\Users\oezen\AppData\Local\Temp\tmpw51ycj4s\model.ckpt-0.meta
INFO:tensorflow:100
INF

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x20c71de5730>

In [13]:
# Evaluate the trained classifier on the held-out test set.
# training=False: no shuffling or repeating, a single pass over the data.
eResult = classifierWorker.evaluate(
    input_fn=lambda: inputFunction(testData, testMe, training=False),
)

# eResult is unpacked as keyword arguments; only 'accuracy' is reported here.
print('\n (TEST) Accuracy: {accuracy:0.5f}\n'.format(**eResult))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2022-06-19T13:52:43
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\oezen\AppData\Local\Temp\tmpw51ycj4s\model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 1.14048s
INFO:tensorflow:Finished evaluation at 2022-06-19-13:52:44
INFO:tensorflow:Saving dict for global step 5000: accuracy = 0.93333334, average_loss = 0.40924343, global_step = 5000, loss = 0.40924343
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: C:\Users\oezen\AppData\Local\Temp\tmpw51ycj4s\model.ckpt-5000

 (TEST) Accuracy: 0.93333



In [14]:
# predict() is wrapped in list() so every per-example result dict is
# materialized and can be indexed below.
dicPredict = list(
    classifierWorker.predict(
        input_fn=lambda: inputFunction(testData, testMe, training=False),
    )
)
# Probability assigned to class index 1 for every test example.
probs = pd.Series([entry['probabilities'][1] for entry in dicPredict])
# Full probability vector of the third test example.
print(dicPredict[2]['probabilities'])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\oezen\AppData\Local\Temp\tmpw51ycj4s\model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
[0.9139417  0.07729208 0.00876625]


In [15]:
# General user-input function: reads the measurements from the user and predicts the species.

def PredictMe(inputClassifier):
    """Prompt for the flower measurements and print the predicted species.

    Every feature named in FL_UTILS (all entries except the trailing label
    column) is read from standard input. A value that cannot be parsed as a
    float is rejected with a message and the same feature is asked again,
    instead of aborting the whole function with a ValueError.

    Args:
        inputClassifier: object exposing ``predict(input_fn=...)`` whose
            results are dicts with ``'class_ids'`` and ``'probabilities'``
            entries (here: the trained estimator).

    Returns:
        None. The prediction is printed.
    """

    def customInput(features, batchSize=256):
        # Prediction needs no labels and no shuffling: just batched features.
        return tf.data.Dataset.from_tensor_slices(dict(features)).batch(batchSize)

    predict = {}  # feature name -> single-element list with the user's value

    print('Please give only numeric values.\n')
    for util in FL_UTILS[:-1]:
        # Keep asking for this feature until the entry parses as a float.
        while True:
            value = input(util + ': ')
            try:
                predict[util] = [float(value)]
            except ValueError:
                print('"{}" is not a numeric value, please try again.'.format(value))
            else:
                break

    predictOutput = inputClassifier.predict(input_fn = lambda: customInput(predict))
    for prediction in predictOutput:
        classId = prediction['class_ids'][0]
        probability = prediction['probabilities'][classId]
        print('\nPrediction for given data is "{}" ({:.3f}%)'.format(FL_CLASS[classId], 100*probability))

In [16]:
# Testing the user-input function, which post-processes the prediction output.
PredictMe(classifierWorker)

Please give only numeric values.

SepalLength: 4
SepalWidth: 4
PetalLength: 4
PetalWidth: 4
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\oezen\AppData\Local\Temp\tmpw51ycj4s\model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.

Prediction for given data is "Virginica" (62.758%)
