# TensorFlow Estimator

TensorFlow provides an Estimator API that lets you quickly create models without manually defining the graph, as was done for the MNIST project.

## Estimator Steps
1. Read in Data (normalize if necessary)
2. Train/Test split the data
3. Create Estimator Feature Columns
4. Create Input Estimator Function
5. Train Estimator Model
6. Predict with new Test Input Function

In [42]:
import pandas as pd

In [43]:
# Read the iris dataset from the CSV file next to this notebook, then preview
# the first five rows to check that it loaded as expected.
df = pd.read_csv(filepath_or_buffer='iris.csv')
df.head(n=5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0


In [44]:
# Inspect the raw column labels before renaming them.
df.columns

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'target'],
      dtype='object')

In [45]:
# Replace the verbose headers with short snake_case names. The assignment is
# positional, so the order here must match the existing column order. These
# names are later used as the keys of the estimator feature columns.
df.columns = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'target',
]

In [46]:
# Confirm the new column names took effect.
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0


In [47]:
# The target column holds the class label (0, 1 or 2 -- three classes, not a
# binary yes/no flag), but it is read from the CSV as a float. Class labels
# should be integers, so cast the whole column at once. The dtype is spelled
# out as int64 so the result does not depend on the platform's default int.
df['target'] = df['target'].astype('int64')
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [48]:
# Separate the measurements (features) from the label.
# `columns=` drops by column label, i.e. the same as drop('target', axis=1);
# axis=1 refers to columns, axis=0 to rows.
X = df.drop(columns=['target'])
y = df['target']

In [49]:
from sklearn.model_selection import train_test_split

In [50]:
# Hold out 30% of the rows for testing.
# random_state is the seed for the shuffle done before splitting; fixing it
# makes the split reproducible. The value 42 itself is arbitrary (a nod to the
# "ultimate question" from The Hitchhiker's Guide to the Galaxy).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [51]:
import tensorflow as tf

In [52]:
# List the feature names; one estimator feature column is created per name.
X.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], dtype='object')

In [53]:
# FEATURE COLUMNS. There are different kinds; every feature here is a plain
# number, so each one is described with a numeric column keyed by the
# DataFrame column name.
feat_cols = [tf.feature_column.numeric_column(col) for col in X.columns]

In [54]:
# Display the feature columns that were just built (one per feature).
feat_cols

[_NumericColumn(key='sepal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='sepal_width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='petal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='petal_width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [55]:
# Input function that feeds the training data to the estimator.
#   batch_size - rows handed to the model per step. If the model ends up
#                predicting (almost) everything as 0 or nothing at all, that
#                indicates a problem, and a smaller batch size may help.
#   num_epochs - how many times the training data is passed through
#                (5 means five full passes).
#   shuffle    - randomise the row order; useful when the dataset is sorted.
#                train_test_split already shuffles by default as well.
input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=10,
    num_epochs=5,
    shuffle=True,
)

In [56]:
# DNNClassifier is a ready-made Deep Neural Network (DNN) classifier.
#   feature_columns - descriptions of the input features.
#   hidden_units    - number of neurons in each hidden layer; three layers of
#                     10, 20 and 10 neurons is overkill for this dataset.
#   n_classes       - number of distinct target classes to choose between.
classifier = tf.estimator.DNNClassifier(
    feature_columns=feat_cols,
    hidden_units=[10, 20, 10],
    n_classes=3,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_evaluation_master': '', '_tf_random_seed': None, '_train_distribute': None, '_model_dir': 'C:\\Users\\ve032674\\AppData\\Local\\Temp\\tmpnsuhfztn', '_task_id': 0, '_num_ps_replicas': 0, '_save_summary_steps': 100, '_service': None, '_save_checkpoints_secs': 600, '_keep_checkpoint_every_n_hours': 10000, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000000012C87518>, '_master': '', '_is_chief': True, '_device_fn': None, '_log_step_count_steps': 100, '_task_type': 'worker', '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_num_worker_replicas': 1, '_global_id_in_cluster': 0}


In [57]:
# Train for 50 steps, drawing batches from the training input function.
classifier.train(input_fn=input_func,steps=50)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\ve032674\AppData\Local\Temp\tmpnsuhfztn\model.ckpt.
INFO:tensorflow:loss = 13.7598095, step = 1
INFO:tensorflow:Saving checkpoints for 50 into C:\Users\ve032674\AppData\Local\Temp\tmpnsuhfztn\model.ckpt.
INFO:tensorflow:Loss for final step: 1.7712499.


<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x12c87630>

In [60]:
# Input function for prediction: features only (no labels are passed), the
# whole test set in a single batch, and no shuffling so that the predictions
# come back in the same order as the rows of X_test.
pred_fn = tf.estimator.inputs.pandas_input_fn(
    x=X_test,
    batch_size=len(X_test),
    shuffle=False,
)

In [64]:
# Collect the predictions into a list so they can be inspected and reused;
# each item is a dict describing the prediction for one test row.
predictions = list(classifier.predict(input_fn=pred_fn))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\ve032674\AppData\Local\Temp\tmpnsuhfztn\model.ckpt-50
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [65]:
# Show the raw prediction dicts (class_ids, classes, logits, probabilities).
predictions

[{'class_ids': array([1], dtype=int64),
  'classes': array([b'1'], dtype=object),
  'logits': array([-2.283909 ,  1.5871568,  1.08097  ], dtype=float32),
  'probabilities': array([0.0128331, 0.6159054, 0.3712615], dtype=float32)},
 {'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object),
  'logits': array([ 5.0017056, -0.296674 , -3.4081779], dtype=float32),
  'probabilities': array([9.9480480e-01, 4.9737128e-03, 2.2149905e-04], dtype=float32)},
 {'class_ids': array([2], dtype=int64),
  'classes': array([b'2'], dtype=object),
  'logits': array([-4.873213  ,  0.17805958,  3.7016768 ], dtype=float32),
  'probabilities': array([1.8334526e-04, 2.8642414e-02, 9.7117424e-01], dtype=float32)},
 {'class_ids': array([1], dtype=int64),
  'classes': array([b'1'], dtype=object),
  'logits': array([-2.467533 ,  1.3988475,  1.3069134], dtype=float32),
  'probabilities': array([0.01082924, 0.517304  , 0.47186676], dtype=float32)},
 {'class_ids': array([1], dtype=int64),
  'cla

In [66]:
# Keep only the predicted class id of each prediction. 'class_ids' is a
# one-element array, so [0] pulls out the label itself.
final_preds = [pred['class_ids'][0] for pred in predictions]

In [72]:
# Peek at the first ten predicted class ids.
final_preds[:10]

[1, 0, 2, 1, 1, 0, 1, 2, 2, 1]

In [73]:
from sklearn.metrics import classification_report,confusion_matrix

In [74]:
# Compare true labels with predictions: rows are the true class, columns the
# predicted class, so off-diagonal entries are misclassifications.
print(confusion_matrix(y_test,final_preds))

[[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]


In [75]:
# Per-class precision, recall and f1-score on the held-out test set.
print(classification_report(y_test,final_preds))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.92      0.96        13
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45

