# Classification

In [1]:
# Instead of predicting a numeric value, we can predict classes. 

In [2]:
# Use different properties of flowers to predict species of flower.

In [3]:
import tensorflow as tf
import pandas as pd

## Dataset

### This dataset separates flowers into three different species.
- Setosa
- Versicolor
- Virginica

### The information about each flower is...
- Sepal Length
- Sepal Width
- Petal Length
- Petal Width

In [58]:
# Column labels applied to the Iris CSV files when they are read:
# four flower measurements followed by the species label.
CSV_COLUMN_NAMES = [
    'sepalLength',
    'sepalWidth',
    'petalLength',
    'petalWidth',
    'species',
]
# Species names; a name's position in this list is the class id used to look it up
# when predictions are printed.
SPECIES = [
    'Setosa',
    'Versicolor',
    'Virginica',
]

In [24]:
# Fetch the Iris training and test CSV files; get_file hands back the local path of each file.
train_path = tf.keras.utils.get_file(
    "iris_training.csv",
    "https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv",
)
test_path = tf.keras.utils.get_file(
    "iris_test.csv",
    "https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv",
)

# header=0 treats each file's first row as a header to be replaced; names= supplies our own labels.
train = pd.read_csv(train_path, header=0, names=CSV_COLUMN_NAMES)  # training DataFrame
test = pd.read_csv(test_path, header=0, names=CSV_COLUMN_NAMES)  # test DataFrame


### Let's look at our data...

In [25]:
train.head() # preview the first 5 rows of the training DataFrame (four features plus the 'species' label)

Unnamed: 0,sepalLength,sepalWidth,petalLength,petalWidth,species
0,6.4,2.8,5.6,2.2,2
1,5.0,2.3,3.3,1.0,1
2,4.9,2.5,4.5,1.7,2
3,4.9,3.1,1.5,0.1,0
4,5.7,3.8,1.7,0.3,0


In [26]:
test.head() # preview the first 5 rows of the test DataFrame (four features plus the 'species' label)

Unnamed: 0,sepalLength,sepalWidth,petalLength,petalWidth,species
0,5.9,3.0,4.2,1.5,1
1,6.9,3.1,5.4,2.1,2
2,5.1,3.3,1.7,0.5,0
3,6.0,3.4,4.5,1.6,1
4,5.5,2.5,4.0,1.3,1


In [27]:
# Separate the labels from the features. pop() removes the 'species' column from the
# DataFrame in place and returns it, so train/test are left with only the four feature columns.
# NOTE: because pop() mutates the frame, re-running this cell without reloading the data raises KeyError.
train_y = train.pop('species') # training labels (species encoded as the integers 0, 1, 2)
test_y = test.pop('species') # test labels (same integer encoding)

In [28]:
train.shape # (rows, columns): 120 training examples, 4 feature columns now that 'species' is removed

(120, 4)

In [31]:
test.shape # (rows, columns): 30 test examples, 4 feature columns now that 'species' is removed

(30, 4)

### Input Function

In [34]:
def input_fn(features, labels, training=True, batch_size=256):
    """Build a batched tf.data.Dataset of (feature dict, label) pairs.

    Args:
        features: Mapping-like object (e.g. a DataFrame) of column name -> values.
        labels: Label values aligned with the rows of `features`.
        training: When True, the data is shuffled (buffer of 1000) and repeated
            without end, so the caller must bound training (e.g. with `steps`).
            When False, the data is read once, in order.
        batch_size: Number of examples per batch.

    Returns:
        The batched dataset.
    """
    pairs = (dict(features), labels)
    ds = tf.data.Dataset.from_tensor_slices(pairs)
    if not training:
        return ds.batch(batch_size)
    # Training mode: randomise the order and loop over the data indefinitely.
    return ds.shuffle(1000).repeat().batch(batch_size)

### Feature Columns

In [36]:
# Feature columns describe how the model should interpret each input.
# Every remaining column of `train` is declared as a plain numeric feature.
my_feature_columns = [
    tf.feature_column.numeric_column(key=column_name)
    for column_name in train.keys()
]

### Build Model

##### Multiple estimators/models exist that we can pick from for classification tasks. These include Deep Neural Networks and Linear Classifiers. According to the TF website, the DNN Classifier is the best choice for our task.

In [38]:
# Deep neural network classifier with two hidden layers (30 nodes, then 10 nodes)
# and a three-way output, one class per species.
classifier = tf.estimator.DNNClassifier(
    n_classes=3,  # the model chooses between 3 classes
    hidden_units=[30, 10],  # 30 hidden nodes in the first layer, 10 in the second
    feature_columns=my_feature_columns,
)

Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\Nate\\AppData\\Local\\Temp\\tmp4a8j38my', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_ta

### Train the Model

In [40]:
# `lambda:` defines a small anonymous function in one line; the expression after the
# colon is what it returns. Here it wraps input_fn so the estimator can call it itself
# to obtain the training dataset.
# `steps` is the number of training steps (batches) to run, not the number of passes
# over the whole dataset.
# In the log below, a lower loss means the model fits the training data better.
classifier.train(
    steps=5000,
    input_fn=lambda: input_fn(train, train_y, training=True),
)

Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.keras instead.
INFO:tensorflow:Done calling model_fn.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
INFO:tensorflow:Create CheckpointSaverHook.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving chec

INFO:tensorflow:global_step/sec: 548.246
INFO:tensorflow:loss = 0.46576518, step = 3600 (0.186 sec)
INFO:tensorflow:global_step/sec: 534.624
INFO:tensorflow:loss = 0.4552077, step = 3700 (0.185 sec)
INFO:tensorflow:global_step/sec: 567.687
INFO:tensorflow:loss = 0.46134, step = 3800 (0.176 sec)
INFO:tensorflow:global_step/sec: 585.828
INFO:tensorflow:loss = 0.45778757, step = 3900 (0.173 sec)
INFO:tensorflow:global_step/sec: 541.096
INFO:tensorflow:loss = 0.4638439, step = 4000 (0.182 sec)
INFO:tensorflow:global_step/sec: 583.412
INFO:tensorflow:loss = 0.45007378, step = 4100 (0.172 sec)
INFO:tensorflow:global_step/sec: 482.543
INFO:tensorflow:loss = 0.4499638, step = 4200 (0.207 sec)
INFO:tensorflow:global_step/sec: 529.959
INFO:tensorflow:loss = 0.45340702, step = 4300 (0.188 sec)
INFO:tensorflow:global_step/sec: 574.691
INFO:tensorflow:loss = 0.4361281, step = 4400 (0.174 sec)
INFO:tensorflow:global_step/sec: 554.712
INFO:tensorflow:loss = 0.43923128, step = 4500 (0.182 sec)
INFO:te

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x237e4448670>

### Evaluate the Model

In [46]:
# Evaluate on the held-out test set. training=False makes input_fn skip shuffling and
# repeating, so the test data is read once.
eval_result = classifier.evaluate(input_fn = lambda: input_fn(test, test_y, training=False))
# eval_result is a dict of metrics; '\n' (newline escape) puts a blank line before the accuracy.
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2023-06-05T16:33:20
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\Nate\AppData\Local\Temp\tmp4a8j38my\model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.50566s
INFO:tensorflow:Finished evaluation at 2023-06-05-16:33:21
INFO:tensorflow:Saving dict for global step 5000: accuracy = 0.9, average_loss = 0.48305714, global_step = 5000, loss = 0.48305714
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: C:\Users\Nate\AppData\Local\Temp\tmp4a8j38my\model.ckpt-5000
n\Test set accuracy: 0.900



### Predictions Using Model

In [None]:
def input_fn(features, batch_size=256):
    """Build a batched, label-free tf.data.Dataset for prediction.

    NOTE(review): this reuses the name of the training/evaluation input function
    defined earlier in the notebook, so once this cell has run the earlier
    (features, labels, training, batch_size) version is shadowed. Re-running the
    train or evaluate cells afterwards requires re-running that earlier definition.

    Args:
        features: Mapping of feature name -> sequence of values.
        batch_size: Number of examples per batch.

    Returns:
        The batched dataset of feature dicts.
    """
    feature_dict = dict(features)
    dataset = tf.data.Dataset.from_tensor_slices(feature_dict)
    return dataset.batch(batch_size)

features = ['sepalLength','sepalWidth','petalLength','petalWidth']
predict = {}

print("Please type numeric values as prompted.")
for feature in features:
    # Keep prompting until the entry parses as a number. float() accepts integers,
    # decimals and signed values, and raises ValueError for anything else.
    while True:
        val = input(feature + ": ")
        try:
            number = float(val)
        except ValueError:
            print("Please enter a number (for example 5.1).")
        else:
            break

    # from_tensor_slices slices along the first axis, so each feature must be a
    # sequence even when predicting for a single flower.
    predict[feature] = [number]
    

# Feed the typed-in measurements to the trained classifier and report one line per result.
predictions = classifier.predict(input_fn=lambda: input_fn(predict))
for pred_dict in predictions:
    # First entry of 'class_ids' is used as the index into both the probabilities and SPECIES.
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]
    species_name = SPECIES[class_id]
    print('Prediction is "{}" ({:.1f}%)'.format(species_name, 100 * probability))
        
        

# Reference inputs and their expected classes - type these values at the prompts to see
# whether the model predicts the right species. Position i in every predict_x list
# belongs to the flower whose expected species is expected[i].
expected = [
    'Setosa',
    'Versicolor',
    'Virginica',
]
predict_x = {
    'sepalLength': [5.1, 5.9, 6.9],
    'sepalWidth': [3.3, 3.0, 3.1],
    'petalLength': [1.7, 4.2, 5.4],
    'petalWidth': [0.5, 1.5, 2.1],
}


Please type numeric values as prompted.
