In [1]:
# Shows how to use the tf.metrics.accuracy(..), tf.metrics.precision(..), and
# tf.metrics.recall(..) functions. At the end, the F1 score is calculated for
# both the train and test sets.

# IMPORTANT NOTES:
# 1) Since the same tensor is used for accuracy/precision/recall calculation for
#    both train and test sets, you need to reset local variables in each tf.metrics
#    tensor before using it for - say - test set after using it for train set.
#    Otherwise, your - say - test results will be a combination of train and test results
# 2) You could have used the default tf local variable initializer. But it initializes all
#    local variables. It does not hurt to use it in the beginning, but before re-using
#    a tf.metrics function, you only need to reset the corresponding local variables.
# 3) Note that the tf.metrics.accuracy(..) function works fine with multi-class scenarios.
#    However, the precision and recall functions are binary-only: they cast labels and
#    predictions to bool, so their results are not meaningful when there are more than 2 classes.


In [2]:
import tensorflow as tf

In [3]:
# ASSUMPTIONS: (Otherwise, decode_csv function needs update)
# 1) The first column is NOT a feature. (It is most probably a training example ID or similar)
# 2) The last column is always the label. And there is ONLY 1 column that represents the label.
#    If more than 1 column represents the label, decode_csv() function needs update 
# 3) The first row is assumed to include names of the data types (i.e. feature name, label, etc.) 
#    so it is skipped

# UPDATE record_defaults IN EACH PROJECT: one single-element list per CSV column,
# holding the value to use when that column is missing in a row.
record_defaults = [
    [""],   # first column: not a feature (dropped by decode_csv)
    [0.0],
    [0.0],
    [0.0],
    [0.0],
    [0.0],
    [0],    # last column: the label
]

def decode_csv(line):
    """Parse one CSV text line into a ``(features, label)`` pair of tensors.

    The line is decoded with the module-level ``record_defaults``. The first
    column is discarded (it is assumed NOT to be a feature), the last column is
    the label, and every column in between is a feature.

    Args:
        line: a single line of CSV text.

    Returns:
        A tuple ``(features, label)`` where ``features`` stacks all feature
        columns into one tensor and ``label`` stacks the single label column.
    """
    columns = tf.decode_csv(line, record_defaults)

    # Everything between the first and the last column is a feature; stacking
    # them into one tensor lets forward prop., etc. be vectorized later.
    features = tf.stack(columns[1:-1])

    # Slice (rather than index) the last column so the label is stacked from a
    # one-element list and keeps its own axis.
    label = tf.stack(columns[-1:])

    return features, label


In [4]:
# Note: For simplicity, the same CSV file(s) (train_input_paths) are used as both train and test set data.

def validation(train_input_paths, minibatch_size, num_classes):
    """Demonstrate tf.metrics accuracy/precision/recall on a "train" and a "test" pass.

    Labels are read in mini-batches from the CSV files in ``train_input_paths``
    (the same files are used for both passes) and compared against hard-coded
    prediction batches. For each pass the accumulated accuracy, precision,
    recall and the resulting F1 score are printed. The metric variables are
    reset before each pass so the results of the two passes do not mix.

    Args:
        train_input_paths: list of CSV file paths; the first row of each file
            is skipped and the remaining rows are parsed with ``decode_csv``.
        minibatch_size: number of rows per batch read from the dataset.
        num_classes: currently unused; kept for interface compatibility.

    Raises:
        IndexError: if the data yields more batches than there are hard-coded
            prediction batches (3 per pass).
    """
    with tf.name_scope("read_next_train_batch"):
        filenames = tf.placeholder(tf.string, shape=[None])
        dataset = tf.data.Dataset.from_tensor_slices(filenames)
        dataset = dataset.flat_map(
            lambda filename: tf.data.TextLineDataset(filename).skip(1).map(decode_csv))
        dataset = dataset.batch(minibatch_size)
        iterator = dataset.make_initializable_iterator()
        next_element = iterator.get_next()

    # Placeholders to take in batches of data. The batch dimension is left
    # unspecified so that a final batch smaller than minibatch_size can be fed.
    tf_labels = tf.placeholder(dtype=tf.int64, shape=[None, 1])
    tf_predictions = tf.placeholder(dtype=tf.int64, shape=[None, 1])

    # IMPORTANT: Never run a metric's value tensor and its update op in the same
    # call, e.g. sess.run([accuracy, accuracy_update]). ALWAYS run them separately.
    with tf.name_scope("metric_accuracy"):
        accuracy, accuracy_update = tf.metrics.accuracy(tf_labels, tf_predictions)

    with tf.name_scope("metric_precision"):
        precision, precision_update = tf.metrics.precision(tf_labels, tf_predictions)

    with tf.name_scope("metric_recall"):
        recall, recall_update = tf.metrics.recall(tf_labels, tf_predictions)

    with tf.name_scope("reset_metric_variables"):
        # Isolate the variables stored behind the scenes by the metric operations
        accuracy_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="metric_accuracy")
        precision_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="metric_precision")
        recall_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="metric_recall")
        metrics_var_list = accuracy_vars + precision_vars + recall_vars
        # Initializer that resets only the running variables of the three metrics
        running_vars_initializer = tf.variables_initializer(var_list=metrics_var_list)

    init_local_var = tf.local_variables_initializer()

    # Assume that the following values are our model's predictions on the train set
    train_predictions = [[[0], [0], [0], [1], [1], [1]],
                         [[1], [1], [1], [1], [1], [1]],
                         [[1], [1], [1], [1], [1], [1]]]

    # Assume that the following values are our model's predictions on the test set
    test_predictions = [[[0], [0], [0], [0], [0], [0]],
                        [[1], [1], [1], [1], [1], [1]],
                        [[1], [1], [1], [1], [1], [1]]]

    metric_updates = [accuracy_update, precision_update, recall_update]

    def run_pass(sess, split_name, batch_predictions):
        """Stream all batches once, accumulate the metrics and print the results."""
        # Reset the metric variables first; otherwise this pass would report the
        # combined result of this pass and whatever was accumulated before it.
        sess.run(running_vars_initializer)
        sess.run(iterator.initializer, feed_dict={filenames: train_input_paths})

        batch_index = 0
        while True:
            try:
                _, labels = sess.run(next_element)
            except tf.errors.OutOfRangeError:
                print("All data processed.\n")
                break

            print("labels:\n", labels)
            print("labels.shape: ", labels.shape, "\n")
            print("%s_predictions[%d]: " % (split_name, batch_index),
                  batch_predictions[batch_index], "\n")

            # Update the metrics with the current batch. The update ops
            # accumulate over all batches seen since the last reset.
            sess.run(metric_updates,
                     feed_dict={tf_labels: labels,
                                tf_predictions: batch_predictions[batch_index]})
            batch_index += 1

        # Now get the accumulated accuracy, precision, and recall values
        acc, pre, rec = sess.run([accuracy, precision, recall])
        print("accuracy: ", acc, "   precision: ", pre, "   recall: ", rec)
        print("%s - F1 Score: " % split_name.capitalize(), _f1_score(pre, rec), "\n\n#######\n")

    with tf.Session() as sess:
        sess.run(init_local_var)
        run_pass(sess, "train", train_predictions)
        run_pass(sess, "test", test_predictions)


def _f1_score(precision, recall):
    """Return the F1 score for the given precision and recall.

    Returns 0.0 when precision + recall is zero instead of dividing by zero.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0.0
    return (2 * precision * recall) / denominator

In [5]:
# CSV file(s) whose labels are compared against the hard-coded predictions.
train_input_paths = ["train1_with_2_label_classes.csv"]

# Rows per batch, and the number of distinct label values in the file.
minibatch_size = 6
num_classes = 2

validation(train_input_paths=train_input_paths,
           minibatch_size=minibatch_size,
           num_classes=num_classes)

labels:
 [[0]
 [0]
 [0]
 [0]
 [0]
 [0]]
labels.shape:  (6, 1) 

train_predictions[0]:  [[0], [0], [0], [1], [1], [1]] 

labels:
 [[1]
 [1]
 [1]
 [1]
 [1]
 [1]]
labels.shape:  (6, 1) 

train_predictions[1]:  [[1], [1], [1], [1], [1], [1]] 

labels:
 [[1]
 [1]
 [1]
 [1]
 [1]
 [1]]
labels.shape:  (6, 1) 

train_predictions[2]:  [[1], [1], [1], [1], [1], [1]] 

All data processed.

accuracy:  0.8333333    precision:  0.8    recall:  1.0
Train - F1 Score:  0.8888889256818805 

#######

labels:
 [[0]
 [0]
 [0]
 [0]
 [0]
 [0]]
labels.shape:  (6, 1) 

test_predictions[0]:  [[0], [0], [0], [0], [0], [0]] 

labels:
 [[1]
 [1]
 [1]
 [1]
 [1]
 [1]]
labels.shape:  (6, 1) 

test_predictions[1]:  [[1], [1], [1], [1], [1], [1]] 

labels:
 [[1]
 [1]
 [1]
 [1]
 [1]
 [1]]
labels.shape:  (6, 1) 

test_predictions[2]:  [[1], [1], [1], [1], [1], [1]] 

All data processed.

accuracy:  1.0    precision:  1.0    recall:  1.0
Test - F1 Score:  1.0 

#######



In [1]:
# Scratch check: indexing a nested list with [0] returns its first inner list.
www = [
    [0, 0, 0, 1, 2, 1],
    [1, 1, 1, 2, 2, 0],
    [2, 2, 2, 1, 1, 1],
]

print(www[0])

[0, 0, 0, 1, 2, 1]


In [72]:
# Scratch check: `+` on two lists concatenates them into a new list.
a = [1, 1, 1]
b = [2, 2, 2]

print(a + b)

[1, 1, 1, 2, 2, 2]
