# Tensorflow model
Continuation of the last prototypes. We will try to input the data using the DAtaset api As a start we will be using the cleaneval dataset because it's small and we'll get lower training times.

In [1]:
%matplotlib inline
# standard library
import itertools
import sys, os
import re
import glob

from collections import OrderedDict
from urllib.parse import urlparse

# pandas
import pandas as pd
import dask.dataframe as dd
import dask.array as da
import dask


# numpy, matplotlib, seaborn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# tesnsorflow
import tensorflow as tf

# scikit
from sklearn.metrics import precision_recall_fscore_support, classification_report

from tensorflow.contrib.hooks import ProfilerHook
# local imports
sys.path.append(os.path.join(os.getcwd(), "../src"))

from tf_utils import make_csv_pipeline, make_csv_col_tensors, csv_dataset, csv_to_tf_types, tfrecord_dataset

# this styling is purely my preference
# less chartjunk
sns.set_context('notebook', font_scale=1.5, rc={'line.linewidth': 2.5})
sns.set(style='ticks', palette='Set2')

## CSV dataset

In [2]:
def build_fn(csv_pattern):
    # the entire graph has to be built here
    ddf = dd.read_csv(csv_pattern)

    # add the weight column based on proportions
    label_proportion = ddf['content_label'].mean().compute()
    def add_weights(tens_dict, label_tens):
        # defien the constants
        positive_label_val = tf.constant(1.0)
        positive_proportion = tf.constant(0.5 / label_proportion, shape=())
        negative_proportion = tf.constant(0.5 / (1-label_proportion), shape=())
        
        # the weights are added as a conditional based on the corresponding label
        weight_tens = tf.where(tf.equal(label_tens, positive_label_val), 
                               positive_proportion, 
                               negative_proportion)
        tens_dict['weights'] = weight_tens
        return tens_dict, label_tens
        
    def drop_strings(tens_dict, label):
        # drops the string columns
        return {k: v for k,v in tens_dict.items() if k not in ['url', 'path']}, label
    
    # must drop the nonnumeric cols as dask does not support numeric_only
#     means = ddf.drop(['url', 'path', 'content_label'], axis=1).mean().compute() 
#     scale = ddf.drop(['url', 'path', 'content_label'], axis=1).std().compute()  # the scale
    
    def normalize_features(tens_dict, label):
        # normalize values for faster convergence
        return {k: (tf.to_float(v) -  tf.constant(means[k], dtype=tf.float32)) / tf.constant(scale[k], dtype=tf.float32)
                for k, v in tens_dict.items()}, label
        
    def concat_features(tens_dict, label):
        # concatenate everything but the weights into one big tensor
        weights = tens_dict.pop('weights')
        result = {'weights': weights, 'X': tf.stack([tf.to_float(tens) for tens in tens_dict.values()])}, label
        return result

    def drop_weights(tens_dict, label):
        # just for debug, to see fi weights are not somehow considered by evaluation
        tens_dict.pop('weights')
        return tens_dict, label
    
    # the function to return
    def input_fn(keep_weights=True, num_epochs=1000, batch_size=100, shuffle=True, num_parallel_calls=16):
        # build the input pipeline 
        dataset = csv_dataset(csv_pattern, 'content_label', num_parallel_calls=num_parallel_calls)  # decode the csv
        dataset = dataset.map(drop_strings, num_parallel_calls=num_parallel_calls)  # drop redundants
#         dataset = dataset.map(normalize_features, num_parallel_calls=num_parallel_calls)  # mean, std normalization
        dataset = dataset.map(add_weights, num_parallel_calls=num_parallel_calls)  # add weight col
      
        if not keep_weights:
            # dropping the weights if neccesary
            dataset.map(drop_weights, num_parallel_calls=num_parallel_calls)
        
        dataset = dataset.map(concat_features, num_parallel_calls=num_parallel_calls).cache()  # concatenate the features
        
        # shuffle the input
        if shuffle:
            dataset = dataset.shuffle(buffer_size=100000)  
        
        # batch, repeate, iterate
        dataset = dataset.batch(batch_size)
        dataset = dataset.repeat(num_epochs)
        
        # return the iterator, must be returned from here
        # so that the graph is built upon call
        iterator = dataset.make_one_shot_iterator()
        features, labels = iterator.get_next()
        return features, labels
    
    # return the function
    return input_fn

In [3]:
# call the higher order function to return the input functions
train_set_fn = build_fn('../data/final/cleaneval/dom-full-train-*.csv')
validation_set_fn = build_fn('../data/final/cleaneval/dom-full-validation-*.csv')
test_set_fn = build_fn('../data/final/cleaneval/dom-full-test-*.csv')

# partial calls 
feat_col_fn = lambda: train_set_fn(num_epochs=1, batch_size=1, shuffle=False)
train_fn = lambda: train_set_fn(batch_size=2000, num_epochs=1000)
validation_fn = lambda: validation_set_fn(batch_size=2000, num_epochs=1, shuffle=False, keep_weights=False)
test_fn =  lambda: test_set_fn(batch_size=2000, num_epochs=1, shuffle=False, keep_weights=False)

In [4]:
# session workaround
sess_config = tf.ConfigProto()
sess_config.gpu_options.allow_growth = 1

config = tf.estimator.RunConfig(
    model_dir='/home/nikitautiu/model_dir2',
    save_checkpoints_steps=1000, 
    session_config=sess_config, 
)


# we can infer the columns from the function, a bit costly, but better than hardcoding
tf_feat_cols = tf.contrib.learn.infer_real_valued_columns_from_input_fn(feat_col_fn)
estimator = tf.estimator.DNNClassifier(feature_columns=tf_feat_cols, hidden_units=(1000, 500, 200), 
                                        weight_column='weigha', model_dir='/home/nikitautiu/model_dir2', config=config)


INFO:tensorflow:Using config: {'_master': '', '_num_worker_replicas': 1, '_tf_random_seed': None, '_task_id': 0, '_model_dir': '/home/nikitautiu/model_dir2', '_num_ps_replicas': 0, '_keep_checkpoint_every_n_hours': 10000, '_save_summary_steps': 100, '_session_config': gpu_options {
  allow_growth: true
}
, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f71b81eb828>, '_log_step_count_steps': 100, '_save_checkpoints_steps': 1000, '_task_type': 'worker', '_save_checkpoints_secs': None, '_service': None, '_keep_checkpoint_max': 5}


## Try the Experiment interface

In [5]:
# intialize the experiment, save evry snapshot
experiment = tf.contrib.learn.Experiment(
    estimator=estimator, train_input_fn=train_fn, eval_input_fn=validation_fn
)

In [8]:
experiment.train_and_evaluate()

INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-17:40:08
INFO:tensorflow:Saving dict for global step 22001: accuracy = 0.824425, accuracy_baseline = 0.5, auc = 0.899766, auc_precision_recall = 0.894141, average_loss = 0.569453, global_step = 22001, label/mean = 0.5, loss = 1117.01, prediction/mean = 0.448209
INFO:tensorflow:Validation (step 22001): label/mean = 0.5, auc = 0.899766, auc_precision_recall = 0.894141, loss = 1117.01, accuracy_baseline = 0.5, accuracy = 0.824425, average_loss = 0.569453, prediction/mean = 0.448209, global_step = 22001
INFO:tensorflow:step = 22001, loss = 428.501 (84.649 sec)
INFO:tensorflow:global_step/sec: 1.19136
INFO:tensorflow:step = 22101, loss = 385.265 (5.734 sec)
INFO:tensorflow:global_step/sec: 17.3694
INFO:tensorflow:step = 22201, loss = 460.016 (5.757 sec)
INFO:tensorflow:global_step/sec: 17.319
INFO:tensorflow:step = 22301, loss = 450.007 (5.773 sec)
INFO:tensorflow:global_step/sec: 17.3428
INFO:tensorflow:s

INFO:tensorflow:Validation (step 25001): label/mean = 0.5, auc = 0.894368, auc_precision_recall = 0.898644, loss = 1669.83, accuracy_baseline = 0.5, accuracy = 0.79209, average_loss = 0.851281, prediction/mean = 0.353291, global_step = 25001
INFO:tensorflow:step = 25001, loss = 605.571 (84.554 sec)
INFO:tensorflow:global_step/sec: 1.19286
INFO:tensorflow:step = 25101, loss = 468.118 (5.657 sec)
INFO:tensorflow:global_step/sec: 17.3504
INFO:tensorflow:step = 25201, loss = 341.309 (5.765 sec)
INFO:tensorflow:global_step/sec: 17.0641
INFO:tensorflow:step = 25301, loss = 359.148 (5.864 sec)
INFO:tensorflow:global_step/sec: 17.0676
INFO:tensorflow:step = 25401, loss = 392.14 (5.856 sec)
INFO:tensorflow:global_step/sec: 16.9688
INFO:tensorflow:step = 25501, loss = 375.842 (5.892 sec)
INFO:tensorflow:global_step/sec: 17.3133
INFO:tensorflow:step = 25601, loss = 514.275 (5.776 sec)
INFO:tensorflow:global_step/sec: 17.198
INFO:tensorflow:step = 25701, loss = 349.702 (5.818 sec)
INFO:tensorflow:

INFO:tensorflow:step = 28001, loss = 295.183 (84.909 sec)
INFO:tensorflow:global_step/sec: 1.18766
INFO:tensorflow:step = 28101, loss = 446.481 (5.623 sec)
INFO:tensorflow:global_step/sec: 17.2844
INFO:tensorflow:step = 28201, loss = 287.328 (5.785 sec)
INFO:tensorflow:global_step/sec: 17.3936
INFO:tensorflow:step = 28301, loss = 300.808 (5.749 sec)
INFO:tensorflow:global_step/sec: 17.5643
INFO:tensorflow:step = 28401, loss = 263.833 (5.694 sec)
INFO:tensorflow:global_step/sec: 17.17
INFO:tensorflow:step = 28501, loss = 550.417 (5.824 sec)
INFO:tensorflow:global_step/sec: 17.3089
INFO:tensorflow:step = 28601, loss = 277.749 (5.779 sec)
INFO:tensorflow:global_step/sec: 17.1883
INFO:tensorflow:step = 28701, loss = 406.259 (5.818 sec)
INFO:tensorflow:global_step/sec: 17.4769
INFO:tensorflow:step = 28801, loss = 349.831 (5.720 sec)
INFO:tensorflow:global_step/sec: 17.5932
INFO:tensorflow:step = 28901, loss = 307.289 (5.686 sec)
INFO:tensorflow:Saving checkpoints for 29001 into /home/nikita

INFO:tensorflow:step = 31201, loss = 1206.96 (5.825 sec)
INFO:tensorflow:global_step/sec: 17.361
INFO:tensorflow:step = 31301, loss = 240.99 (5.760 sec)
INFO:tensorflow:global_step/sec: 17.2682
INFO:tensorflow:step = 31401, loss = 269.999 (5.791 sec)
INFO:tensorflow:global_step/sec: 17.5375
INFO:tensorflow:step = 31501, loss = 271.895 (5.701 sec)
INFO:tensorflow:global_step/sec: 17.6159
INFO:tensorflow:step = 31601, loss = 182.196 (5.678 sec)
INFO:tensorflow:global_step/sec: 17.4447
INFO:tensorflow:step = 31701, loss = 229.35 (5.732 sec)
INFO:tensorflow:global_step/sec: 17.4533
INFO:tensorflow:step = 31801, loss = 229.165 (5.729 sec)
INFO:tensorflow:global_step/sec: 17.2922
INFO:tensorflow:step = 31901, loss = 394.512 (5.783 sec)
INFO:tensorflow:Saving checkpoints for 32001 into /home/nikitautiu/model_dir2/model.ckpt.
INFO:tensorflow:global_step/sec: 16.0002
INFO:tensorflow:Starting evaluation at 2017-11-08-18:01:43
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/

INFO:tensorflow:step = 34401, loss = 206.214 (5.605 sec)
INFO:tensorflow:global_step/sec: 17.7172
INFO:tensorflow:step = 34501, loss = 198.01 (5.644 sec)
INFO:tensorflow:global_step/sec: 17.4517
INFO:tensorflow:step = 34601, loss = 200.908 (5.730 sec)
INFO:tensorflow:global_step/sec: 17.4664
INFO:tensorflow:step = 34701, loss = 254.575 (5.729 sec)
INFO:tensorflow:global_step/sec: 17.524
INFO:tensorflow:step = 34801, loss = 385.128 (5.703 sec)
INFO:tensorflow:global_step/sec: 17.7658
INFO:tensorflow:step = 34901, loss = 233.555 (5.630 sec)
INFO:tensorflow:Saving checkpoints for 35001 into /home/nikitautiu/model_dir2/model.ckpt.
INFO:tensorflow:global_step/sec: 15.9482
INFO:tensorflow:Starting evaluation at 2017-11-08-18:08:31
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-35001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INF

INFO:tensorflow:step = 37601, loss = 240.757 (5.683 sec)
INFO:tensorflow:global_step/sec: 17.4221
INFO:tensorflow:step = 37701, loss = 232.363 (5.742 sec)
INFO:tensorflow:global_step/sec: 17.4595
INFO:tensorflow:step = 37801, loss = 202.836 (5.728 sec)
INFO:tensorflow:global_step/sec: 17.3534
INFO:tensorflow:step = 37901, loss = 173.108 (5.762 sec)
INFO:tensorflow:Saving checkpoints for 38001 into /home/nikitautiu/model_dir2/model.ckpt.
INFO:tensorflow:global_step/sec: 16.0525
INFO:tensorflow:Starting evaluation at 2017-11-08-18:15:19
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-38001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evalu

INFO:tensorflow:step = 40801, loss = 281.826 (5.743 sec)
INFO:tensorflow:global_step/sec: 17.355
INFO:tensorflow:step = 40901, loss = 372.998 (5.762 sec)
INFO:tensorflow:Saving checkpoints for 41001 into /home/nikitautiu/model_dir2/model.ckpt.
INFO:tensorflow:global_step/sec: 15.7847
INFO:tensorflow:Starting evaluation at 2017-11-08-18:22:09
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-41001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
IN

INFO:tensorflow:global_step/sec: 16.0078
INFO:tensorflow:Starting evaluation at 2017-11-08-18:28:57
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-44001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evalu

INFO:tensorflow:Starting evaluation at 2017-11-08-18:35:47
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-47001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-18:36:59
INFO:tensorf

INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-18:43:47
INFO:tensorflow:Saving dict for global step 50001: accuracy = 0.799093, accuracy_baseline = 0.5, auc = 0.92671, auc_precision_recall = 0.951592, average_loss = 0.894067, global_step = 50001, la

INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-18:50:34
INFO:tensorflow:Saving dict for global step 53001: accuracy = 0.919344, accuracy_baseline = 0.5, auc = 0.966375, auc_precision_recall = 0.968118, average_loss = 0.259305, global_step = 53001, label/mean = 0.5, loss = 508.639, prediction/mean = 0.516111
INFO:tensorflow:Validation (step 53001): label/mean = 0.5, auc = 0.966375, auc_precision_recall = 0.968118, loss = 

INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-18:57:47
INFO:tensorflow:Saving dict for global step 56001: accuracy = 0.933343, accuracy_baseline = 0.5, auc = 0.975804, auc_precision_recall = 0.980912, average_loss = 0.21485, global_step = 56001, label/mean = 0.5, loss = 421.439, prediction/mean = 0.47543
INFO:tensorflow:Validation (step 56001): label/mean = 0.5, auc = 0.975804, auc_precision_recall = 0.980912, loss = 421.439, accuracy_baseline = 0.5, accuracy = 0.933343, average_loss = 0.21485, prediction/mean = 0.47543, global_step = 56001
INFO:tensorflow:step = 56001, loss = 72.4582 (83.627 

INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-19:04:36
INFO:tensorflow:Saving dict for global step 59001: accuracy = 0.925323, accuracy_baseline = 0.5, auc = 0.979362, auc_precision_recall = 0.987126, average_loss = 0.276994, global_step = 59001, label/mean = 0.5, loss = 543.337, prediction/mean = 0.411798
INFO:tensorflow:Validation (step 59001): label/mean = 0.5, auc = 0.979362, auc_precision_recall = 0.987126, loss = 543.337, accuracy_baseline = 0.5, accuracy = 0.925323, average_loss = 0.276994, prediction/mean = 0.411798, global_step = 59001
INFO:tensorflow:step = 59001, loss = 57.0742 (83.060 sec)
INFO:tensorflow:global_step/sec: 1.21345
INFO:tensorflow:step = 59101, loss = 38.6449 (5.556 sec)
INFO:tensorflow:global_step/sec: 17.3972
INFO:tensorflow:step = 59201, lo

INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-19:11:20
INFO:tensorflow:Saving dict for global step 62001: accuracy = 0.913033, accuracy_baseline = 0.5, auc = 0.966281, auc_precision_recall = 0.971068, average_loss = 0.309572, global_step = 62001, label/mean = 0.5, loss = 607.239, prediction/mean = 0.431267
INFO:tensorflow:Validation (step 62001): label/mean = 0.5, auc = 0.966281, auc_precision_recall = 0.971068, loss = 607.239, accuracy_baseline = 0.5, accuracy = 0.913033, average_loss = 0.309572, prediction/mean = 0.431267, global_step = 62001
INFO:tensorflow:step = 62001, loss = 72.3838 (82.849 sec)
INFO:tensorflow:global_step/sec: 1.21478
INFO:tensorflow:step = 62101, loss = 108.909 (5.826 sec)
INFO:tensorflow:global_step/sec: 16.5536
INFO:tensorflow:step = 62201, loss = 56.27 (6.041 sec)
INFO:tensorflow:global_step/sec: 16.8648
INFO:tensorflow:step = 62301, loss = 63.6701 (5.969 sec)
INFO:tensorflow:global_step/sec: 16.322
INFO:tensorflow:ste

INFO:tensorflow:Validation (step 65001): label/mean = 0.5, auc = 0.979196, auc_precision_recall = 0.987486, loss = 483.225, accuracy_baseline = 0.5, accuracy = 0.939538, average_loss = 0.246349, prediction/mean = 0.423923, global_step = 65001
INFO:tensorflow:step = 65001, loss = 43.2218 (83.591 sec)
INFO:tensorflow:global_step/sec: 1.20697
INFO:tensorflow:step = 65101, loss = 56.3146 (6.326 sec)
INFO:tensorflow:global_step/sec: 14.895
INFO:tensorflow:step = 65201, loss = 158.693 (6.749 sec)
INFO:tensorflow:global_step/sec: 13.22
INFO:tensorflow:step = 65301, loss = 91.4306 (7.598 sec)
INFO:tensorflow:global_step/sec: 12.8884
INFO:tensorflow:step = 65401, loss = 59.7682 (7.740 sec)
INFO:tensorflow:global_step/sec: 13.1267
INFO:tensorflow:step = 65501, loss = 98.4756 (7.570 sec)
INFO:tensorflow:global_step/sec: 13.2704
INFO:tensorflow:step = 65601, loss = 93.082 (7.582 sec)
INFO:tensorflow:global_step/sec: 14.3379
INFO:tensorflow:step = 65701, loss = 42.7262 (6.927 sec)
INFO:tensorflow:g

INFO:tensorflow:step = 68001, loss = 46.4002 (83.537 sec)
INFO:tensorflow:global_step/sec: 1.19946
INFO:tensorflow:step = 68101, loss = 211.77 (6.268 sec)
INFO:tensorflow:global_step/sec: 13.4874
INFO:tensorflow:step = 68201, loss = 449.936 (7.386 sec)
INFO:tensorflow:global_step/sec: 14.2085
INFO:tensorflow:step = 68301, loss = 118.605 (7.002 sec)
INFO:tensorflow:global_step/sec: 15.9557
INFO:tensorflow:step = 68401, loss = 144.212 (6.306 sec)
INFO:tensorflow:global_step/sec: 13.7941
INFO:tensorflow:step = 68501, loss = 74.46 (7.213 sec)
INFO:tensorflow:global_step/sec: 13.2447
INFO:tensorflow:step = 68601, loss = 55.8435 (7.615 sec)
INFO:tensorflow:global_step/sec: 12.6861
INFO:tensorflow:step = 68701, loss = 35.1104 (7.835 sec)
INFO:tensorflow:global_step/sec: 13.2867
INFO:tensorflow:step = 68801, loss = 325.137 (7.528 sec)
INFO:tensorflow:global_step/sec: 14.5611
INFO:tensorflow:step = 68901, loss = 108.362 (6.891 sec)
INFO:tensorflow:Saving checkpoints for 69001 into /home/nikitau

INFO:tensorflow:step = 71201, loss = 65.2095 (6.947 sec)
INFO:tensorflow:global_step/sec: 12.8324
INFO:tensorflow:step = 71301, loss = 53.8744 (7.779 sec)
INFO:tensorflow:global_step/sec: 13.8594
INFO:tensorflow:step = 71401, loss = 41.2184 (7.247 sec)
INFO:tensorflow:global_step/sec: 13.541
INFO:tensorflow:step = 71501, loss = 408.231 (7.366 sec)
INFO:tensorflow:global_step/sec: 13.1666
INFO:tensorflow:step = 71601, loss = 52.5873 (7.602 sec)
INFO:tensorflow:global_step/sec: 12.7515
INFO:tensorflow:step = 71701, loss = 174.227 (7.851 sec)
INFO:tensorflow:global_step/sec: 12.9298
INFO:tensorflow:step = 71801, loss = 83.8781 (7.708 sec)
INFO:tensorflow:global_step/sec: 12.7528
INFO:tensorflow:step = 71901, loss = 45.5821 (7.816 sec)
INFO:tensorflow:Saving checkpoints for 72001 into /home/nikitautiu/model_dir2/model.ckpt.
INFO:tensorflow:global_step/sec: 14.0935
INFO:tensorflow:Starting evaluation at 2017-11-08-19:34:41
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir

INFO:tensorflow:step = 74401, loss = 95.285 (7.826 sec)
INFO:tensorflow:global_step/sec: 13.1788
INFO:tensorflow:step = 74501, loss = 1455.71 (7.581 sec)
INFO:tensorflow:global_step/sec: 14.07
INFO:tensorflow:step = 74601, loss = 64.181 (7.138 sec)
INFO:tensorflow:global_step/sec: 13.3124
INFO:tensorflow:step = 74701, loss = 24.5265 (7.519 sec)
INFO:tensorflow:global_step/sec: 13.3082
INFO:tensorflow:step = 74801, loss = 16.2345 (7.473 sec)
INFO:tensorflow:global_step/sec: 16.432
INFO:tensorflow:step = 74901, loss = 21.2919 (6.067 sec)
INFO:tensorflow:Saving checkpoints for 75001 into /home/nikitautiu/model_dir2/model.ckpt.
INFO:tensorflow:global_step/sec: 13.4399
INFO:tensorflow:Starting evaluation at 2017-11-08-19:41:59
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-75001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:t

INFO:tensorflow:step = 77601, loss = 39.3934 (6.335 sec)
INFO:tensorflow:global_step/sec: 15.63
INFO:tensorflow:step = 77701, loss = 24.8292 (6.422 sec)
INFO:tensorflow:global_step/sec: 15.31
INFO:tensorflow:step = 77801, loss = 14.8594 (6.509 sec)
INFO:tensorflow:global_step/sec: 15.4539
INFO:tensorflow:step = 77901, loss = 261.157 (6.479 sec)
INFO:tensorflow:Saving checkpoints for 78001 into /home/nikitautiu/model_dir2/model.ckpt.
INFO:tensorflow:global_step/sec: 14.6097
INFO:tensorflow:Starting evaluation at 2017-11-08-19:49:19
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-78001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluatio

INFO:tensorflow:step = 80801, loss = 14.8624 (6.773 sec)
INFO:tensorflow:global_step/sec: 15.5699
INFO:tensorflow:step = 80901, loss = 5319.72 (6.453 sec)
INFO:tensorflow:Saving checkpoints for 81001 into /home/nikitautiu/model_dir2/model.ckpt.
INFO:tensorflow:global_step/sec: 13.6144
INFO:tensorflow:Starting evaluation at 2017-11-08-19:56:23
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-81001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
I

INFO:tensorflow:global_step/sec: 15.0937
INFO:tensorflow:Starting evaluation at 2017-11-08-20:03:15
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-84001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evalu

INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-87001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-20:11:05
INFO:tensorflow:Saving dict for global step 87001: accuracy = 0.963322,

INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-20:17:55
INFO:tensorflow:Saving dict for global step 90001: accuracy = 0.960682, accuracy_baseline = 0.5, auc = 0.977952, auc_precision_recall = 0.984428, average_loss = 0.22947, global_step = 90001, label/mean = 0.5, loss = 450.116, prediction/mean = 0.455677
INFO:tensor

INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-20:24:29
INFO:tensorflow:Saving dict for global step 93001: accuracy = 0.96905, accuracy_baseline = 0.5, auc = 0.98208, auc_precision_recall = 0.989776, average_loss = 0.16572, global_step = 93001, label/mean = 0.5, loss = 325.068, prediction/mean = 0.469352
INFO:tensorflow:Validation (step 93001): label/mean = 0.5, auc = 0.98208, auc_precision_recall = 0.989776, loss = 325.068, accuracy_baseline = 0.5, accuracy = 0.96905, average_loss = 0.165

INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-20:31:02
INFO:tensorflow:Saving dict for global step 96001: accuracy = 0.935483, accuracy_baseline = 0.5, auc = 0.97958, auc_precision_recall = 0.977712, average_loss = 0.231526, global_step = 96001, label/mean = 0.5, loss = 454.15, prediction/mean = 0.44251
INFO:tensorflow:Validation (step 96001): label/mean = 0.5, auc = 0.97958, auc_precision_recall = 0.977712, loss = 454.15, accuracy_baseline = 0.5, accuracy = 0.935483, average_loss = 0.231526, prediction/mean = 0.44251, global_step = 96001
INFO:tensorflow:step = 96001, loss = 96.6044 (79.180 sec)
INFO:tensorflow:global_step/sec: 1.27329
INFO:tensorflow:step = 96101

INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-20:37:27
INFO:tensorflow:Saving dict for global step 99001: accuracy = 0.928278, accuracy_baseline = 0.5, auc = 0.976199, auc_precision_recall = 0.968285, average_loss = 0.221596, global_step = 99001, label/mean = 0.5, loss = 434.671, prediction/mean = 0.457251
INFO:tensorflow:Validation (step 99001): label/mean = 0.5, auc = 0.976199, auc_precision_recall = 0.968285, loss = 434.671, accuracy_baseline = 0.5, accuracy = 0.928278, average_loss = 0.221596, prediction/mean = 0.457251, global_step = 99001
INFO:tensorflow:step = 99001, loss = 119.264 (78.785 sec)
INFO:tensorflow:global_step/sec: 1.28028
INFO:tensorflow:step = 99101, loss = 102.138 (5.327 sec)
INFO:tensorflow:global_step/sec: 18.1917
INFO:tensorflow:step = 99201, loss = 86.8556 (5.498 sec)
INFO:tensorflow:global_step/sec: 17.7854
INFO:t

INFO:tensorflow:Finished evaluation at 2017-11-08-20:43:52
INFO:tensorflow:Saving dict for global step 102001: accuracy = 0.96049, accuracy_baseline = 0.5, auc = 0.981845, auc_precision_recall = 0.989039, average_loss = 0.203399, global_step = 102001, label/mean = 0.5, loss = 398.977, prediction/mean = 0.458231
INFO:tensorflow:Validation (step 102001): label/mean = 0.5, auc = 0.981845, auc_precision_recall = 0.989039, loss = 398.977, accuracy_baseline = 0.5, accuracy = 0.96049, average_loss = 0.203399, prediction/mean = 0.458231, global_step = 102001
INFO:tensorflow:step = 102001, loss = 15.3119 (78.880 sec)
INFO:tensorflow:global_step/sec: 1.27731
INFO:tensorflow:step = 102101, loss = 6.79893 (5.476 sec)
INFO:tensorflow:global_step/sec: 18.2439
INFO:tensorflow:step = 102201, loss = 13.1046 (5.481 sec)
INFO:tensorflow:global_step/sec: 18.1322
INFO:tensorflow:step = 102301, loss = 40.1182 (5.515 sec)
INFO:tensorflow:global_step/sec: 18.0609
INFO:tensorflow:step = 102401, loss = 7.67954 

INFO:tensorflow:Validation (step 105001): label/mean = 0.5, auc = 0.986416, auc_precision_recall = 0.992428, loss = 319.431, accuracy_baseline = 0.5, accuracy = 0.964255, average_loss = 0.162846, prediction/mean = 0.457305, global_step = 105001
INFO:tensorflow:step = 105001, loss = 8.97723 (78.900 sec)
INFO:tensorflow:global_step/sec: 1.27608
INFO:tensorflow:step = 105101, loss = 6.08582 (5.448 sec)
INFO:tensorflow:global_step/sec: 18.405
INFO:tensorflow:step = 105201, loss = 55.4282 (5.433 sec)
INFO:tensorflow:global_step/sec: 18.0863
INFO:tensorflow:step = 105301, loss = 12.2394 (5.528 sec)
INFO:tensorflow:global_step/sec: 18.2404
INFO:tensorflow:step = 105401, loss = 6.49392 (5.483 sec)
INFO:tensorflow:global_step/sec: 18.1078
INFO:tensorflow:step = 105501, loss = 3.98424 (5.523 sec)
INFO:tensorflow:global_step/sec: 18.1444
INFO:tensorflow:step = 105601, loss = 7.0966 (5.512 sec)
INFO:tensorflow:global_step/sec: 18.3097
INFO:tensorflow:step = 105701, loss = 5.05623 (5.462 sec)
INFO:

INFO:tensorflow:step = 108001, loss = 9.30013 (78.535 sec)
INFO:tensorflow:global_step/sec: 1.28337
INFO:tensorflow:step = 108101, loss = 9.06942 (5.392 sec)
INFO:tensorflow:global_step/sec: 18.2901
INFO:tensorflow:step = 108201, loss = 4.6658 (5.468 sec)
INFO:tensorflow:global_step/sec: 17.938
INFO:tensorflow:step = 108301, loss = 4.77944 (5.574 sec)
INFO:tensorflow:global_step/sec: 18.1837
INFO:tensorflow:step = 108401, loss = 31.2254 (5.501 sec)
INFO:tensorflow:global_step/sec: 18.0031
INFO:tensorflow:step = 108501, loss = 7.00913 (5.554 sec)
INFO:tensorflow:global_step/sec: 18.5741
INFO:tensorflow:step = 108601, loss = 48.721 (5.384 sec)
INFO:tensorflow:global_step/sec: 18.0194
INFO:tensorflow:step = 108701, loss = 5.65146 (5.548 sec)
INFO:tensorflow:global_step/sec: 17.8718
INFO:tensorflow:step = 108801, loss = 3.51023 (5.597 sec)
INFO:tensorflow:global_step/sec: 18.1989
INFO:tensorflow:step = 108901, loss = 17.8199 (5.493 sec)
INFO:tensorflow:Saving checkpoints for 109001 into /h

INFO:tensorflow:global_step/sec: 18.3704
INFO:tensorflow:step = 111201, loss = 7.46444 (5.442 sec)
INFO:tensorflow:global_step/sec: 18.1737
INFO:tensorflow:step = 111301, loss = 4.5057 (5.504 sec)
INFO:tensorflow:global_step/sec: 18.2404
INFO:tensorflow:step = 111401, loss = 11.6612 (5.481 sec)
INFO:tensorflow:global_step/sec: 18.3088
INFO:tensorflow:step = 111501, loss = 100.699 (5.462 sec)
INFO:tensorflow:global_step/sec: 18.0106
INFO:tensorflow:step = 111601, loss = 84.0018 (5.553 sec)
INFO:tensorflow:global_step/sec: 18.2876
INFO:tensorflow:step = 111701, loss = 24.5859 (5.467 sec)
INFO:tensorflow:global_step/sec: 17.9954
INFO:tensorflow:step = 111801, loss = 36.1301 (5.556 sec)
INFO:tensorflow:global_step/sec: 18.2173
INFO:tensorflow:step = 111901, loss = 16.3618 (5.490 sec)
INFO:tensorflow:Saving checkpoints for 112001 into /home/nikitautiu/model_dir2/model.ckpt.
INFO:tensorflow:global_step/sec: 16.5558
INFO:tensorflow:Starting evaluation at 2017-11-08-21:04:08
INFO:tensorflow:Re

INFO:tensorflow:step = 114301, loss = 8.7983 (5.457 sec)
INFO:tensorflow:global_step/sec: 18.3509
INFO:tensorflow:step = 114401, loss = 4.29294 (5.450 sec)
INFO:tensorflow:global_step/sec: 18.3498
INFO:tensorflow:step = 114501, loss = 49.6281 (5.449 sec)
INFO:tensorflow:global_step/sec: 17.7435
INFO:tensorflow:step = 114601, loss = 28.3724 (5.638 sec)
INFO:tensorflow:global_step/sec: 18.1022
INFO:tensorflow:step = 114701, loss = 8.93457 (5.524 sec)
INFO:tensorflow:global_step/sec: 18.2409
INFO:tensorflow:step = 114801, loss = 170.465 (5.482 sec)
INFO:tensorflow:global_step/sec: 18.0644
INFO:tensorflow:step = 114901, loss = 12.3681 (5.536 sec)
INFO:tensorflow:Saving checkpoints for 115001 into /home/nikitautiu/model_dir2/model.ckpt.
INFO:tensorflow:global_step/sec: 16.7535
INFO:tensorflow:Starting evaluation at 2017-11-08-21:10:33
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-115001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]


({'accuracy': 0.96744716,
  'accuracy_baseline': 0.50000012,
  'auc': 0.98571068,
  'auc_precision_recall': 0.99209225,
  'average_loss': 0.16093655,
  'global_step': 117000,
  'label/mean': 0.49999985,
  'loss': 315.68445,
  'prediction/mean': 0.45484051},
 [])

In [11]:
def get_pred_expected(estimator, input_fn):
    """Given an input function and an estimator, return the expected and predicted labels"""
    # get prdeiction
    predicted_list = [pred['class_ids'] for pred in estimator.predict(input_fn)]
    pred_array = np.stack(predicted_list, axis=0).ravel()  # concatenate them to one

    # consume label tensors
    # not necessarily the most elegant solution but works
    label_tens = input_fn()[1]
    expected_list = []
    with tf.Session() as sess:

        # get each element of the training dataset until the end is reached
        while True:
            try:
                elem = sess.run(label_tens)
                expected_list.append(elem)
            except tf.errors.OutOfRangeError:
                break
            
    expected_array = np.concatenate(expected_list).ravel()
    
    return expected_array, pred_array

In [76]:
# much needed sanity check. the data is small enough not to bother with pipelines
expected_array, pred_array = get_pred_expected(estimator, test_fn)
print(classification_report(expected_array, pred_array, digits=6))

INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-117001
             precision    recall  f1-score   support

        0.0   0.998660  0.998960  0.998810     43284
        1.0   0.994024  0.992311  0.993167      7543

avg / total   0.997972  0.997974  0.997973     50827



Attempt another training run with validation testing. The point of this is not t reach better performance, bt to actually see on the validation curves in tensorflow if it overfits.

In [None]:
experiment.train_and_evaluate()

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-117001
INFO:tensorflow:Saving checkpoints for 117002 into /home/nikitautiu/model_dir2/model.ckpt.
INFO:tensorflow:Starting evaluation at 2017-11-08-22:49:58
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-117002
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation 

INFO:tensorflow:global_step/sec: 16.1157
INFO:tensorflow:Starting evaluation at 2017-11-08-22:59:48
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-120002
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished eval

INFO:tensorflow:Starting evaluation at 2017-11-08-23:06:48
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-123002
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-23:07:56
INFO:tensor

INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-08-23:15:57
INFO:tensorflow:Saving dict for global step 126002: accuracy = 0.969647, accuracy_baseline = 0.5, auc = 0.987805, auc_precision_recall = 0.993438, average_los

In [80]:
# metrics on test
estimator.evaluate(input_fn=test_fn)

INFO:tensorflow:Starting evaluation at 2017-11-09-06:49:41
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-234001
INFO:tensorflow:Finished evaluation at 2017-11-09-06:50:54
INFO:tensorflow:Saving dict for global step 234001: accuracy = 0.998374, accuracy_baseline = 0.5, auc = 0.999467, auc_precision_recall = 0.999732, average_loss = 0.00922854, global_step = 234001, label/mean = 0.5, loss = 18.0407, prediction/mean = 0.498044


{'accuracy': 0.9983744,
 'accuracy_baseline': 0.5,
 'auc': 0.99946719,
 'auc_precision_recall': 0.99973232,
 'average_loss': 0.0092285443,
 'global_step': 234001,
 'label/mean': 0.49999997,
 'loss': 18.040741,
 'prediction/mean': 0.49804422}

In [8]:
# another try
expected_array, pred_array = get_pred_expected(estimator, test_fn)
print(classification_report(expected_array, pred_array, digits=6))

INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-234001
             precision    recall  f1-score   support

        0.0   0.851364  0.998175  0.918942     43284
        1.0   0.000000  0.000000  0.000000      7543

avg / total   0.725017  0.850040  0.782566     50827



In [12]:
# another try
expected_array, pred_array = get_pred_expected(estimator, validation_fn)
print(classification_report(expected_array, pred_array, digits=6))

INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir2/model.ckpt-234001
             precision    recall  f1-score   support

        0.0   0.825573  0.998036  0.903650     35639
        1.0   0.000000  0.000000  0.000000      7515

avg / total   0.681805  0.824234  0.746285     43154

