# Tensorflow model
Continuation of the last prototypes. We will try to input the data using the DAtaset api As a start we will be using the cleaneval dataset because it's small and we'll get lower training times.

In [1]:
%matplotlib inline
# standard library
import itertools
import sys, os
import re
import glob

from collections import OrderedDict
from urllib.parse import urlparse

# pandas
import pandas as pd
import dask.dataframe as dd
import dask.array as da
import dask


# numpy, matplotlib, seaborn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# tesnsorflow
import tensorflow as tf

# scikit
from sklearn.metrics import precision_recall_fscore_support, classification_report

from tensorflow.contrib.hooks import ProfilerHook
# local imports
sys.path.append(os.path.join(os.getcwd(), "../src"))

from tf_utils import make_csv_pipeline, make_csv_col_tensors, csv_dataset, csv_to_tf_types, tfrecord_dataset

# this styling is purely my preference
# less chartjunk
sns.set_context('notebook', font_scale=1.5, rc={'line.linewidth': 2.5})
sns.set(style='ticks', palette='Set2')

## CSV dataset

In [2]:
def build_fn(csv_pattern):
    # the entire graph has to be built here
    ddf = dd.read_csv(csv_pattern)

    # add the weight column based on proportions
    label_proportion = ddf['content_label'].mean().compute()
    def add_weights(tens_dict, label_tens):
        # defien the constants
        positive_label_val = tf.constant(1.0)
        positive_proportion = tf.constant(0.5 / label_proportion, shape=())
        negative_proportion = tf.constant(0.5 / (1-label_proportion), shape=())
        
        # the weights are added as a conditional based on the corresponding label
        weight_tens = tf.where(tf.equal(label_tens, positive_label_val), 
                               positive_proportion, 
                               negative_proportion)
        tens_dict['weights'] = weight_tens
        return tens_dict, label_tens
        
    def drop_strings(tens_dict, label):
        # drops the string columns
        return {k: v for k,v in tens_dict.items() if k not in ['url', 'path']}, label
    
    # must drop the nonnumeric cols as dask does not support numeric_only
#     means = ddf.drop(['url', 'path', 'content_label'], axis=1).mean().compute() 
#     scale = ddf.drop(['url', 'path', 'content_label'], axis=1).std().compute()  # the scale
    
    def normalize_features(tens_dict, label):
        # normalize values for faster convergence
        return {k: (tf.to_float(v) -  tf.constant(means[k], dtype=tf.float32)) / tf.constant(scale[k], dtype=tf.float32)
                for k, v in tens_dict.items()}, label
        
    def concat_features(tens_dict, label):
        # concatenate everything but the weights into one big tensor
        weights = tens_dict.pop('weights')
        result = {'weights': weights, 'X': tf.stack([tf.to_float(tens) for tens in tens_dict.values()])}, label
        return result

    def drop_weights(tens_dict, label):
        # just for debug, to see fi weights are not somehow considered by evaluation
        tens_dict.pop('weights')
        return tens_dict, label
    
    # the function to return
    def input_fn(keep_weights=True, num_epochs=1000, batch_size=100, shuffle=True, num_parallel_calls=16):
        # build the input pipeline 
        dataset = csv_dataset(csv_pattern, 'content_label', num_parallel_calls=num_parallel_calls)  # decode the csv
        dataset = dataset.map(drop_strings, num_parallel_calls=num_parallel_calls)  # drop redundants
#         dataset = dataset.map(normalize_features, num_parallel_calls=num_parallel_calls)  # mean, std normalization
        dataset = dataset.map(add_weights, num_parallel_calls=num_parallel_calls)  # add weight col
      
        if not keep_weights:
            # dropping the weights if neccesary
            dataset.map(drop_weights, num_parallel_calls=num_parallel_calls)
        
        dataset = dataset.map(concat_features, num_parallel_calls=num_parallel_calls).cache()  # concatenate the features
        
        # shuffle the input
        if shuffle:
            dataset = dataset.shuffle(buffer_size=100000)  
        
        # batch, repeate, iterate
        dataset = dataset.batch(batch_size)
        dataset = dataset.repeat(num_epochs)
        
        # return the iterator, must be returned from here
        # so that the graph is built upon call
        iterator = dataset.make_one_shot_iterator()
        features, labels = iterator.get_next()
        return features, labels
    
    # return the function
    return input_fn

In [3]:
# call the higher order function to return the input functions
train_set_fn = build_fn('../data/final/cleaneval/dom-full-train-*.csv')
validation_set_fn = build_fn('../data/final/cleaneval/dom-full-validation-*.csv')
test_set_fn = build_fn('../data/final/cleaneval/dom-full-test-*.csv')

# partial calls 
feat_col_fn = lambda: train_set_fn(num_epochs=1, batch_size=1, shuffle=False)
train_fn = lambda: train_set_fn(batch_size=2000, num_epochs=1000)
validation_fn = lambda: validation_set_fn(batch_size=2000, num_epochs=1, shuffle=False, keep_weights=False)
test_fn =  lambda: test_set_fn(batch_size=2000, num_epochs=1, shuffle=False, keep_weights=False)

In [6]:
# session workaround
sess_config = tf.ConfigProto()
sess_config.gpu_options.allow_growth = 1

config = tf.estimator.RunConfig(
    model_dir='/home/nikitautiu/model_dir3',
    save_checkpoints_steps=5000, 
    session_config=sess_config, 
)


# we can infer the columns from the function, a bit costly, but better than hardcoding
tf_feat_cols = tf.contrib.learn.infer_real_valued_columns_from_input_fn(feat_col_fn)
estimator = tf.estimator.DNNClassifier(feature_columns=tf_feat_cols, hidden_units=(1000, 500, 200), 
                                        weight_column='weights', model_dir='/home/nikitautiu/model_dir3', config=config)


INFO:tensorflow:Using config: {'_service': None, '_master': '', '_task_type': 'worker', '_task_id': 0, '_num_worker_replicas': 1, '_save_checkpoints_secs': None, '_session_config': gpu_options {
  allow_growth: true
}
, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/home/nikitautiu/model_dir3', '_save_checkpoints_steps': 5000, '_is_chief': True, '_tf_random_seed': None, '_num_ps_replicas': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1e48423940>, '_keep_checkpoint_max': 5, '_log_step_count_steps': 100, '_save_summary_steps': 100}


## Try the Experiment interface

In [7]:
# intialize the experiment, save evry snapshot
experiment = tf.contrib.learn.Experiment(
    estimator=estimator, train_input_fn=train_fn, eval_input_fn=validation_fn, 
    min_eval_frequency=10000  # every 10 checkpoints or so
)

In [None]:
experiment.train_and_evaluate()

Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /home/nikitautiu/model_dir3/model.ckpt.
INFO:tensorflow:step = 1, loss = 603258.0
INFO:tensorflow:global_step/sec: 0.473732
INFO:tensorflow:step = 101, loss = 19823.2 (211.092 sec)
INFO:tensorflow:global_step/sec: 17.6636
INFO:tensorflow:step = 201, loss = 5174.29 (5.662 sec)
INFO:tensorflow:global_step/sec: 17.9169
INFO:tensorflow:step = 301, loss = 6435.26 (5.581 sec)
INFO:tensorflow:global_step/sec: 17.8965
INFO:tensorflow:step = 401, loss = 3547.22 (5.588 sec)
INFO:tensorflow:global_step/sec: 17.8152
INFO:tensorflow:step = 501, loss = 2789.97 (5.613 sec)
INFO:tensorflow:global_step/sec: 17.6928
INFO:tensorflow:step = 601, loss = 3122.56 (5.651 sec)
INFO:tensorflow:global_step/sec: 17.7996
INFO:tensorflow:step = 701, loss = 2322.93 (5.619 sec)
INFO:tensorflow:global_step/sec: 17.8116
INFO:tensorflow:step = 8

INFO:tensorflow:global_step/sec: 18.0548
INFO:tensorflow:step = 8001, loss = 930.719 (5.539 sec)
INFO:tensorflow:global_step/sec: 17.8479
INFO:tensorflow:step = 8101, loss = 933.447 (5.603 sec)
INFO:tensorflow:global_step/sec: 17.8437
INFO:tensorflow:step = 8201, loss = 849.815 (5.604 sec)
INFO:tensorflow:global_step/sec: 17.8887
INFO:tensorflow:step = 8301, loss = 855.788 (5.590 sec)
INFO:tensorflow:global_step/sec: 17.7363
INFO:tensorflow:step = 8401, loss = 792.326 (5.638 sec)
INFO:tensorflow:global_step/sec: 17.9039
INFO:tensorflow:step = 8501, loss = 1428.27 (5.585 sec)
INFO:tensorflow:global_step/sec: 17.8112
INFO:tensorflow:step = 8601, loss = 780.318 (5.614 sec)
INFO:tensorflow:global_step/sec: 17.8535
INFO:tensorflow:step = 8701, loss = 886.283 (5.602 sec)
INFO:tensorflow:global_step/sec: 17.8446
INFO:tensorflow:step = 8801, loss = 864.692 (5.603 sec)
INFO:tensorflow:global_step/sec: 17.7795
INFO:tensorflow:step = 8901, loss = 986.107 (5.625 sec)
INFO:tensorflow:global_step/se

INFO:tensorflow:global_step/sec: 17.814
INFO:tensorflow:step = 14701, loss = 691.965 (5.613 sec)
INFO:tensorflow:global_step/sec: 17.7952
INFO:tensorflow:step = 14801, loss = 550.719 (5.620 sec)
INFO:tensorflow:global_step/sec: 17.7501
INFO:tensorflow:step = 14901, loss = 695.006 (5.634 sec)
INFO:tensorflow:Saving checkpoints for 15001 into /home/nikitautiu/model_dir3/model.ckpt.
INFO:tensorflow:global_step/sec: 16.9744
INFO:tensorflow:step = 15001, loss = 657.328 (5.901 sec)
INFO:tensorflow:global_step/sec: 17.7381
INFO:tensorflow:step = 15101, loss = 649.4 (5.628 sec)
INFO:tensorflow:global_step/sec: 17.8559
INFO:tensorflow:step = 15201, loss = 590.409 (5.600 sec)
INFO:tensorflow:global_step/sec: 17.8347
INFO:tensorflow:step = 15301, loss = 640.748 (5.607 sec)
INFO:tensorflow:global_step/sec: 17.7046
INFO:tensorflow:step = 15401, loss = 794.117 (5.648 sec)
INFO:tensorflow:global_step/sec: 17.8032
INFO:tensorflow:step = 15501, loss = 765.469 (5.617 sec)
INFO:tensorflow:global_step/sec

INFO:tensorflow:global_step/sec: 17.8497
INFO:tensorflow:step = 21301, loss = 672.068 (5.603 sec)
INFO:tensorflow:global_step/sec: 17.7189
INFO:tensorflow:step = 21401, loss = 447.377 (5.643 sec)
INFO:tensorflow:global_step/sec: 17.8115
INFO:tensorflow:step = 21501, loss = 700.058 (5.614 sec)
INFO:tensorflow:global_step/sec: 17.6684
INFO:tensorflow:step = 21601, loss = 460.678 (5.660 sec)
INFO:tensorflow:global_step/sec: 17.8266
INFO:tensorflow:step = 21701, loss = 876.022 (5.610 sec)
INFO:tensorflow:global_step/sec: 17.8259
INFO:tensorflow:step = 21801, loss = 525.742 (5.610 sec)
INFO:tensorflow:global_step/sec: 17.801
INFO:tensorflow:step = 21901, loss = 461.066 (5.619 sec)
INFO:tensorflow:global_step/sec: 17.8464
INFO:tensorflow:step = 22001, loss = 437.115 (5.602 sec)
INFO:tensorflow:global_step/sec: 17.8555
INFO:tensorflow:step = 22101, loss = 486.693 (5.601 sec)
INFO:tensorflow:global_step/sec: 17.8482
INFO:tensorflow:step = 22201, loss = 548.238 (5.603 sec)
INFO:tensorflow:globa

INFO:tensorflow:global_step/sec: 18.2493
INFO:tensorflow:step = 29601, loss = 269.207 (5.479 sec)
INFO:tensorflow:global_step/sec: 17.4917
INFO:tensorflow:step = 29701, loss = 312.326 (5.718 sec)
INFO:tensorflow:global_step/sec: 17.8496
INFO:tensorflow:step = 29801, loss = 367.257 (5.602 sec)
INFO:tensorflow:global_step/sec: 17.7952
INFO:tensorflow:step = 29901, loss = 393.665 (5.620 sec)
INFO:tensorflow:Starting evaluation at 2017-11-09-08:20:36
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir3/model.ckpt-25001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INF

INFO:tensorflow:global_step/sec: 17.8196
INFO:tensorflow:step = 36201, loss = 295.49 (5.612 sec)
INFO:tensorflow:global_step/sec: 17.6983
INFO:tensorflow:step = 36301, loss = 227.032 (5.650 sec)
INFO:tensorflow:global_step/sec: 17.8636
INFO:tensorflow:step = 36401, loss = 622.823 (5.598 sec)
INFO:tensorflow:global_step/sec: 17.9711
INFO:tensorflow:step = 36501, loss = 882.051 (5.564 sec)
INFO:tensorflow:global_step/sec: 17.8179
INFO:tensorflow:step = 36601, loss = 409.795 (5.613 sec)
INFO:tensorflow:global_step/sec: 17.6589
INFO:tensorflow:step = 36701, loss = 348.698 (5.663 sec)
INFO:tensorflow:global_step/sec: 17.8477
INFO:tensorflow:step = 36801, loss = 446.839 (5.603 sec)
INFO:tensorflow:global_step/sec: 17.9818
INFO:tensorflow:step = 36901, loss = 357.598 (5.562 sec)
INFO:tensorflow:global_step/sec: 17.9523
INFO:tensorflow:step = 37001, loss = 529.494 (5.570 sec)
INFO:tensorflow:global_step/sec: 17.8874
INFO:tensorflow:step = 37101, loss = 181.552 (5.591 sec)
INFO:tensorflow:globa

INFO:tensorflow:step = 42801, loss = 195.895 (5.620 sec)
INFO:tensorflow:global_step/sec: 17.7563
INFO:tensorflow:step = 42901, loss = 179.636 (5.632 sec)
INFO:tensorflow:global_step/sec: 17.7137
INFO:tensorflow:step = 43001, loss = 330.133 (5.645 sec)
INFO:tensorflow:global_step/sec: 17.9267
INFO:tensorflow:step = 43101, loss = 126.514 (5.579 sec)
INFO:tensorflow:global_step/sec: 17.8665
INFO:tensorflow:step = 43201, loss = 449.556 (5.597 sec)
INFO:tensorflow:global_step/sec: 17.7518
INFO:tensorflow:step = 43301, loss = 307.262 (5.633 sec)
INFO:tensorflow:global_step/sec: 17.8273
INFO:tensorflow:step = 43401, loss = 375.663 (5.609 sec)
INFO:tensorflow:global_step/sec: 17.7386
INFO:tensorflow:step = 43501, loss = 163.837 (5.638 sec)
INFO:tensorflow:global_step/sec: 17.7258
INFO:tensorflow:step = 43601, loss = 1007.57 (5.641 sec)
INFO:tensorflow:global_step/sec: 17.7222
INFO:tensorflow:step = 43701, loss = 143.257 (5.643 sec)
INFO:tensorflow:global_step/sec: 17.8156
INFO:tensorflow:step

INFO:tensorflow:Saving dict for global step 45001: accuracy = 0.859537, accuracy_baseline = 0.5, auc = 0.958858, auc_precision_recall = 0.970022, average_loss = 0.684799, global_step = 45001, label/mean = 0.5, loss = 1343.26, prediction/mean = 0.347898
INFO:tensorflow:Validation (step 50000): global_step = 45001, accuracy_baseline = 0.5, accuracy = 0.859537, auc_precision_recall = 0.970022, auc = 0.958858, label/mean = 0.5, loss = 1343.26, average_loss = 0.684799, prediction/mean = 0.347898
INFO:tensorflow:Saving checkpoints for 50001 into /home/nikitautiu/model_dir3/model.ckpt.
INFO:tensorflow:global_step/sec: 1.19395
INFO:tensorflow:step = 50001, loss = 436.765 (83.765 sec)
INFO:tensorflow:global_step/sec: 17.8271
INFO:tensorflow:step = 50101, loss = 379.521 (5.601 sec)
INFO:tensorflow:global_step/sec: 17.3652
INFO:tensorflow:step = 50201, loss = 319.523 (5.758 sec)
INFO:tensorflow:global_step/sec: 17.8053
INFO:tensorflow:step = 50301, loss = 243.049 (5.617 sec)
INFO:tensorflow:globa

INFO:tensorflow:global_step/sec: 17.7734
INFO:tensorflow:step = 57701, loss = 707.001 (5.626 sec)
INFO:tensorflow:global_step/sec: 17.7172
INFO:tensorflow:step = 57801, loss = 111.064 (5.644 sec)
INFO:tensorflow:global_step/sec: 18.0076
INFO:tensorflow:step = 57901, loss = 159.397 (5.555 sec)
INFO:tensorflow:global_step/sec: 17.8487
INFO:tensorflow:step = 58001, loss = 785.066 (5.601 sec)
INFO:tensorflow:global_step/sec: 17.7483
INFO:tensorflow:step = 58101, loss = 125.183 (5.635 sec)
INFO:tensorflow:global_step/sec: 17.9731
INFO:tensorflow:step = 58201, loss = 67.5376 (5.564 sec)
INFO:tensorflow:global_step/sec: 17.8039
INFO:tensorflow:step = 58301, loss = 92.3336 (5.617 sec)
INFO:tensorflow:global_step/sec: 18.0435
INFO:tensorflow:step = 58401, loss = 78.7635 (5.542 sec)
INFO:tensorflow:global_step/sec: 17.937
INFO:tensorflow:step = 58501, loss = 118.642 (5.576 sec)
INFO:tensorflow:global_step/sec: 17.9258
INFO:tensorflow:step = 58601, loss = 76.922 (5.579 sec)
INFO:tensorflow:global

INFO:tensorflow:step = 64301, loss = 95.2489 (5.599 sec)
INFO:tensorflow:global_step/sec: 17.8223
INFO:tensorflow:step = 64401, loss = 377.601 (5.611 sec)
INFO:tensorflow:global_step/sec: 17.9657
INFO:tensorflow:step = 64501, loss = 79.9943 (5.566 sec)
INFO:tensorflow:global_step/sec: 17.7854
INFO:tensorflow:step = 64601, loss = 100.216 (5.623 sec)
INFO:tensorflow:global_step/sec: 18.0197
INFO:tensorflow:step = 64701, loss = 57.1476 (5.550 sec)
INFO:tensorflow:global_step/sec: 17.708
INFO:tensorflow:step = 64801, loss = 101.295 (5.646 sec)
INFO:tensorflow:global_step/sec: 17.649
INFO:tensorflow:step = 64901, loss = 89.4546 (5.667 sec)
INFO:tensorflow:Saving checkpoints for 65001 into /home/nikitautiu/model_dir3/model.ckpt.
INFO:tensorflow:global_step/sec: 16.8316
INFO:tensorflow:step = 65001, loss = 87.1734 (5.952 sec)
INFO:tensorflow:global_step/sec: 17.9516
INFO:tensorflow:step = 65101, loss = 76.3898 (5.559 sec)
INFO:tensorflow:global_step/sec: 17.5955
INFO:tensorflow:step = 65201, 

INFO:tensorflow:global_step/sec: 17.9396
INFO:tensorflow:step = 70901, loss = 358.753 (5.574 sec)
INFO:tensorflow:global_step/sec: 17.6999
INFO:tensorflow:step = 71001, loss = 171.605 (5.650 sec)
INFO:tensorflow:global_step/sec: 17.858
INFO:tensorflow:step = 71101, loss = 811.553 (5.599 sec)
INFO:tensorflow:global_step/sec: 17.8587
INFO:tensorflow:step = 71201, loss = 98.6455 (5.600 sec)
INFO:tensorflow:global_step/sec: 17.8424
INFO:tensorflow:step = 71301, loss = 59.8841 (5.605 sec)
INFO:tensorflow:global_step/sec: 18.0287
INFO:tensorflow:step = 71401, loss = 35.9381 (5.547 sec)
INFO:tensorflow:global_step/sec: 18.0074
INFO:tensorflow:step = 71501, loss = 407.85 (5.553 sec)
INFO:tensorflow:global_step/sec: 18.127
INFO:tensorflow:step = 71601, loss = 45.4034 (5.517 sec)
INFO:tensorflow:global_step/sec: 17.7283
INFO:tensorflow:step = 71701, loss = 75.6395 (5.640 sec)
INFO:tensorflow:global_step/sec: 17.7972
INFO:tensorflow:step = 71801, loss = 79.6241 (5.619 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 17.8192
INFO:tensorflow:step = 79201, loss = 58.091 (5.612 sec)
INFO:tensorflow:global_step/sec: 17.7173
INFO:tensorflow:step = 79301, loss = 30.9262 (5.644 sec)
INFO:tensorflow:global_step/sec: 17.8121
INFO:tensorflow:step = 79401, loss = 59.491 (5.614 sec)
INFO:tensorflow:global_step/sec: 17.9782
INFO:tensorflow:step = 79501, loss = 156.016 (5.562 sec)
INFO:tensorflow:global_step/sec: 18.0145
INFO:tensorflow:step = 79601, loss = 163.917 (5.551 sec)
INFO:tensorflow:global_step/sec: 17.7782
INFO:tensorflow:step = 79701, loss = 35.5036 (5.624 sec)
INFO:tensorflow:global_step/sec: 17.8295
INFO:tensorflow:step = 79801, loss = 59.1622 (5.609 sec)
INFO:tensorflow:global_step/sec: 18.0133
INFO:tensorflow:step = 79901, loss = 28.2343 (5.552 sec)
INFO:tensorflow:Starting evaluation at 2017-11-09-09:13:55
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir3/model.ckpt-75001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
IN

INFO:tensorflow:global_step/sec: 18.0535
INFO:tensorflow:step = 85801, loss = 55.2712 (5.538 sec)
INFO:tensorflow:global_step/sec: 17.848
INFO:tensorflow:step = 85901, loss = 71.2397 (5.603 sec)
INFO:tensorflow:global_step/sec: 17.8745
INFO:tensorflow:step = 86001, loss = 34.4933 (5.594 sec)
INFO:tensorflow:global_step/sec: 17.9463
INFO:tensorflow:step = 86101, loss = 33.2464 (5.573 sec)
INFO:tensorflow:global_step/sec: 17.7557
INFO:tensorflow:step = 86201, loss = 59.4743 (5.632 sec)
INFO:tensorflow:global_step/sec: 17.8357
INFO:tensorflow:step = 86301, loss = 830.494 (5.606 sec)
INFO:tensorflow:global_step/sec: 17.8404
INFO:tensorflow:step = 86401, loss = 22.3625 (5.606 sec)
INFO:tensorflow:global_step/sec: 17.8191
INFO:tensorflow:step = 86501, loss = 25.0477 (5.612 sec)
INFO:tensorflow:global_step/sec: 17.785
INFO:tensorflow:step = 86601, loss = 43.2301 (5.623 sec)
INFO:tensorflow:global_step/sec: 17.8682
INFO:tensorflow:step = 86701, loss = 26.9661 (5.596 sec)
INFO:tensorflow:global

INFO:tensorflow:step = 92401, loss = 48.0892 (5.667 sec)
INFO:tensorflow:global_step/sec: 17.7515
INFO:tensorflow:step = 92501, loss = 27.7693 (5.633 sec)
INFO:tensorflow:global_step/sec: 17.8395
INFO:tensorflow:step = 92601, loss = 49.1826 (5.605 sec)
INFO:tensorflow:global_step/sec: 17.9795
INFO:tensorflow:step = 92701, loss = 131.611 (5.562 sec)
INFO:tensorflow:global_step/sec: 17.9488
INFO:tensorflow:step = 92801, loss = 29.7846 (5.572 sec)
INFO:tensorflow:global_step/sec: 17.7829
INFO:tensorflow:step = 92901, loss = 35.9062 (5.624 sec)
INFO:tensorflow:global_step/sec: 17.8366
INFO:tensorflow:step = 93001, loss = 16.6755 (5.606 sec)
INFO:tensorflow:global_step/sec: 17.919
INFO:tensorflow:step = 93101, loss = 29.9561 (5.581 sec)
INFO:tensorflow:global_step/sec: 17.9421
INFO:tensorflow:step = 93201, loss = 34.6646 (5.574 sec)
INFO:tensorflow:global_step/sec: 17.9795
INFO:tensorflow:step = 93301, loss = 3447.47 (5.561 sec)
INFO:tensorflow:global_step/sec: 17.8409
INFO:tensorflow:step 

INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Finished evaluation at 2017-11-09-09:36:25
INFO:tensorflow:Saving dict for global step 95001: accuracy = 0.963375, accuracy_baseline = 0.5, auc = 0.984401, auc_precision_recall = 0.990744, average_loss = 0.174039, global_step = 95001, label/mean = 0.5, loss = 341.386, prediction/mean = 0.459886
INFO:tensorflow:Validation (step 100000): global_step = 95001, accuracy_baseline = 0.5, accuracy = 0.963375, auc_precision_recall = 0.990744, auc = 0.984401, label/mean = 0.5, loss = 341.386, average_loss = 0.174039, prediction/mean = 0.459886
INFO:tensorflow:Saving checkpoints for 100001 into /home/nikitautiu/model_dir3/model.ckpt.
INFO:tensorflow:global_step/s

INFO:tensorflow:step = 107201, loss = 13.918 (5.568 sec)
INFO:tensorflow:global_step/sec: 17.9316
INFO:tensorflow:step = 107301, loss = 60.9888 (5.577 sec)
INFO:tensorflow:global_step/sec: 17.9493
INFO:tensorflow:step = 107401, loss = 11.1918 (5.570 sec)
INFO:tensorflow:global_step/sec: 17.7881
INFO:tensorflow:step = 107501, loss = 263.622 (5.622 sec)
INFO:tensorflow:global_step/sec: 17.7839
INFO:tensorflow:step = 107601, loss = 27.2508 (5.623 sec)
INFO:tensorflow:global_step/sec: 17.7384
INFO:tensorflow:step = 107701, loss = 77.5605 (5.638 sec)
INFO:tensorflow:global_step/sec: 17.9182
INFO:tensorflow:step = 107801, loss = 130.711 (5.581 sec)
INFO:tensorflow:global_step/sec: 17.7607
INFO:tensorflow:step = 107901, loss = 36.0495 (5.630 sec)
INFO:tensorflow:global_step/sec: 17.8284
INFO:tensorflow:step = 108001, loss = 11.0305 (5.610 sec)
INFO:tensorflow:global_step/sec: 17.8844
INFO:tensorflow:step = 108101, loss = 167.042 (5.591 sec)
INFO:tensorflow:global_step/sec: 17.5891
INFO:tensor

INFO:tensorflow:step = 113801, loss = 7.41772 (5.611 sec)
INFO:tensorflow:global_step/sec: 17.8259
INFO:tensorflow:step = 113901, loss = 187.495 (5.610 sec)
INFO:tensorflow:global_step/sec: 17.8806
INFO:tensorflow:step = 114001, loss = 12.6881 (5.593 sec)
INFO:tensorflow:global_step/sec: 17.7048
INFO:tensorflow:step = 114101, loss = 7.1016 (5.648 sec)
INFO:tensorflow:global_step/sec: 17.7051
INFO:tensorflow:step = 114201, loss = 18.3262 (5.648 sec)
INFO:tensorflow:global_step/sec: 17.7657
INFO:tensorflow:step = 114301, loss = 47.5652 (5.629 sec)
INFO:tensorflow:global_step/sec: 17.7391
INFO:tensorflow:step = 114401, loss = 6.0401 (5.637 sec)
INFO:tensorflow:global_step/sec: 17.8432
INFO:tensorflow:step = 114501, loss = 22.1475 (5.604 sec)
INFO:tensorflow:global_step/sec: 17.9994
INFO:tensorflow:step = 114601, loss = 73.8106 (5.556 sec)
INFO:tensorflow:global_step/sec: 17.8378
INFO:tensorflow:step = 114701, loss = 28.9664 (5.606 sec)
INFO:tensorflow:global_step/sec: 17.6878
INFO:tensorf

In [9]:
def get_pred_expected(estimator, input_fn):
    """Given an input function and an estimator, return the expected and predicted labels"""
    # get prdeiction
    predicted_list = [pred['class_ids'] for pred in estimator.predict(input_fn)]
    pred_array = np.stack(predicted_list, axis=0).ravel()  # concatenate them to one

    # consume label tensors
    # not necessarily the most elegant solution but works
    label_tens = input_fn()[1]
    expected_list = []
    with tf.Session() as sess:

        # get each element of the training dataset until the end is reached
        while True:
            try:
                elem = sess.run(label_tens)
                expected_list.append(elem)
            except tf.errors.OutOfRangeError:
                break
            
    expected_array = np.concatenate(expected_list).ravel()
    
    return expected_array, pred_array

In [None]:
# much needed sanity check. the data is small enough not to bother with pipelines
expected_array, pred_array = get_pred_expected(estimator, test_fn)
print(classification_report(expected_array, pred_array, digits=6))

             precision    recall  f1-score   support

        0.0   0.998706  0.998498  0.998602     43284
        1.0   0.991393  0.992576  0.991984      7543

avg / total   0.997621  0.997619  0.997620     50827



Attempt another training run with validation testing. The point of this is not t reach better performance, bt to actually see on the validation curves in tensorflow if it overfits.

In [None]:
experiment.train_and_evaluate()

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir3/model.ckpt-117000
INFO:tensorflow:Saving checkpoints for 117001 into /home/nikitautiu/model_dir3/model.ckpt.
INFO:tensorflow:Starting evaluation at 2017-11-09-09:59:57
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir3/model.ckpt-117001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation 

INFO:tensorflow:global_step/sec: 17.8777
INFO:tensorflow:step = 123401, loss = 19.1474 (5.593 sec)
INFO:tensorflow:global_step/sec: 17.9333
INFO:tensorflow:step = 123501, loss = 9.21934 (5.576 sec)
INFO:tensorflow:global_step/sec: 17.8535
INFO:tensorflow:step = 123601, loss = 4.72415 (5.601 sec)
INFO:tensorflow:global_step/sec: 17.8949
INFO:tensorflow:step = 123701, loss = 31.1423 (5.588 sec)
INFO:tensorflow:global_step/sec: 18.0263
INFO:tensorflow:step = 123801, loss = 9.26283 (5.547 sec)
INFO:tensorflow:global_step/sec: 18.129
INFO:tensorflow:step = 123901, loss = 12.4028 (5.516 sec)
INFO:tensorflow:global_step/sec: 17.8594
INFO:tensorflow:step = 124001, loss = 10.831 (5.599 sec)
INFO:tensorflow:global_step/sec: 17.9298
INFO:tensorflow:step = 124101, loss = 3.96134 (5.578 sec)
INFO:tensorflow:global_step/sec: 17.902
INFO:tensorflow:step = 124201, loss = 9.6836 (5.585 sec)
INFO:tensorflow:global_step/sec: 17.7919
INFO:tensorflow:step = 124301, loss = 355.381 (5.621 sec)
INFO:tensorflo

INFO:tensorflow:global_step/sec: 18.0827
INFO:tensorflow:step = 130001, loss = 5.77342 (5.530 sec)
INFO:tensorflow:global_step/sec: 18.1409
INFO:tensorflow:step = 130101, loss = 273.266 (5.513 sec)
INFO:tensorflow:global_step/sec: 18.1883
INFO:tensorflow:step = 130201, loss = 25.4212 (5.498 sec)
INFO:tensorflow:global_step/sec: 18.0673
INFO:tensorflow:step = 130301, loss = 19.8328 (5.535 sec)
INFO:tensorflow:global_step/sec: 18.0988
INFO:tensorflow:step = 130401, loss = 5.53924 (5.526 sec)
INFO:tensorflow:global_step/sec: 18.0674
INFO:tensorflow:step = 130501, loss = 11.5662 (5.534 sec)
INFO:tensorflow:global_step/sec: 18.0615
INFO:tensorflow:step = 130601, loss = 4.78946 (5.537 sec)
INFO:tensorflow:global_step/sec: 18.3041
INFO:tensorflow:step = 130701, loss = 4.273 (5.463 sec)
INFO:tensorflow:global_step/sec: 18.2716
INFO:tensorflow:step = 130801, loss = 6.13391 (5.474 sec)
INFO:tensorflow:global_step/sec: 17.9803
INFO:tensorflow:step = 130901, loss = 40.2819 (5.561 sec)
INFO:tensorf

INFO:tensorflow:Finished evaluation at 2017-11-09-10:25:34
INFO:tensorflow:Saving dict for global step 137001: accuracy = 0.971866, accuracy_baseline = 0.5, auc = 0.985615, auc_precision_recall = 0.992592, average_loss = 0.147043, global_step = 137001, label/mean = 0.5, loss = 288.432, prediction/mean = 0.467809
INFO:tensorflow:Validation (step 137001): global_step = 137001, accuracy_baseline = 0.5, accuracy = 0.971866, auc_precision_recall = 0.992592, auc = 0.985615, label/mean = 0.5, loss = 288.432, average_loss = 0.147043, prediction/mean = 0.467809
INFO:tensorflow:step = 137001, loss = 1.94641 (80.518 sec)
INFO:tensorflow:global_step/sec: 1.24746
INFO:tensorflow:step = 137101, loss = 5377.1 (5.428 sec)
INFO:tensorflow:global_step/sec: 18.0902
INFO:tensorflow:step = 137201, loss = 459.028 (5.528 sec)
INFO:tensorflow:global_step/sec: 18.2086
INFO:tensorflow:step = 137301, loss = 506.368 (5.492 sec)
INFO:tensorflow:global_step/sec: 18.1971
INFO:tensorflow:step = 137401, loss = 43.7685

INFO:tensorflow:global_step/sec: 18.133
INFO:tensorflow:step = 144701, loss = 16.2714 (5.515 sec)
INFO:tensorflow:global_step/sec: 18.3763
INFO:tensorflow:step = 144801, loss = 1.37482 (5.442 sec)
INFO:tensorflow:global_step/sec: 18.3293
INFO:tensorflow:step = 144901, loss = 1.28757 (5.456 sec)
INFO:tensorflow:global_step/sec: 18.1059
INFO:tensorflow:step = 145001, loss = 2.05274 (5.523 sec)
INFO:tensorflow:global_step/sec: 18.3932
INFO:tensorflow:step = 145101, loss = 1.2619 (5.437 sec)
INFO:tensorflow:global_step/sec: 18.1435
INFO:tensorflow:step = 145201, loss = 3.38056 (5.511 sec)
INFO:tensorflow:global_step/sec: 18.3871
INFO:tensorflow:step = 145301, loss = 1.39965 (5.438 sec)
INFO:tensorflow:global_step/sec: 18.3715
INFO:tensorflow:step = 145401, loss = 2.43847 (5.444 sec)
INFO:tensorflow:global_step/sec: 18.0567
INFO:tensorflow:step = 145501, loss = 1.02852 (5.538 sec)
INFO:tensorflow:global_step/sec: 18.0963
INFO:tensorflow:step = 145601, loss = 1.95246 (5.526 sec)
INFO:tensorf

INFO:tensorflow:global_step/sec: 18.5365
INFO:tensorflow:step = 151301, loss = 2.5871 (5.394 sec)
INFO:tensorflow:global_step/sec: 18.6008
INFO:tensorflow:step = 151401, loss = 3.3295 (5.376 sec)
INFO:tensorflow:global_step/sec: 18.5227
INFO:tensorflow:step = 151501, loss = 22.9357 (5.399 sec)
INFO:tensorflow:global_step/sec: 18.5247
INFO:tensorflow:step = 151601, loss = 8.57113 (5.398 sec)
INFO:tensorflow:global_step/sec: 18.3961
INFO:tensorflow:step = 151701, loss = 6.27533 (5.436 sec)
INFO:tensorflow:global_step/sec: 18.5468
INFO:tensorflow:step = 151801, loss = 3.13279 (5.391 sec)
INFO:tensorflow:global_step/sec: 18.5267
INFO:tensorflow:step = 151901, loss = 4.23627 (5.399 sec)
INFO:tensorflow:Saving checkpoints for 152001 into /home/nikitautiu/model_dir3/model.ckpt.
INFO:tensorflow:global_step/sec: 16.9756
INFO:tensorflow:step = 152001, loss = 3.32529 (5.902 sec)
INFO:tensorflow:global_step/sec: 18.525
INFO:tensorflow:step = 152101, loss = 2.81842 (5.386 sec)
INFO:tensorflow:globa

INFO:tensorflow:global_step/sec: 18.2574
INFO:tensorflow:step = 158401, loss = 45.9673 (5.477 sec)
INFO:tensorflow:global_step/sec: 18.4584
INFO:tensorflow:step = 158501, loss = 15.275 (5.417 sec)
INFO:tensorflow:global_step/sec: 18.2755
INFO:tensorflow:step = 158601, loss = 8.87158 (5.472 sec)
INFO:tensorflow:global_step/sec: 18.3361
INFO:tensorflow:step = 158701, loss = 5.37339 (5.454 sec)
INFO:tensorflow:global_step/sec: 18.1737
INFO:tensorflow:step = 158801, loss = 3.57887 (5.502 sec)
INFO:tensorflow:global_step/sec: 17.5181
INFO:tensorflow:step = 158901, loss = 5.14657 (5.708 sec)
INFO:tensorflow:global_step/sec: 17.4112
INFO:tensorflow:step = 159001, loss = 3.56636 (5.744 sec)
INFO:tensorflow:global_step/sec: 16.6666
INFO:tensorflow:step = 159101, loss = 35.394 (6.000 sec)
INFO:tensorflow:global_step/sec: 18.1645
INFO:tensorflow:step = 159201, loss = 13.344 (5.506 sec)
INFO:tensorflow:global_step/sec: 17.0689
INFO:tensorflow:step = 159301, loss = 58.6332 (5.858 sec)
INFO:tensorfl

INFO:tensorflow:global_step/sec: 18.6372
INFO:tensorflow:step = 166601, loss = 7.18355 (5.365 sec)
INFO:tensorflow:global_step/sec: 18.4651
INFO:tensorflow:step = 166701, loss = 2.74982 (5.416 sec)
INFO:tensorflow:global_step/sec: 18.5738
INFO:tensorflow:step = 166801, loss = 1.63048 (5.384 sec)
INFO:tensorflow:global_step/sec: 17.7456
INFO:tensorflow:step = 166901, loss = 2.1082 (5.635 sec)
INFO:tensorflow:Saving checkpoints for 167001 into /home/nikitautiu/model_dir3/model.ckpt.
INFO:tensorflow:global_step/sec: 17.6797
INFO:tensorflow:Starting evaluation at 2017-11-09-10:55:29
INFO:tensorflow:Restoring parameters from /home/nikitautiu/model_dir3/model.ckpt-167001
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensor

INFO:tensorflow:global_step/sec: 18.5943
INFO:tensorflow:step = 173101, loss = 1.42787 (5.377 sec)
INFO:tensorflow:global_step/sec: 18.7048
INFO:tensorflow:step = 173201, loss = 1.96736 (5.347 sec)
INFO:tensorflow:global_step/sec: 18.6843
INFO:tensorflow:step = 173301, loss = 1.87898 (5.352 sec)
INFO:tensorflow:global_step/sec: 18.4961
INFO:tensorflow:step = 173401, loss = 1.10284 (5.407 sec)
INFO:tensorflow:global_step/sec: 18.7103
INFO:tensorflow:step = 173501, loss = 1.71128 (5.344 sec)
INFO:tensorflow:global_step/sec: 18.5231
INFO:tensorflow:step = 173601, loss = 1.26825 (5.399 sec)
INFO:tensorflow:global_step/sec: 18.4881
INFO:tensorflow:step = 173701, loss = 3.23229 (5.409 sec)
INFO:tensorflow:global_step/sec: 18.5077
INFO:tensorflow:step = 173801, loss = 1.07534 (5.403 sec)
INFO:tensorflow:global_step/sec: 18.5789
INFO:tensorflow:step = 173901, loss = 1.43176 (5.382 sec)
INFO:tensorflow:global_step/sec: 18.7248
INFO:tensorflow:step = 174001, loss = 1.61472 (5.340 sec)
INFO:tenso

In [None]:
# metrics on test
estimator.evaluate(input_fn=test_fn)

In [None]:
# another try
expected_array, pred_array = get_pred_expected(estimator, test_fn)
print(classification_report(expected_array, pred_array, digits=6))

In [None]:
# another try
expected_array, pred_array = get_pred_expected(estimator, validation_fn)
print(classification_report(expected_array, pred_array, digits=6))