# data download

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import sys

from six.moves import urllib
import tensorflow as tf

# Location of the UCI "adult" census dataset and the two files fetched from it.
DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult'
TRAINING_FILE = 'adult.data'
EVAL_FILE = 'adult.test'
TRAINING_URL = '{}/{}'.format(DATA_URL, TRAINING_FILE)
EVAL_URL = '{}/{}'.format(DATA_URL, EVAL_FILE)

# Command-line interface: a single optional flag selecting the output directory.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--data_dir',
    default='./',
    type=str,
    help='Directory to download census data')


def _download_and_clean_file(filename, url):
  """Downloads data from url, and makes changes to match the CSV format.

  The payload is first fetched into a temporary file, then rewritten line by
  line into `filename`:
    * surrounding whitespace is stripped and every ', ' becomes ',';
    * empty lines and lines containing no comma are dropped;
    * a single trailing '.' is removed;
    * each kept line is terminated with a newline.

  Args:
    filename: Destination path of the cleaned CSV file.
    url: URL to download the raw data from.
  """
  temp_file, _ = urllib.request.urlretrieve(url)
  try:
    with tf.gfile.Open(temp_file, 'r') as raw_file:
      with tf.gfile.Open(filename, 'w') as clean_file:
        for line in raw_file:
          line = line.strip().replace(', ', ',')
          if not line or ',' not in line:
            # Nothing CSV-like on this line; skip it.
            continue
          if line.endswith('.'):
            line = line[:-1]
          clean_file.write(line + '\n')
  finally:
    # Remove the temporary download even when opening, reading or writing
    # fails part-way, so a failed run does not leave it behind.
    tf.gfile.Remove(temp_file)


def main(_):
  """Downloads and cleans the training and evaluation files into FLAGS.data_dir.

  Args:
    _: Unused argument list passed in by tf.app.run.
  """
  if not tf.gfile.Exists(FLAGS.data_dir):
    # MakeDirs also creates missing parent directories; MkDir fails when
    # --data_dir is a nested path whose parent does not exist yet.
    tf.gfile.MakeDirs(FLAGS.data_dir)

  training_file_path = os.path.join(FLAGS.data_dir, TRAINING_FILE)
  _download_and_clean_file(training_file_path, TRAINING_URL)

  eval_file_path = os.path.join(FLAGS.data_dir, EVAL_FILE)
  _download_and_clean_file(eval_file_path, EVAL_URL)


if __name__ == '__main__':
  # Parse the flags this script declares; anything unrecognised is returned
  # separately. main() reads the parsed values through the module-level FLAGS.
  FLAGS, unparsed = parser.parse_known_args()
  # Hand the program name plus the unrecognised arguments to tf.app.run.
  tf.app.run(argv=[sys.argv[0]] + unparsed)

# wide component

In [12]:
import tensorflow as tf

# Column names of the census CSV, in file order. input_fn zips these with the
# decoded columns and pops 'income_bracket' to use as the label.
_CSV_COLUMNS = [
    'age', 'workclass', 'fnlwgt', 'education', 'education_num',
    'marital_status', 'occupation', 'relationship', 'race', 'gender',
    'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
    'income_bracket'
]

# One default per column, positionally aligned with _CSV_COLUMNS; passed to
# tf.decode_csv as record_defaults (the value used when a field is missing).
# [0] entries line up with the numeric columns, [''] with the string columns.
_CSV_COLUMN_DEFAULTS = [[0], [''], [0], [''], [0], [''], [''], [''], [''], [''],
                        [0], [0], [0], [''], ['']]

# Row counts used by input_fn to size the shuffle buffer.
_NUM_EXAMPLES = {
    'train': 32561,
    'validation': 16281,
}


# 1. Read the Census Data

# 2. Converting Data into Tensors
def input_fn(data_file, num_epochs, shuffle, batch_size):
    """Create an input function for an Estimator.

    Args:
        data_file: Path of the cleaned census CSV file to read.
        num_epochs: Number of times the dataset is repeated.
        shuffle: Whether to shuffle the records before repeating/batching.
        batch_size: Number of records per batch.

    Returns:
        A `(features, labels)` pair: `features` maps each column name (except
        'income_bracket') to a batched tensor, and `labels` is a boolean
        tensor that is True where 'income_bracket' equals '>50K'.

    Raises:
        AssertionError: If `data_file` does not exist.
    """
    assert tf.gfile.Exists(data_file), "{0} not found.".format(data_file)

    def parse_csv(line):
        print("Parsing", data_file)
        # tf.decode_csv turns a CSV line into a list of tensors, one per
        # column; record_defaults gives the fill value for missing fields.
        fields = tf.decode_csv(line, record_defaults=_CSV_COLUMN_DEFAULTS)
        features = {name: tensor for name, tensor in zip(_CSV_COLUMNS, fields)}
        # tf.equal(x, y) returns an element-wise boolean tensor for x == y.
        is_over_50k = tf.equal(features.pop('income_bracket'), '>50K')
        return features, is_over_50k

    dataset = tf.data.TextLineDataset(data_file)
    dataset = dataset.map(parse_csv, num_parallel_calls=5)

    if shuffle:
        shuffle_buffer = _NUM_EXAMPLES['train'] + _NUM_EXAMPLES['validation']
        dataset = dataset.shuffle(buffer_size=shuffle_buffer)

    dataset = dataset.repeat(num_epochs).batch(batch_size)

    features_batch, labels_batch = dataset.make_one_shot_iterator().get_next()
    return features_batch, labels_batch

# 3. Select and Engineer Features for Model

## 3.1 Base Categorical Feature Columns
# Use an explicit vocabulary when all values are known and there are few of them.
relationship = tf.feature_column.categorical_column_with_vocabulary_list(
    key='relationship',
    vocabulary_list=[
        'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',
        'Other-relative',
    ],
)

# Use hash buckets when the number of distinct values is not known.
occupation = tf.feature_column.categorical_column_with_hash_bucket(
    key='occupation',
    hash_bucket_size=1000,
)

education = tf.feature_column.categorical_column_with_vocabulary_list(
    key='education',
    vocabulary_list=[
        'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college',
        'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school',
        '5th-6th', '10th', '1st-4th', 'Preschool', '12th',
    ],
)

marital_status = tf.feature_column.categorical_column_with_vocabulary_list(
    key='marital_status',
    vocabulary_list=[
        'Married-civ-spouse', 'Divorced', 'Married-spouse-absent',
        'Never-married', 'Separated', 'Married-AF-spouse', 'Widowed',
    ],
)

workclass = tf.feature_column.categorical_column_with_vocabulary_list(
    key='workclass',
    vocabulary_list=[
        'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',
        'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked',
    ],
)


# 3.2 Base Continuous Feature Columns
age = tf.feature_column.numeric_column(key='age')
education_num = tf.feature_column.numeric_column(key='education_num')
capital_gain = tf.feature_column.numeric_column(key='capital_gain')
capital_loss = tf.feature_column.numeric_column(key='capital_loss')
hours_per_week = tf.feature_column.numeric_column(key='hours_per_week')

#
"""
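Sometimes the relationship between a continuous feature and the label is not linear. As a hypothetical example,
a person's income may grow with age in the early stage of one's career, then the growth may slow at some point,
and finally the income decreases after retirement.
In this scenario, using the raw age as a real-valued feature column might not be a good choice because the model
can only learn one of the three cases:
  1. income always increases at some rate as age grows (positive correlation),
  2. income always decreases at some rate as age grows (negative correlation), or
  3. income stays the same no matter the age (no correlation).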
"""    
    
# 3.2.1 Discretizing a continuous feature (bucketization)
# Reason: a continuous feature and the label are sometimes not linearly
# related. The relation may start out positive and later turn negative; such a
# piecewise-linear shape is not linear overall.
# 10 boundaries -> 11 buckets.
age_buckets = tf.feature_column.bucketized_column(
    age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

# 3.3 Combined / crossed features
education_x_occupation = tf.feature_column.crossed_column(
    ['education', 'occupation'], hash_bucket_size=1000)

age_buckets_x_education_x_occupation = tf.feature_column.crossed_column(
    [age_buckets, 'education', 'occupation'], hash_bucket_size=1000
)


# 4. Model
# Feature kinds built so far:
#   1. CategoricalColumn
#   2. NumericalColumn
#   3. BucketizedColumn
#   4. CrossedColumn
# All of them are FeatureColumn subclasses, so they can be combined in one list.
base_columns = [
    education, marital_status, relationship, workclass, occupation,
    age_buckets,
]

# Reuse the crosses defined in section 3.3 instead of constructing a second,
# identical pair (the named columns were otherwise never used).
crossed_column = [
    education_x_occupation,
    age_buckets_x_education_x_occupation,
]

model_dir = "./model/wide_component"
# Variant without an explicit optimizer, kept for reference:
#model = tf.estimator.LinearClassifier(
#    model_dir=model_dir, feature_columns=base_columns + crossed_column
#)

# 6. Regularization
# Linear classifier trained with FTRL using both L1 and L2 penalties.
model = tf.estimator.LinearClassifier(
    model_dir=model_dir,
    feature_columns=base_columns + crossed_column,
    optimizer=tf.train.FtrlOptimizer(
        l2_regularization_strength=1.0,
        l1_regularization_strength=1.0,
        learning_rate=0.1,
    ),
)


train_file = './data/adult.data'
# NOTE(review): val_file is the same path as train_file, so the first
# evaluation below is computed on the training data -- confirm this is intended.
val_file = './data/adult.data'
test_file = './data/adult.test'

# 5. Train & Evaluate & Predict
print("Train .....")
model.train(input_fn=lambda: input_fn(train_file, 1, True, 512))
results = model.evaluate(
    input_fn=lambda: input_fn(
        data_file=val_file, num_epochs=1, shuffle=False, batch_size=512))
for metric_name in sorted(results):
    print("{0:20}: {1:.4f}".format(metric_name, results[metric_name]))

print("Predict .....")
pred_iter = model.predict(
    input_fn=lambda: input_fn(
        data_file=test_file, num_epochs=1, shuffle=False, batch_size=1))
for pred in pred_iter:
    print(pred)
    break  # Too many predictions; print only the first one.

print("test .....")
test_results = model.evaluate(
    input_fn=lambda: input_fn(
        data_file=test_file, num_epochs=1, shuffle=False, batch_size=512))
for metric_name in sorted(test_results):
    print("{0:20}: {1:.4f}".format(metric_name, test_results[metric_name]))





# if __name__ == '__main__':
#     print(tf.VERSION)
#     data_file = './data/adult.data'
#     next_batch = input_fn(data_file, num_epochs=1, shuffle=True, batch_size=5)
#     with tf.Session() as sess:
#         first_batch = sess.run(next_batch)
#         print(first_batch[0])
#         print(first_batch[1])

I1028 14:53:33.755602  7056 estimator.py:1790] Using default config.
I1028 14:53:33.757597  7056 estimator.py:209] Using config: {'_model_dir': './model/wide_component', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000026AD8B54668>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
I1028 14:53:33.812449  70

Train .....
Parsing ./data/adult.data


I1028 14:53:34.643213  7056 estimator.py:1147] Done calling model_fn.
I1028 14:53:34.644210  7056 basic_session_run_hooks.py:541] Create CheckpointSaverHook.
I1028 14:53:34.792810  7056 monitored_session.py:240] Graph was finalized.
I1028 14:53:34.796800  7056 saver.py:1280] Restoring parameters from ./model/wide_component\model.ckpt-192
I1028 14:53:34.900092  7056 session_manager.py:500] Running local_init_op.
I1028 14:53:34.924055  7056 session_manager.py:502] Done running local_init_op.
I1028 14:53:35.897060  7056 basic_session_run_hooks.py:606] Saving checkpoints for 192 into ./model/wide_component\model.ckpt.
I1028 14:53:37.688073  7056 basic_session_run_hooks.py:262] loss = 190.58664, step = 193
I1028 14:53:38.245586  7056 basic_session_run_hooks.py:606] Saving checkpoints for 256 into ./model/wide_component\model.ckpt.
I1028 14:53:39.065700  7056 estimator.py:368] Loss for final step: 89.07118.
I1028 14:53:39.189360  7056 estimator.py:1145] Calling model_fn.


Parsing ./data/adult.data


W1028 14:53:40.140820  7056 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.
W1028 14:53:40.158750  7056 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.
I1028 14:53:40.175730  7056 estimator.py:1147] Done calling model_fn.
I1028 14:53:40.190690  7056 evaluation.py:255] Starting evaluation at 2019-10-28T14:53:40Z
I1028 14:53:40.286406  7056 monitored_session.py:240] Graph was finalized.
I1028 14:53:40.291393  7056 saver.py:1280] Restoring parameters from ./model/wide_component\model.ckpt-256
I1028 14:53:40.379157  7056 session_manager.py:500] Running local_init_op.
I1028 14:53:40.417055  7056 session_manager.py:502] Done running local_init_op.
I1028 14:53:41.234852  7056 evaluation.py:275] Finished evaluation at 2019-10-28-14:53:41
I1028 14:53:41.235852  7056 estimator.py:2039] Saving dict for global step 256: accuracy = 0.8407911, 

accuracy            : 0.8408
accuracy_baseline   : 0.7592
auc                 : 0.8907
auc_precision_recall: 0.7168
average_loss        : 0.3483
global_step         : 256.0000
label/mean          : 0.2408
loss                : 177.2255
precision           : 0.7281
prediction/mean     : 0.2462
recall              : 0.5407
Predict .....
Parsing ./data/adult.test


I1028 14:53:41.928039  7056 estimator.py:1147] Done calling model_fn.
I1028 14:53:42.117522  7056 monitored_session.py:240] Graph was finalized.
I1028 14:53:42.121500  7056 saver.py:1280] Restoring parameters from ./model/wide_component\model.ckpt-256
I1028 14:53:42.183352  7056 session_manager.py:500] Running local_init_op.
I1028 14:53:42.201302  7056 session_manager.py:502] Done running local_init_op.
I1028 14:53:42.405745  7056 estimator.py:1145] Calling model_fn.


{'logits': array([-4.784633], dtype=float32), 'logistic': array([0.00828793], dtype=float32), 'probabilities': array([0.9917121 , 0.00828793], dtype=float32), 'class_ids': array([0], dtype=int64), 'classes': array([b'0'], dtype=object), 'all_class_ids': array([0, 1]), 'all_classes': array([b'0', b'1'], dtype=object)}
test .....
Parsing ./data/adult.test


W1028 14:53:43.346195  7056 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.
W1028 14:53:43.362178  7056 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.
I1028 14:53:43.380119  7056 estimator.py:1147] Done calling model_fn.
I1028 14:53:43.397086  7056 evaluation.py:255] Starting evaluation at 2019-10-28T14:53:43Z
I1028 14:53:43.488837  7056 monitored_session.py:240] Graph was finalized.
I1028 14:53:43.492799  7056 saver.py:1280] Restoring parameters from ./model/wide_component\model.ckpt-256
I1028 14:53:43.582591  7056 session_manager.py:500] Running local_init_op.
I1028 14:53:43.619492  7056 session_manager.py:502] Done running local_init_op.
I1028 14:53:44.248796  7056 evaluation.py:275] Finished evaluation at 2019-10-28-14:53:44
I1028 14:53:44.249793  7056 estimator.py:2039] Saving dict for global step 256: accuracy = 0.83717215,

accuracy            : 0.8372
accuracy_baseline   : 0.7638
auc                 : 0.8847
auc_precision_recall: 0.6967
average_loss        : 0.3525
global_step         : 256.0000
label/mean          : 0.2362
loss                : 179.3483
precision           : 0.7070
prediction/mean     : 0.2439
recall              : 0.5307


#  wide & deep

In [7]:
import tensorflow as tf
from absl import flags
from absl import app


# 1. The basic features

# Continuous columns. Used by both the wide and the deep components.
age = tf.feature_column.numeric_column(key='age')
education_num = tf.feature_column.numeric_column(key='education_num')
capital_gain = tf.feature_column.numeric_column(key='capital_gain')
capital_loss = tf.feature_column.numeric_column(key='capital_loss')
hours_per_week = tf.feature_column.numeric_column(key='hours_per_week')

# Categorical features. education: 16 vocabulary entries.
education = tf.feature_column.categorical_column_with_vocabulary_list(
    key='education',
    vocabulary_list=[
        'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college',
        'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school',
        '5th-6th', '10th', '1st-4th', 'Preschool', '12th',
    ],
)

# marital_status: 7 vocabulary entries.
marital_status = tf.feature_column.categorical_column_with_vocabulary_list(
    key='marital_status',
    vocabulary_list=[
        'Married-civ-spouse', 'Divorced', 'Married-spouse-absent',
        'Never-married', 'Separated', 'Married-AF-spouse', 'Widowed',
    ],
)

# relationship: 6 vocabulary entries.
relationship = tf.feature_column.categorical_column_with_vocabulary_list(
    key='relationship',
    vocabulary_list=[
        'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',
        'Other-relative',
    ],
)

# workclass: 9 vocabulary entries.
workclass = tf.feature_column.categorical_column_with_vocabulary_list(
    key='workclass',
    vocabulary_list=[
        'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',
        'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked',
    ],
)

# Demonstrates the hash-bucket API.
occupation = tf.feature_column.categorical_column_with_hash_bucket(
    key='occupation',
    hash_bucket_size=1000,
)

# Transformations
age_buckets = tf.feature_column.bucketized_column(
    source_column=age,
    boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65],
)

# 2. The Wide Model: Linear Model with CrossedFeatureColumns
# The wide model is a linear model with a wide set of *sparse and crossed
# feature* columns. The wide part uses a single continuous feature, age, in
# bucketized form (age_buckets); the other continuous features are not used.
base_columns = [
    # All categorical features.
    education,
    marital_status,
    relationship,
    workclass,
    occupation,
    age_buckets,
]

crossed_columns = [
    tf.feature_column.crossed_column(
        keys=['education', 'occupation'],
        hash_bucket_size=1000,
    ),
    tf.feature_column.crossed_column(
        keys=[age_buckets, 'education', 'occupation'],
        hash_bucket_size=1000,
    ),
]

# 3. The Deep Model: Neural Network with Embeddings
# 1. Sparse features -> embedding vectors -> concatenate (embedding vectors,
#    continuous features) -> feed into the hidden layers.
# 2. Embedding values are randomly initialized.
# 3. Another way to handle categorical features is a one-hot or multi-hot
#    representation, but that only suits low-dimensional features; embeddings
#    are the more general approach.
# 4. embedding_column (embedding); indicator_column (multi-hot).
deep_columns = (
    # Continuous features are fed in directly.
    [age, education_num, capital_gain, capital_loss, hours_per_week]
    # Small-vocabulary categorical features as indicator columns.
    + [
        tf.feature_column.indicator_column(categorical_column=column)
        for column in (workclass, education, marital_status, relationship)
    ]
    # To show an example of embedding
    + [tf.feature_column.embedding_column(
        categorical_column=occupation, dimension=8)]
)

model_dir = './model/wide_deep'

# 4. Combine Wide & Deep
# The linear (wide) part gets the base and crossed columns; the DNN (deep)
# part gets deep_columns and two hidden layers of 100 and 50 units.
model = tf.estimator.DNNLinearCombinedClassifier(
    dnn_hidden_units=[100, 50],
    dnn_feature_columns=deep_columns,
    linear_feature_columns=base_columns + crossed_columns,
    model_dir=model_dir,
)

# 5. Train & Evaluate
_CSV_COLUMNS = [
    'age', 'workclass', 'fnlwgt', 'education', 'education_num',
    'marital_status', 'occupation', 'relationship', 'race', 'gender',
    'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
    'income_bracket'
]
_CSV_COLUMN_DEFAULTS = [[0], [''], [0], [''], [0], [''], [''], [''], [''], [''],
                        [0], [0], [0], [''], ['']]
_NUM_EXAMPLES = {
    'train': 32561,
    'validation': 16281,
}

def input_fn(data_file, num_epochs, shuffle, batch_size):
    """Create an input function for an Estimator.

    Args:
        data_file: Path of the cleaned census CSV file to read.
        num_epochs: Number of times the dataset is repeated.
        shuffle: Whether to shuffle the records before repeating/batching.
        batch_size: Number of records per batch.

    Returns:
        A `(features, labels)` pair: `features` maps each column name (except
        'income_bracket') to a batched tensor, and `labels` is a boolean
        tensor that is True where 'income_bracket' equals '>50K'.

    Raises:
        AssertionError: If `data_file` does not exist.
    """
    assert tf.gfile.Exists(data_file), "{0} not found.".format(data_file)

    def parse_csv(line):
        print("Parsing", data_file)
        # tf.decode_csv turns a CSV line into a list of tensors, one per
        # column; record_defaults gives the fill value for missing fields.
        fields = tf.decode_csv(line, record_defaults=_CSV_COLUMN_DEFAULTS)
        features = {name: tensor for name, tensor in zip(_CSV_COLUMNS, fields)}
        # tf.equal(x, y) returns an element-wise boolean tensor for x == y.
        is_over_50k = tf.equal(features.pop('income_bracket'), '>50K')
        return features, is_over_50k

    dataset = tf.data.TextLineDataset(data_file)
    dataset = dataset.map(parse_csv, num_parallel_calls=5)

    if shuffle:
        shuffle_buffer = _NUM_EXAMPLES['train'] + _NUM_EXAMPLES['validation']
        dataset = dataset.shuffle(buffer_size=shuffle_buffer)

    dataset = dataset.repeat(num_epochs).batch(batch_size)

    features_batch, labels_batch = dataset.make_one_shot_iterator().get_next()
    return features_batch, labels_batch

# Train + Eval
train_epochs = 6
epochs_per_eval = 2
batch_size = 40
train_file = './data/adult.data'
test_file  = './data/adult.test'

for n in range(train_epochs // epochs_per_eval):
    model.train(input_fn=lambda: input_fn(train_file, epochs_per_eval, True, batch_size))
    results = model.evaluate(input_fn=lambda: input_fn(
        test_file, 1, False, batch_size))

    # Display Eval results
    print("Results at epoch {0}".format((n+1) * epochs_per_eval))
    print('-'*30)

    for key in sorted(results):
        print("{0:20}: {1:.4f}".format(key, results[key]))





I1025 10:51:09.071384  7056 estimator.py:1790] Using default config.
I1025 10:51:09.073353  7056 estimator.py:209] Using config: {'_model_dir': './model/wide_deep', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000026AD13A8EF0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
I1025 10:51:09.125210  7056 es

Parsing ./data/adult.data


W1025 10:51:09.443383  7056 deprecation.py:323] From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:4207: IndicatorColumn._variable_shape (from tensorflow.python.feature_column.feature_column_v2) is deprecated and will be removed in a future version.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
W1025 10:51:09.444378  7056 deprecation.py:323] From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:4262: VocabularyListCategoricalColumn._num_buckets (from tensorflow.python.feature_column.feature_column_v2) is deprecated and will be removed in a future version.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
W1025 10:51:10.585306  7056 deprecation.py:506] From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\adagrad.py:76:

Parsing ./data/adult.test


W1025 10:51:19.783305  7056 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.
W1025 10:51:19.800260  7056 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.
I1025 10:51:19.817216  7056 estimator.py:1147] Done calling model_fn.
I1025 10:51:19.833145  7056 evaluation.py:255] Starting evaluation at 2019-10-25T10:51:19Z
I1025 10:51:19.950846  7056 monitored_session.py:240] Graph was finalized.
I1025 10:51:19.955814  7056 saver.py:1280] Restoring parameters from ./model/wide_deep\model.ckpt-1629
I1025 10:51:20.066543  7056 session_manager.py:500] Running local_init_op.
I1025 10:51:20.112393  7056 session_manager.py:502] Done running local_init_op.
I1025 10:51:21.005022  7056 evaluation.py:275] Finished evaluation at 2019-10-25-10:51:21
I1025 10:51:21.006992  7056 estimator.py:2039] Saving dict for global step 1629: accuracy = 0.83649653, ac

Results at epoch 2
------------------------------
accuracy            : 0.8365
accuracy_baseline   : 0.7638
auc                 : 0.8805
auc_precision_recall: 0.7163
average_loss        : 0.3705
global_step         : 1629.0000
label/mean          : 0.2362
loss                : 14.7857
precision           : 0.7397
prediction/mean     : 0.2615
recall              : 0.4750
Parsing ./data/adult.data


I1025 10:51:22.906556  7056 estimator.py:1147] Done calling model_fn.
I1025 10:51:22.907536  7056 basic_session_run_hooks.py:541] Create CheckpointSaverHook.
I1025 10:51:23.114007  7056 monitored_session.py:240] Graph was finalized.
I1025 10:51:23.119964  7056 saver.py:1280] Restoring parameters from ./model/wide_deep\model.ckpt-1629
I1025 10:51:23.255624  7056 session_manager.py:500] Running local_init_op.
I1025 10:51:23.287539  7056 session_manager.py:502] Done running local_init_op.
I1025 10:51:24.188347  7056 basic_session_run_hooks.py:606] Saving checkpoints for 1629 into ./model/wide_deep\model.ckpt.
I1025 10:51:26.377677  7056 basic_session_run_hooks.py:262] loss = 14.380152, step = 1630
I1025 10:51:26.893378  7056 basic_session_run_hooks.py:692] global_step/sec: 193.537
I1025 10:51:26.894375  7056 basic_session_run_hooks.py:260] loss = 15.490909, step = 1730 (0.517 sec)
I1025 10:51:27.073893  7056 basic_session_run_hooks.py:692] global_step/sec: 557.05
I1025 10:51:27.074891  70

Parsing ./data/adult.test


W1025 10:51:31.104682  7056 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.
W1025 10:51:31.121664  7056 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.
I1025 10:51:31.138617  7056 estimator.py:1147] Done calling model_fn.
I1025 10:51:31.216407  7056 evaluation.py:255] Starting evaluation at 2019-10-25T10:51:31Z
I1025 10:51:31.338073  7056 monitored_session.py:240] Graph was finalized.
I1025 10:51:31.343039  7056 saver.py:1280] Restoring parameters from ./model/wide_deep\model.ckpt-3258
I1025 10:51:31.452770  7056 session_manager.py:500] Running local_init_op.
I1025 10:51:31.498630  7056 session_manager.py:502] Done running local_init_op.
I1025 10:51:32.404186  7056 evaluation.py:275] Finished evaluation at 2019-10-25-10:51:32
I1025 10:51:32.405184  7056 estimator.py:2039] Saving dict for global step 3258: accuracy = 0.84828943, ac

Results at epoch 4
------------------------------
accuracy            : 0.8483
accuracy_baseline   : 0.7638
auc                 : 0.8931
auc_precision_recall: 0.7447
average_loss        : 0.3451
global_step         : 3258.0000
label/mean          : 0.2362
loss                : 13.7725
precision           : 0.7577
prediction/mean     : 0.2519
recall              : 0.5260
Parsing ./data/adult.data


I1025 10:51:33.781861  7056 estimator.py:1147] Done calling model_fn.
I1025 10:51:33.782855  7056 basic_session_run_hooks.py:541] Create CheckpointSaverHook.
I1025 10:51:34.048143  7056 monitored_session.py:240] Graph was finalized.
I1025 10:51:34.054101  7056 saver.py:1280] Restoring parameters from ./model/wide_deep\model.ckpt-3258
I1025 10:51:34.186771  7056 session_manager.py:500] Running local_init_op.
I1025 10:51:34.219655  7056 session_manager.py:502] Done running local_init_op.
I1025 10:51:35.190168  7056 basic_session_run_hooks.py:606] Saving checkpoints for 3258 into ./model/wide_deep\model.ckpt.
I1025 10:51:37.261632  7056 basic_session_run_hooks.py:262] loss = 11.329519, step = 3259
I1025 10:51:37.772257  7056 basic_session_run_hooks.py:692] global_step/sec: 195.838
I1025 10:51:37.773254  7056 basic_session_run_hooks.py:260] loss = 12.077276, step = 3359 (0.512 sec)
I1025 10:51:37.954766  7056 basic_session_run_hooks.py:692] global_step/sec: 547.92
I1025 10:51:37.955763  70

Parsing ./data/adult.test


W1025 10:51:41.933068  7056 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.
W1025 10:51:41.950003  7056 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.
I1025 10:51:41.967955  7056 estimator.py:1147] Done calling model_fn.
I1025 10:51:41.982940  7056 evaluation.py:255] Starting evaluation at 2019-10-25T10:51:41Z
I1025 10:51:42.165493  7056 monitored_session.py:240] Graph was finalized.
I1025 10:51:42.169459  7056 saver.py:1280] Restoring parameters from ./model/wide_deep\model.ckpt-4887
I1025 10:51:42.278182  7056 session_manager.py:500] Running local_init_op.
I1025 10:51:42.321076  7056 session_manager.py:502] Done running local_init_op.
I1025 10:51:43.206663  7056 evaluation.py:275] Finished evaluation at 2019-10-25-10:51:43
I1025 10:51:43.207660  7056 estimator.py:2039] Saving dict for global step 4887: accuracy = 0.8518519, acc

Results at epoch 6
------------------------------
accuracy            : 0.8519
accuracy_baseline   : 0.7638
auc                 : 0.8950
auc_precision_recall: 0.7527
average_loss        : 0.3389
global_step         : 4887.0000
label/mean          : 0.2362
loss                : 13.5221
precision           : 0.7541
prediction/mean     : 0.2501
recall              : 0.5533
