## 训练与使用 early stopping 防止过拟合

### 0.引入工具库

In [11]:
# Python 2/3 compatibility shims; __future__ imports must stay first in the cell.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import warnings
import os
# Configure TensorFlow's native log verbosity through the environment.
# NOTE(review): this is assigned before `import tensorflow` below, presumably so
# the runtime reads it at import time; '0' is believed to keep every log level
# visible -- confirm against the TensorFlow version in use.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
import tensorflow as tf
# Suppress all Python warnings from this point on. Because this runs after the
# TensorFlow import, warnings raised during that import are not filtered.
warnings.filterwarnings("ignore")

### 1.定义训练与测试文件

In [12]:
# Dataset files: relative CSV paths for the training split and the test split.
IRIS_TRAINING, IRIS_TEST = "iris_training.csv", "iris_test.csv"

In [13]:
# Load the Iris CSV files (via the header-aware loader) into dataset objects
# whose `.data` (features) and `.target` (labels) are used by the cells below.
#
# The built-in `int` is passed instead of `np.int`: `np.int` was only an alias
# of `int`, was deprecated in NumPy 1.20 and removed in NumPy 1.24, so this is
# equivalent on old NumPy releases and keeps the cell working on new ones.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TRAINING, target_dtype=int, features_dtype=np.float32)
test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TEST, target_dtype=int, features_dtype=np.float32)

In [14]:
# Metrics to monitor during validation, keyed by the name they are reported under.
# Every metric is computed from the model's "classes" prediction output.
# NOTE(review): tf.metrics.precision / tf.metrics.recall appear to be binary
# metrics; with three Iris classes their values may not represent per-class
# precision/recall -- confirm before relying on them.
validation_metrics = {
    metric_name: tf.contrib.learn.MetricSpec(
        metric_fn=metric_fn, prediction_key="classes")
    for metric_name, metric_fn in (
        ("accuracy", tf.metrics.accuracy),    # accuracy
        ("precision", tf.metrics.precision),  # precision
        ("recall", tf.metrics.recall),        # recall
    )
}

In [15]:
# Validation monitor: evaluates on the test split every 50 training steps using
# `validation_metrics`, and is configured for early stopping on the "loss"
# metric (lower is better) with a patience setting of 200.
# NOTE(review): early_stopping_rounds is presumably counted in training steps
# without improvement -- confirm against the ValidationMonitor documentation.
validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(
    x=test_set.data,
    y=test_set.target,
    metrics=validation_metrics,
    every_n_steps=50,
    early_stopping_metric="loss",
    early_stopping_metric_minimize=True,
    early_stopping_rounds=200,
)

In [16]:
# Feature columns: all inputs are numeric, so a single numeric column of
# shape (4,) is declared. The empty-string key is kept as in the original cell.
feature_columns = [
    tf.feature_column.numeric_column(key="", shape=(4,)),
]

In [17]:
# Build the estimator: a 3-class DNN classifier with hidden layers of
# 10, 20 and 10 units, checkpointing into /tmp/iris_model.
# save_checkpoints_secs=1 requests a checkpoint save interval of one second.
# NOTE(review): the logged output of the fit cell restores from
# model.ckpt-20000, which suggests an existing /tmp/iris_model is reused and
# training resumes from earlier runs -- clear the directory for a fresh run.
classifier = tf.contrib.learn.DNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=3,
    feature_columns=feature_columns,
    config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1),
    model_dir="/tmp/iris_model",
)

INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x117d1ba90>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 1, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/tmp/iris_model'}


In [18]:
# 拟合模型
# Fit the model: 2000 steps on the training split, with the validation
# monitor attached. Left as the cell's final expression so its result is
# displayed as the cell output.
classifier.fit(
    x=training_set.data,
    y=training_set.target,
    monitors=[validation_monitor],
    steps=2000,
)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/iris_model/model.ckpt-20000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 20001 into /tmp/iris_model/model.ckpt.
INFO:tensorflow:Starting evaluation at 2018-05-18-04:29:11
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/iris_model/model.ckpt-20001
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-05-18-04:29:12
INFO:tensorflow:Saving dict for global step 20001: accuracy = 0.96666664, global_step = 20001, loss = 0.16083662, precision = 1.0, recall = 1.0
INFO:tensorflow:Validation (step 20001): loss = 0.16083662, accuracy = 0.96666664, precision = 1.0, recall = 1.0, global_step = 20001
INFO:tensorflow:loss = 0.0016258508, step = 20001
INFO:tensorflow:global_step/sec: 165.816
I

DNNClassifier(params={'head': <tensorflow.contrib.learn.python.learn.estimators.head._MultiClassHead object at 0x117d1b4a8>, 'hidden_units': [10, 20, 10], 'feature_columns': (_NumericColumn(key='', shape=(4,), default_value=None, dtype=tf.float32, normalizer_fn=None),), 'optimizer': None, 'activation_fn': <function relu at 0x110f1fae8>, 'dropout': None, 'gradient_clip_norm': None, 'embedding_lr_multipliers': None, 'input_layer_min_slice_size': None})

In [19]:
# Evaluate on the test split and report the "accuracy" entry of the results.
eval_results = classifier.evaluate(x=test_set.data, y=test_set.target)
accuracy_score = eval_results["accuracy"]
print("Accuracy: {0:f}".format(accuracy_score))

INFO:tensorflow:Starting evaluation at 2018-05-18-04:29:14
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/iris_model/model.ckpt-22000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-05-18-04:29:14
INFO:tensorflow:Saving dict for global step 22000: accuracy = 0.96666664, global_step = 22000, loss = 0.16551517
Accuracy: 0.966667


In [20]:
# Classify two new samples, each given as a row of four float32 feature values.
new_samples = np.array(
    [
        [6.4, 3.2, 4.5, 1.5],
        [5.8, 3.1, 5.0, 1.7],
    ],
    dtype=np.float32,
)
# predict() output is materialised into a list before printing.
y = list(classifier.predict(new_samples))
print("Predictions: {}".format(str(y)))

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/iris_model/model.ckpt-22000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Predictions: [1, 2]
