In [1]:
# https://www.tensorflow.org/get_started/input_fn
import itertools

import pandas as pd
import tensorflow as tf

# Emit TensorFlow log messages at INFO level; the "INFO:tensorflow:" lines in
# the cell outputs of this notebook come from this setting.
tf.logging.set_verbosity(tf.logging.INFO)

In [3]:
# Column layout used when reading the boston_*.csv files: LABEL is the column
# used as the regression target, FEATURES are the model inputs, and COLUMNS is
# the full header (inputs first, target last).
LABEL = "medv"
FEATURES = [
    "crim", "zn", "indus", "nox", "rm",
    "age", "dis", "tax", "ptratio",
]
COLUMNS = FEATURES + [LABEL]

def _read_boston_csv(path):
  """Read one CSV, skipping its first row and naming the columns from COLUMNS."""
  return pd.read_csv(path, skipinitialspace=True, skiprows=1, names=COLUMNS)


# The three splits share one layout, so they all go through the same reader.
training_set = _read_boston_csv("boston_train.csv")
test_set = _read_boston_csv("boston_test.csv")
prediction_set = _read_boston_csv("boston_predict.csv")


In [4]:
# One numeric feature column per input name listed in FEATURES.
feature_cols = [
    tf.feature_column.numeric_column(key=feature_name)
    for feature_name in FEATURES
]


In [5]:
# DNN regressor over the feature columns, with hidden_units=[10, 10].
# Checkpoints are written under /tmp/boston_model (see the log output below).
# NOTE(review): checkpoints already present in that directory are presumably
# reused when the notebook is re-run -- confirm before comparing runs.
regressor = tf.estimator.DNNRegressor(
    model_dir="/tmp/boston_model",
    hidden_units=[10, 10],
    feature_columns=feature_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/boston_model', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100}


In [6]:
def get_input_fn(data_set, num_epochs=None, shuffle=True):
  """Build a pandas-backed input function for the estimator.

  Args:
    data_set: DataFrame that has every column named in FEATURES plus LABEL.
    num_epochs: passed through unchanged to pandas_input_fn.
    shuffle: passed through unchanged to pandas_input_fn.

  Returns:
    The result of tf.estimator.inputs.pandas_input_fn for these arguments.
  """
  # Rebuild x and y from the raw column values rather than passing slices of
  # data_set directly.
  feature_frame = pd.DataFrame(
      {name: data_set[name].values for name in FEATURES})
  label_series = pd.Series(data_set[LABEL].values)
  return tf.estimator.inputs.pandas_input_fn(
      x=feature_frame,
      y=label_series,
      num_epochs=num_epochs,
      shuffle=shuffle)


In [7]:
# Train on the training split for 5000 steps. The call stays as the last
# expression of the cell so the returned estimator remains the cell output.
train_input_fn = get_input_fn(training_set)
regressor.train(input_fn=train_input_fn, steps=5000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/boston_model\model.ckpt.
INFO:tensorflow:loss = 53678.6, step = 1
INFO:tensorflow:global_step/sec: 492.394
INFO:tensorflow:loss = 9471.59, step = 101 (0.203 sec)
INFO:tensorflow:global_step/sec: 639.992
INFO:tensorflow:loss = 10588.7, step = 201 (0.172 sec)
INFO:tensorflow:global_step/sec: 581.783
INFO:tensorflow:loss = 10100.0, step = 301 (0.156 sec)
INFO:tensorflow:global_step/sec: 640.036
INFO:tensorflow:loss = 9972.9, step = 401 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.955
INFO:tensorflow:loss = 8461.61, step = 501 (0.156 sec)
INFO:tensorflow:global_step/sec: 640.035
INFO:tensorflow:loss = 9097.31, step = 601 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.813
INFO:tensorflow:loss = 8218.45, step = 701 (0.172 sec)
INFO:tensorflow:global_step/sec: 639.986
INFO:tensorflow:loss = 6889.53, step = 801 (0.156 sec)
INFO:tensorflow:global_step/sec: 492.308
INFO:tensorflow:loss = 9430.98,

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x23b90110320>

In [8]:
# Evaluate on the test split: one epoch, no shuffling.
eval_input_fn = get_input_fn(test_set, num_epochs=1, shuffle=False)
ev = regressor.evaluate(input_fn=eval_input_fn)

INFO:tensorflow:Starting evaluation at 2017-10-05-18:04:58
INFO:tensorflow:Restoring parameters from /tmp/boston_model\model.ckpt-5000
INFO:tensorflow:Finished evaluation at 2017-10-05-18:04:58
INFO:tensorflow:Saving dict for global step 5000: average_loss = 14.9884, global_step = 5000, loss = 1498.84


In [9]:
# Report the "loss" entry of the evaluation result (the evaluation log lists
# it next to a separate "average_loss" value).
loss_score = ev["loss"]
loss_report = "Loss: {0:f}".format(loss_score)
print(loss_report)

Loss: 1498.842529


In [10]:
predict_input_fn = get_input_fn(prediction_set, num_epochs=1, shuffle=False)
y = regressor.predict(input_fn=predict_input_fn)
# .predict() returns an iterator of dicts; take the first six, keep each
# dict's "predictions" entry, and print the resulting list.
first_six = itertools.islice(y, 6)
predictions = [p["predictions"] for p in first_six]
print("Predictions: {}".format(str(predictions)))


INFO:tensorflow:Restoring parameters from /tmp/boston_model\model.ckpt-5000
Predictions: [array([ 33.69565964], dtype=float32), array([ 18.10408783], dtype=float32), array([ 23.69057846], dtype=float32), array([ 34.9368248], dtype=float32), array([ 15.36082363], dtype=float32), array([ 20.06808472], dtype=float32)]


In [11]:
# Feature-only frame built from the training split, kept for inspection below.
training_columns = {k: training_set[k].values for k in FEATURES}
x = pd.DataFrame(training_columns)

In [17]:
# Show the first ten rows of the feature frame as plain text.
print(x.head(10))

    age      crim      dis  indus     nox  ptratio     rm  tax    zn
0  96.1   2.30040   2.1000  19.58  0.6050     14.7  6.319  403   0.0
1  94.7  13.35980   1.7821  18.10  0.6930     20.2  5.887  666   0.0
2   2.9   0.12744   5.7209   6.91  0.4480     17.9  6.770  233   0.0
3  17.5   0.15876   5.2873  10.81  0.4130     19.2  5.961  305   0.0
4  38.3   0.03768   7.3090   1.52  0.4040     12.6  7.274  329  80.0
5  37.2   0.03705   5.2447   3.33  0.4429     14.9  6.968  216  20.0
6  18.5   0.07244  10.7103   1.69  0.4110     18.3  5.884  411  60.0
7  17.7   0.10000   5.4917   6.09  0.4330     16.1  6.982  329  34.0
8  90.0   4.81213   2.5975  18.10  0.7130     20.2  6.701  666   0.0
9  92.7   0.15086   1.8209  27.74  0.6090     20.1  5.454  711   0.0


In [21]:
# Show the first ten values of the "crim" column as plain text.
print(x["crim"].head(10))

0     2.30040
1    13.35980
2     0.12744
3     0.15876
4     0.03768
5     0.03705
6     0.07244
7     0.10000
8     4.81213
9     0.15086
Name: crim, dtype: float64
