In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import pandas as pd

In [3]:
dataset_path = tf.keras.utils.get_file("auto-mpg.data",
                                      ("http://archive.ics.uci.edu/ml/machine-learning"
 "-databases/auto-mpg/auto-mpg.data"))

In [4]:
column_names = ['MPG', 'Cylinders', 'Displacement',
                'Horsepower', 'Weight', 'Acceleration',
                'ModelYear', 'Origin']

In [5]:
df = pd.read_csv(dataset_path, names=column_names,
                 na_values = '?', comment='\t',
                 sep=' ', skipinitialspace=True)
df.head()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,ModelYear,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1


In [6]:
df = df.dropna()
df = df.reset_index(drop=True)
df.head()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,ModelYear,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1


In [7]:
import sklearn
import sklearn.model_selection
df_train, df_test = sklearn.model_selection.train_test_split(
    df, train_size=0.8)
train_stats = df_train.describe().transpose()
train_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
MPG,313.0,23.713099,7.745904,9.0,17.6,23.0,29.0,46.6
Cylinders,313.0,5.405751,1.690347,3.0,4.0,4.0,6.0,8.0
Displacement,313.0,188.92492,101.093516,68.0,98.0,140.0,260.0,455.0
Horsepower,313.0,102.539936,37.192403,46.0,75.0,92.0,120.0,230.0
Weight,313.0,2940.731629,836.720428,1755.0,2215.0,2725.0,3530.0,5140.0
Acceleration,313.0,15.664217,2.691403,8.0,14.0,15.5,17.2,24.8
ModelYear,313.0,76.047923,3.703897,70.0,73.0,76.0,79.0,82.0
Origin,313.0,1.594249,0.807556,1.0,1.0,1.0,2.0,3.0


In [8]:
numeric_column_names = ['Cylinders', 'Displacement',
                        'Horsepower', 'Weight',
                        'Acceleration']

In [9]:
df_train_norm, df_test_norm = df_train.copy(), df_test.copy()

In [10]:
for col_name in numeric_column_names:
    mean = train_stats.loc[col_name, 'mean']
    std  = train_stats.loc[col_name, 'std']
    df_train_norm.loc[:, col_name] = (
        df_train_norm.loc[:, col_name] - mean)/std
    df_test_norm.loc[:, col_name] = (
        df_test_norm.loc[:, col_name] - mean)/std

In [11]:
numeric_features = []
for col_name in numeric_column_names:
    numeric_features.append(
        tf.feature_column.numeric_column(key=col_name))

In [12]:
numeric_features

[NumericColumn(key='Cylinders', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Displacement', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Horsepower', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Weight', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Acceleration', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [13]:
feature_year = tf.feature_column.numeric_column(key='ModelYear')
bucketized_features = []
bucketized_features.append(
    tf.feature_column.bucketized_column(
    source_column=feature_year,
    boundaries=[73, 76, 79]))
bucketized_features

[BucketizedColumn(source_column=NumericColumn(key='ModelYear', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(73, 76, 79))]

In [16]:
feature_origin = tf.feature_column.categorical_column_with_vocabulary_list(
       key='Origin',
       vocabulary_list=[1, 2, 3])

In [17]:
categorical_indicator_features = []
categorical_indicator_features.append(
    tf.feature_column.indicator_column(feature_origin))
categorical_indicator_features

[IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='Origin', vocabulary_list=(1, 2, 3), dtype=tf.int64, default_value=-1, num_oov_buckets=0))]

In [19]:
def train_input_fn(df_train, batch_size=8):
    df = df_train.copy()
    train_x, train_y = df, df.pop('MPG')
    dataset = tf.data.Dataset.from_tensor_slices(
        (dict(train_x), train_y))
    return dataset.shuffle(1000).repeat().batch(batch_size)

In [20]:
ds = train_input_fn(df_train_norm)
batch = next(iter(ds))
print('Keys:', batch[0].keys())

Keys: dict_keys(['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration', 'ModelYear', 'Origin'])


In [21]:
print('Batch Model Years:', batch[0]['ModelYear'])

Batch Model Years: tf.Tensor([82 71 75 77 70 76 82 74], shape=(8,), dtype=int64)


In [22]:
def eval_input_fn(df_test, batch_size=8):
    df = df_test.copy()
    test_x, test_y = df, df.pop('MPG')
    dataset = tf.data.Dataset.from_tensor_slices(
        (dict(test_x), test_y))
    return dataset.batch(batch_size)

In [23]:
all_feature_columns = (numeric_features + bucketized_features + 
                      categorical_indicator_features)

### Instantiate a new Estimator

In [25]:
regressor = tf.estimator.DNNRegressor(feature_columns=all_feature_columns,
                                     hidden_units = [32, 10],
                                     model_dir = 'models/autompg=dnnregressor/')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'models/autompg=dnnregressor/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [26]:
total_steps = 1000 * int(np.ceil(len(df_train) / 8))

In [27]:
regressor.train(input_fn=lambda : train_input_fn(df_test_norm, batch_size=8),
               steps = total_steps)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into models/autompg=dnnregressor/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 618.3035, step = 0
INFO:tensorflow:global_step/sec: 712.627
INFO:tensorflow:loss = 666.3574, step = 100 (0.141 sec)
INFO:tensorflow:global_step/sec: 940.99
INFO:tensorflow:loss = 530.6549, step = 200 (0.106 sec)
INFO:tensorflow:global_s

INFO:tensorflow:loss = 220.36803, step = 7100 (0.146 sec)
INFO:tensorflow:global_step/sec: 689.493
INFO:tensorflow:loss = 226.6959, step = 7200 (0.145 sec)
INFO:tensorflow:global_step/sec: 755.162
INFO:tensorflow:loss = 263.5672, step = 7300 (0.132 sec)
INFO:tensorflow:global_step/sec: 743.152
INFO:tensorflow:loss = 231.37871, step = 7400 (0.135 sec)
INFO:tensorflow:global_step/sec: 702.4
INFO:tensorflow:loss = 65.472786, step = 7500 (0.143 sec)
INFO:tensorflow:global_step/sec: 849.077
INFO:tensorflow:loss = 302.60565, step = 7600 (0.117 sec)
INFO:tensorflow:global_step/sec: 533.948
INFO:tensorflow:loss = 182.4241, step = 7700 (0.188 sec)
INFO:tensorflow:global_step/sec: 578.031
INFO:tensorflow:loss = 209.16647, step = 7800 (0.172 sec)
INFO:tensorflow:global_step/sec: 697.044
INFO:tensorflow:loss = 173.29652, step = 7900 (0.145 sec)
INFO:tensorflow:global_step/sec: 724.439
INFO:tensorflow:loss = 102.74114, step = 8000 (0.138 sec)
INFO:tensorflow:global_step/sec: 728.04
INFO:tensorflow:

INFO:tensorflow:loss = 20.274952, step = 15100 (0.133 sec)
INFO:tensorflow:global_step/sec: 636.347
INFO:tensorflow:loss = 11.974608, step = 15200 (0.157 sec)
INFO:tensorflow:global_step/sec: 703.047
INFO:tensorflow:loss = 54.497173, step = 15300 (0.142 sec)
INFO:tensorflow:global_step/sec: 748.391
INFO:tensorflow:loss = 52.75165, step = 15400 (0.133 sec)
INFO:tensorflow:global_step/sec: 784.843
INFO:tensorflow:loss = 17.806501, step = 15500 (0.128 sec)
INFO:tensorflow:global_step/sec: 981.287
INFO:tensorflow:loss = 56.872284, step = 15600 (0.102 sec)
INFO:tensorflow:global_step/sec: 1121.71
INFO:tensorflow:loss = 70.70575, step = 15700 (0.089 sec)
INFO:tensorflow:global_step/sec: 1021.96
INFO:tensorflow:loss = 65.85534, step = 15800 (0.098 sec)
INFO:tensorflow:global_step/sec: 963.643
INFO:tensorflow:loss = 37.292103, step = 15900 (0.104 sec)
INFO:tensorflow:global_step/sec: 709.612
INFO:tensorflow:loss = 28.838684, step = 16000 (0.142 sec)
INFO:tensorflow:global_step/sec: 706.519
INF

INFO:tensorflow:global_step/sec: 704.98
INFO:tensorflow:loss = 5.0432262, step = 23100 (0.139 sec)
INFO:tensorflow:global_step/sec: 645.965
INFO:tensorflow:loss = 22.710447, step = 23200 (0.155 sec)
INFO:tensorflow:global_step/sec: 755.585
INFO:tensorflow:loss = 9.818907, step = 23300 (0.132 sec)
INFO:tensorflow:global_step/sec: 733.626
INFO:tensorflow:loss = 23.147442, step = 23400 (0.136 sec)
INFO:tensorflow:global_step/sec: 807.507
INFO:tensorflow:loss = 15.296324, step = 23500 (0.123 sec)
INFO:tensorflow:global_step/sec: 743.71
INFO:tensorflow:loss = 34.09307, step = 23600 (0.135 sec)
INFO:tensorflow:global_step/sec: 823.561
INFO:tensorflow:loss = 9.077769, step = 23700 (0.121 sec)
INFO:tensorflow:global_step/sec: 800.678
INFO:tensorflow:loss = 3.8350062, step = 23800 (0.126 sec)
INFO:tensorflow:global_step/sec: 825.954
INFO:tensorflow:loss = 8.160716, step = 23900 (0.120 sec)
INFO:tensorflow:global_step/sec: 818.224
INFO:tensorflow:loss = 11.064217, step = 24000 (0.122 sec)
INFO:t

INFO:tensorflow:loss = 4.8944225, step = 31300 (0.148 sec)
INFO:tensorflow:global_step/sec: 723.764
INFO:tensorflow:loss = 4.609706, step = 31400 (0.138 sec)
INFO:tensorflow:global_step/sec: 726.327
INFO:tensorflow:loss = 8.130619, step = 31500 (0.138 sec)
INFO:tensorflow:global_step/sec: 681.997
INFO:tensorflow:loss = 11.136642, step = 31600 (0.146 sec)
INFO:tensorflow:global_step/sec: 816.066
INFO:tensorflow:loss = 5.6452656, step = 31700 (0.123 sec)
INFO:tensorflow:global_step/sec: 885.302
INFO:tensorflow:loss = 7.832017, step = 31800 (0.113 sec)
INFO:tensorflow:global_step/sec: 784.036
INFO:tensorflow:loss = 14.7066555, step = 31900 (0.127 sec)
INFO:tensorflow:global_step/sec: 706.175
INFO:tensorflow:loss = 14.569141, step = 32000 (0.142 sec)
INFO:tensorflow:global_step/sec: 825.22
INFO:tensorflow:loss = 10.798581, step = 32100 (0.120 sec)
INFO:tensorflow:global_step/sec: 766.024
INFO:tensorflow:loss = 33.275383, step = 32200 (0.131 sec)
INFO:tensorflow:global_step/sec: 789.304
INF

INFO:tensorflow:global_step/sec: 761.134
INFO:tensorflow:loss = 18.885649, step = 39300 (0.132 sec)
INFO:tensorflow:global_step/sec: 667.321
INFO:tensorflow:loss = 15.637043, step = 39400 (0.150 sec)
INFO:tensorflow:global_step/sec: 797.926
INFO:tensorflow:loss = 17.528738, step = 39500 (0.125 sec)
INFO:tensorflow:global_step/sec: 718.794
INFO:tensorflow:loss = 4.772501, step = 39600 (0.139 sec)
INFO:tensorflow:global_step/sec: 753.569
INFO:tensorflow:loss = 16.312023, step = 39700 (0.133 sec)
INFO:tensorflow:global_step/sec: 868.733
INFO:tensorflow:loss = 16.725487, step = 39800 (0.115 sec)
INFO:tensorflow:global_step/sec: 849.099
INFO:tensorflow:loss = 3.9985073, step = 39900 (0.118 sec)
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 40000...
INFO:tensorflow:Saving checkpoints for 40000 into models/autompg=dnnregressor/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 40000...
INFO:tensorflow:Loss for final step: 8.364199.


<tensorflow_estimator.python.estimator.canned.dnn.DNNRegressorV2 at 0x152a9fcf8>

In [28]:
reloaded_regressor = tf.estimator.DNNRegressor(feature_columns = all_feature_columns,
                                              hidden_units = [32, 10],
                                              warm_start_from = 'models/autompg=dnnregressor/',
                                              model_dir = 'models/autompg=dnnregressor/')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'models/autompg=dnnregressor/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [29]:
eval_results = reloaded_regressor.evaluate(input_fn=lambda:eval_input_fn(df_test_norm,
                                                                        batch_size = 8))
eval_results['average_loss']

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-05-14T15:52:00Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from models/autompg=dnnregressor/model.ckpt-40000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.31577s
INFO:tensorflow:Finished evaluation at 2021-05-14-15:52:01
INFO:tensorflow:Saving dict for global step 40000: average_loss = 12.616372, global_step = 40000, label/mean = 22.38734, loss = 12.565818, prediction/mean = 22.236649
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 40000: models/autompg=dnnregressor/model.ckpt-40000


12.616372

In [30]:
pred_res = regressor.predict(input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

In [33]:
next(iter(pred_res))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from models/autompg=dnnregressor/model.ckpt-40000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


{'predictions': array([14.274119], dtype=float32)}