In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [2]:
train_df = pd.read_csv('./train.csv')

In [3]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 15 columns):
ID         333 non-null int64
crim       333 non-null float64
zn         333 non-null float64
indus      333 non-null float64
chas       333 non-null int64
nox        333 non-null float64
rm         333 non-null float64
age        333 non-null float64
dis        333 non-null float64
rad        333 non-null int64
tax        333 non-null int64
ptratio    333 non-null float64
black      333 non-null float64
lstat      333 non-null float64
medv       333 non-null float64
dtypes: float64(11), int64(4)
memory usage: 39.1 KB


# Define Feature Columns

In [4]:
crim = tf.feature_column.numeric_column('crim', dtype=tf.float64, shape=())
zn = tf.feature_column.numeric_column('zn', dtype=tf.float64, shape=())
indus = tf.feature_column.numeric_column('indus', dtype=tf.float64, shape=())
chas = tf.feature_column.numeric_column('chas', dtype=tf.int64, shape=())
nox = tf.feature_column.numeric_column('nox', dtype=tf.float64, shape=())
rm = tf.feature_column.numeric_column('rm', dtype=tf.float64, shape=())
age = tf.feature_column.numeric_column('age', dtype=tf.float64, shape=())
dis = tf.feature_column.numeric_column('dis', dtype=tf.float64, shape=())
rad = tf.feature_column.numeric_column('rad', dtype=tf.int64, shape=())
tax = tf.feature_column.numeric_column('tax', dtype=tf.int64, shape=())
ptratio = tf.feature_column.numeric_column('ptratio', dtype=tf.float64, shape=())
black = tf.feature_column.numeric_column('black', dtype=tf.float64, shape=())
lstat = tf.feature_column.numeric_column('lstat', dtype=tf.float64, shape=())

In [5]:
feature_cols = [crim, zn, indus, chas, nox, rm, age, dis, rad, tax, ptratio, black, lstat]

# Prepare Data for Input Functions

In [6]:
feature_names = ['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio', 'black', 'lstat']
label_name = 'medv'

features_ndarray = train_df[feature_names]
label_ndarray = train_df[label_name]

In [7]:
X_train, X_test, y_train, y_test = train_test_split(features_ndarray, label_ndarray, random_state=0, test_size=0.3)

# Traning Data

In [8]:
def train_input():
    _dataset = tf.data.Dataset.from_tensor_slices(({'crim': X_train['crim'], 
                                                   'zn': X_train['zn'], 
                                                   'indus': X_train['indus'],
                                                   'chas': X_train['chas'],
                                                   'nox': X_train['nox'],
                                                   'rm': X_train['rm'],
                                                   'age': X_train['age'],
                                                   'dis': X_train['dis'],
                                                   'rad': X_train['rad'],
                                                   'tax': X_train['tax'],
                                                   'ptratio': X_train['ptratio'],
                                                   'black': X_train['black'],
                                                   'lstat': X_train['lstat']
                                                  }, y_train))
    dataset = _dataset.batch(32)
    iterator = dataset.make_one_shot_iterator()
    features, labels = iterator.get_next()
    return features, labels

# Validation Data

In [34]:
def val_input():
    _dataset = tf.data.Dataset.from_tensor_slices(({'crim': X_test['crim'], 
                                                   'zn': X_test['zn'], 
                                                   'indus': X_test['indus'],
                                                   'chas': X_test['chas'],
                                                   'nox': X_test['nox'],
                                                   'rm': X_test['rm'],
                                                   'age': X_test['age'],
                                                   'dis': X_test['dis'],
                                                   'rad': X_test['rad'],
                                                   'tax': X_test['tax'],
                                                   'ptratio': X_test['ptratio'],
                                                   'black': X_test['black'],
                                                   'lstat': X_test['lstat']
                                                  }, y_test))
    dataset = _dataset.batch(32)
    iterator = dataset.make_one_shot_iterator()
    features, labels = iterator.get_next()
    return features, labels

# Instantiate LinearRegressor

In [10]:
estimator = tf.estimator.LinearRegressor(feature_columns=feature_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpr7d16c29', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11865bba8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [25]:
estimator.train(input_fn=train_input, steps=None)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpr7d16c29/model.ckpt-16
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 17 into /var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpr7d16c29/model.ckpt.
INFO:tensorflow:loss = 2495.4827, step = 17
INFO:tensorflow:Saving checkpoints for 24 into /var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpr7d16c29/model.ckpt.
INFO:tensorflow:Loss for final step: 419.80994.


<tensorflow.python.estimator.canned.linear.LinearRegressor at 0x11865b8d0>

In [12]:
train_e = estimator.evaluate(input_fn=train_input)
test_e = estimator.evaluate(input_fn=val_input)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-06-30-16:52:01
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpr7d16c29/model.ckpt-8
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-06-30-16:52:01
INFO:tensorflow:Saving dict for global step 8: average_loss = 101.03263, global_step = 8, loss = 2942.5754
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-06-30-16:52:02
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpr7d16c29/model.ckpt-8
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-06-30-16:52:03
INFO:tensorflow:Saving dict for global 

In [28]:
preds = estimator.predict(input_fn=val_input)

In [29]:
predictions = np.array([item['predictions'][0] for item in preds])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpr7d16c29/model.ckpt-24
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [31]:
predictions

array([ 2.3335239e+01,  2.3495895e+01,  7.0029507e+00,  1.8936348e+01,
        2.0762323e+01,  1.9568039e+01,  2.0482147e+01,  7.9193182e+00,
        2.1119211e+01,  2.0963366e+01,  1.9137793e+01,  2.1727903e+01,
        2.6139978e+01,  2.0977312e+01,  2.1505898e+01,  2.5764175e+01,
        2.0583954e+01,  2.0826008e+01,  2.2977926e+01,  2.1985798e+01,
        2.1712570e+01,  2.0485041e+01,  1.5247025e+01,  1.8009115e+01,
       -6.9752893e+00,  2.0675732e+01,  2.4069044e+01,  2.9649847e+01,
        2.0477301e+01,  2.1192108e+01,  2.0902830e+01,  2.3111647e+01,
        1.9437641e+01,  2.1532497e+01,  2.0787018e+01,  2.2773581e+01,
        2.2955055e+01,  1.8888590e+01,  2.2971472e+01,  2.1215263e+01,
        2.0927177e+01,  2.6998167e+01,  2.4059172e+01,  2.1545889e+01,
        2.2864855e+01,  2.3279930e+01,  2.1948973e+01,  2.7852308e+01,
        2.1165993e+01,  1.5227394e+01,  2.3209238e+01,  2.0501278e+01,
        2.0759714e+01,  1.5424297e+01,  2.1105978e+01,  2.0262516e+01,
      

In [32]:
print(train_e)

{'average_loss': 101.03263, 'loss': 2942.5754, 'global_step': 8}


In [33]:
print(test_e)

{'average_loss': 80.139626, 'loss': 2003.4907, 'global_step': 8}
