In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [2]:
# Load the training CSV from the notebook's working directory.
train_df = pd.read_csv('./train.csv')

In [3]:
# Summarise column dtypes and non-null counts to check for missing values.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 15 columns):
ID         333 non-null int64
crim       333 non-null float64
zn         333 non-null float64
indus      333 non-null float64
chas       333 non-null int64
nox        333 non-null float64
rm         333 non-null float64
age        333 non-null float64
dis        333 non-null float64
rad        333 non-null int64
tax        333 non-null int64
ptratio    333 non-null float64
black      333 non-null float64
lstat      333 non-null float64
medv       333 non-null float64
dtypes: float64(11), int64(4)
memory usage: 39.1 KB


# Define Feature Columns

In [4]:
def _scalar_column(key, dtype=tf.float64):
    """Build a numeric feature column for `key` with shape=()."""
    return tf.feature_column.numeric_column(key, dtype=dtype, shape=())


# One column per model input. Float-valued inputs use the helper's float64
# default; the three integer-valued inputs pass tf.int64 explicitly.
crim = _scalar_column('crim')
zn = _scalar_column('zn')
indus = _scalar_column('indus')
chas = _scalar_column('chas', dtype=tf.int64)
nox = _scalar_column('nox')
rm = _scalar_column('rm')
age = _scalar_column('age')
dis = _scalar_column('dis')
rad = _scalar_column('rad', dtype=tf.int64)
tax = _scalar_column('tax', dtype=tf.int64)
ptratio = _scalar_column('ptratio')
black = _scalar_column('black')
lstat = _scalar_column('lstat')

In [5]:
# Feature columns handed to the estimator, in the order they were defined above.
feature_cols = [crim, zn, indus, chas, nox, rm, age, dis, rad, tax, ptratio, black, lstat]

# Prepare Data for Input Functions

In [6]:
# CSV columns used as model inputs; 'ID' is left out and 'medv' is the target.
feature_names = ['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio', 'black', 'lstat']
label_name = 'medv'

# NOTE: despite the `_ndarray` suffix these are pandas objects (a DataFrame
# selected by a list of columns and a Series selected by one column), not
# NumPy arrays.
features_ndarray = train_df[feature_names]
label_ndarray = train_df[label_name]

In [7]:
# Hold out 30% of the rows for validation; random_state=0 fixes the split.
X_train, X_test, y_train, y_test = train_test_split(features_ndarray, label_ndarray, random_state=0, test_size=0.3)

# Training Data

In [8]:
def train_input():
    """Input function that feeds the training split to the estimator.

    Reads the module-level `X_train` and `y_train`, slices them row by row
    into a `tf.data.Dataset`, and groups the rows into batches of 32. The
    dataset is not repeated, so the iterator is exhausted after one pass.

    Returns:
        A `(features, labels)` tuple taken from the iterator's `get_next()`:
        `features` is a dict keyed by the names in `feature_names`, and
        `labels` is the corresponding batch from `y_train`.
    """
    # Derive the keys from `feature_names` instead of re-typing every column,
    # so this function cannot drift out of step with the feature list.
    columns = {name: X_train[name] for name in feature_names}
    dataset = tf.data.Dataset.from_tensor_slices((columns, y_train)).batch(32)
    # NOTE(review): `make_one_shot_iterator` is reported as deprecated in later
    # TensorFlow releases - confirm against the installed version before upgrading.
    features, labels = dataset.make_one_shot_iterator().get_next()
    return features, labels

# Validation Data

In [9]:
def val_input():
    """Input function that feeds the held-out split to the estimator.

    Reads the module-level `X_test` and `y_test`, slices them row by row
    into a `tf.data.Dataset`, and groups the rows into batches of 32. The
    dataset is not repeated, so the iterator is exhausted after one pass.

    Returns:
        A `(features, labels)` tuple taken from the iterator's `get_next()`:
        `features` is a dict keyed by the names in `feature_names`, and
        `labels` is the corresponding batch from `y_test`.
    """
    # Derive the keys from `feature_names` instead of re-typing every column,
    # so this function cannot drift out of step with the feature list.
    columns = {name: X_test[name] for name in feature_names}
    dataset = tf.data.Dataset.from_tensor_slices((columns, y_test)).batch(32)
    # NOTE(review): `make_one_shot_iterator` is reported as deprecated in later
    # TensorFlow releases - confirm against the installed version before upgrading.
    features, labels = dataset.make_one_shot_iterator().get_next()
    return features, labels

# Instantiate LinearRegressor

In [10]:
# Linear model over the 13 feature columns. No model_dir is passed; the config
# log below shows `_model_dir` pointing at a temporary directory.
estimator = tf.estimator.LinearRegressor(feature_columns=feature_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpfp014wgm', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a1b562ba8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [11]:
# steps=None places no step limit, so training stops when the input function
# is exhausted. train_input does not repeat its dataset, so this is a single
# pass over the training split (the log below ends at step 8).
estimator.train(input_fn=train_input, steps=None)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpfp014wgm/model.ckpt.
INFO:tensorflow:loss = 20052.04, step = 1
INFO:tensorflow:Saving checkpoints for 8 into /var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpfp014wgm/model.ckpt.
INFO:tensorflow:Loss for final step: 561.62476.


<tensorflow.python.estimator.canned.linear.LinearRegressor at 0x1a1b5629b0>

In [12]:
# Score the trained model on both splits so the two losses can be compared.
train_e = estimator.evaluate(input_fn=train_input)
test_e = estimator.evaluate(input_fn=val_input)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-09-15-15:01:45
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpfp014wgm/model.ckpt-8
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-09-15-15:01:46
INFO:tensorflow:Saving dict for global step 8: average_loss = 101.03263, global_step = 8, loss = 2942.5754
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-09-15-15:01:46
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpfp014wgm/model.ckpt-8
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-09-15-15:01:46
INFO:tensorflow:Saving dict for global 

In [13]:
# NOTE(review): predict() appears to be lazy - this cell emits no log output;
# the model is only restored once `preds` is iterated in the next cell.
preds = estimator.predict(input_fn=val_input)

In [14]:
# Consume the prediction iterable, taking element 0 of each item's
# 'predictions' entry to build one flat array of predicted values.
predictions = np.array([item['predictions'][0] for item in preds])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/2d/49w9fshj6ljb1kjldptz0w7h0000gn/T/tmpfp014wgm/model.ckpt-8
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [15]:
# Display the predicted values for the validation split.
predictions

array([19.738384 , 19.701942 , 13.40074  , 19.329948 , 18.223768 ,
       17.593994 , 18.259476 , 13.8396015, 18.684002 , 19.07943  ,
       22.340336 , 20.918901 , 20.534126 , 18.870707 , 19.925318 ,
       22.046883 , 19.558393 , 18.667423 , 19.788708 , 21.093403 ,
       20.64836  , 20.164509 , 19.186544 , 22.82376  ,  7.2038608,
       17.727411 , 20.3971   , 22.121077 , 17.777039 , 18.86451  ,
       20.96296  , 19.79134  , 18.998981 , 19.568462 , 19.925522 ,
       24.814579 , 19.816513 , 19.497887 , 20.027397 , 18.72643  ,
       23.294796 , 22.643608 , 20.551254 , 19.256245 , 20.385565 ,
       19.558657 , 20.429382 , 21.438097 , 18.303116 , 16.40173  ,
       19.818329 , 18.213896 , 17.85627  , 19.45393  , 18.400267 ,
       17.83765  , 20.958357 , 15.161761 , 22.99537  , 23.50764  ,
       19.146425 , 21.547232 , 17.435684 , 23.489075 , 16.840118 ,
       18.455595 , 22.789526 , 23.59702  , 22.52855  , 11.23262  ,
       23.236038 , 18.722271 , 10.429    , 20.253323 , 18.7324

In [32]:
# Metrics from evaluating on the training split.
# NOTE(review): the execution count jumps from 15 to 32 here - confirm the
# notebook still reproduces these numbers on a fresh Restart & Run All.
print(train_e)

{'average_loss': 101.03263, 'loss': 2942.5754, 'global_step': 8}


In [33]:
# Metrics from evaluating on the held-out validation split.
print(test_e)

{'average_loss': 80.139626, 'loss': 2003.4907, 'global_step': 8}
