# Predict California Housing Prices with TensorFlow

In [2]:
import pandas as pd
import tensorflow as tf
import math
import shutil
from IPython.core import display as ICD

In [5]:
from sklearn.datasets import load_boston
boston = load_boston()
print(boston.data.shape) #get (numer of rows, number of columns or 'features')
#print(boston.DESCR) #get a description of the dataset
# Next, we load the data into a 'dataframe' object for easier manipulation, and also print the first few rows in order to examine it
df = pd.DataFrame(boston.data, columns=boston.feature_names)
df.head() #notice that the target variable (MEDV) is not included

(506, 13)


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [7]:
print('Original Dataset:')
ICD.display(df.head(5))
a = pd.DataFrame(df.isnull().sum())
a['# of null values'] = a[0]
b = a[['# of null values']]
print('Before Dropping Null Values:')
print('# of Rows, Columns: ',df.shape)
ICD.display(b)
df = df.dropna(axis=0)
a = pd.DataFrame(df.isnull().sum())
a['# of null values'] = a[0]
b = a[['# of null values']]
print('After Dropping Null Values:')
print('# of Rows, Columns: ',df.shape)
ICD.display(b)

Original Dataset:


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


Before Dropping Null Values:
# of Rows, Columns:  (506, 13)


Unnamed: 0,# of null values
CRIM,0
ZN,0
INDUS,0
CHAS,0
NOX,0
RM,0
AGE,0
DIS,0
RAD,0
TAX,0


After Dropping Null Values:
# of Rows, Columns:  (506, 13)


Unnamed: 0,# of null values
CRIM,0
ZN,0
INDUS,0
CHAS,0
NOX,0
RM,0
AGE,0
DIS,0
RAD,0
TAX,0


In [14]:
#For some reason, the loaded data does not include the target variable (MEDV), we add it here
df['MEDV'] = pd.Series(data=boston.target, index=df.index)

train_dataset = df.sample(frac=0.7,random_state=0)
test_dataset = df.drop(train_dataset.index)

train_stats = train_dataset.describe()
train_stats.pop("MEDV")
train_stats = train_stats.transpose()
train_stats

train_labels = train_dataset.pop('MEDV')
test_labels = test_dataset.pop('MEDV')

def norm(x):
  return (x - train_stats['mean']) / train_stats['std']

normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)

In [15]:
def print_rmse(model, name, input_fn):
  metrics = model.evaluate(input_fn=input_fn, steps=1)
  print ('RMSE on {} dataset = {} USD'.format(name, np.sqrt(metrics['average_loss'])*SCALE))

In [19]:
featcols = {
  colname : tf.feature_column.numeric_column(colname) \
    for colname in 'CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,'.split(',')
}

In [26]:
import shutil

outdir = './housing_trained'
shutil.rmtree(outdir, ignore_errors = True) # start fresh each time
myopt = tf.keras.optimizers.Adam(learning_rate = 0.01)
model = tf.estimator.LinearRegressor(model_dir = outdir, feature_columns = featcols.values(), optimizer = myopt)
#NSTEPS = (100 * len(traindf)) / BATCH_SIZE
NSTEPS = 3000
model.train(input_fn = normed_train_data, steps = NSTEPS)
print_rmse(model, 'eval', normed_test_data)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './housing_trained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


TypeError: unsupported callable