In [37]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split

In [38]:
my_data = pd.read_csv('cal_housing_clean.csv')

In [44]:
my_data.head()

Unnamed: 0,housingMedianAge,totalRooms,totalBedrooms,population,households,medianIncome,medianHouseValue
0,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0
1,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0
2,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0
3,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0
4,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0


In [182]:
my_data.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
housingMedianAge,20640.0,28.639486,12.585558,1.0,18.0,29.0,37.0,52.0
totalRooms,20640.0,2635.763081,2181.615252,2.0,1447.75,2127.0,3148.0,39320.0
totalBedrooms,20640.0,537.898014,421.247906,1.0,295.0,435.0,647.0,6445.0
population,20640.0,1425.476744,1132.462122,3.0,787.0,1166.0,1725.0,35682.0
households,20640.0,499.53968,382.329753,1.0,280.0,409.0,605.0,6082.0
medianIncome,20640.0,3.870671,1.899822,0.4999,2.5634,3.5348,4.74325,15.0001
medianHouseValue,20640.0,206855.816909,115395.615874,14999.0,119600.0,179700.0,264725.0,500001.0


In [39]:
X_data = my_data.drop('medianHouseValue', axis=1) #only the features

In [40]:
y_data = my_data['medianHouseValue']

Split the data to features and labels

In [88]:
X_train, X_test, y_train, y_test = train_test_split(X_data,y_data, test_size = 0.33, random_state=101)

In [89]:
my_data['housingMedianAge'].describe()

count    20640.000000
mean        28.639486
std         12.585558
min          1.000000
25%         18.000000
50%         29.000000
75%         37.000000
max         52.000000
Name: housingMedianAge, dtype: float64

In [90]:
print(X_test.shape)

(6812, 6)


# Scale the feature data

In [91]:
from sklearn.preprocessing import MinMaxScaler

In [92]:
scaler = MinMaxScaler()

In [93]:
scaler.fit(X_train) #only training data

MinMaxScaler(copy=True, feature_range=(0, 1))

In [94]:
scaled_x_train = pd.DataFrame(data = scaler.transform(X_train),
                       columns = X_train.columns,
                       index = X_train.index) # creating data frame because scaler just return numpy arrays

In [95]:
scaled_x_test = pd.DataFrame(data = scaler.transform(X_test),
                       columns = X_test.columns,
                       index = X_test.index)

Create Feature columns

In [97]:
X_train.columns

Index(['housingMedianAge', 'totalRooms', 'totalBedrooms', 'population',
       'households', 'medianIncome'],
      dtype='object')

In [101]:
age = tf.feature_column.numeric_column('housingMedianAge')
rooms = tf.feature_column.numeric_column('totalRooms')
bedrooms = tf.feature_column.numeric_column('totalBedrooms')
population = tf.feature_column.numeric_column('population')
households = tf.feature_column.numeric_column('households')
income = tf.feature_column.numeric_column('medianIncome')

In [102]:
feat_cols = [age, rooms, bedrooms, population, households, income]

create input function

In [110]:
input_func = tf.estimator.inputs.pandas_input_fn(x=X_train,
                                                 y=y_train, 
                                                 batch_size=10, 
                                                 num_epochs=1000, 
                                                 shuffle=True)

Create Estimator model

In [169]:
model = tf.estimator.DNNRegressor(feature_columns=feat_cols, hidden_units=[6,6,6])# 3 layers 6 unit each

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_summary_steps': 100, '_save_checkpoints_steps': None, '_tf_random_seed': 1, '_session_config': None, '_keep_checkpoint_every_n_hours': 10000, '_save_checkpoints_secs': 600, '_model_dir': '/tmp/tmp9y5deniv', '_log_step_count_steps': 100, '_keep_checkpoint_max': 5}


Train model for 1000 steps

In [170]:
model.train(input_fn=input_func, steps=1000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmp9y5deniv/model.ckpt.
INFO:tensorflow:step = 1, loss = 4.77402e+11
INFO:tensorflow:global_step/sec: 559.508
INFO:tensorflow:step = 101, loss = 2.47713e+11 (0.185 sec)
INFO:tensorflow:global_step/sec: 533.339
INFO:tensorflow:step = 201, loss = 3.71371e+11 (0.189 sec)
INFO:tensorflow:global_step/sec: 504.733
INFO:tensorflow:step = 301, loss = 2.10836e+11 (0.201 sec)
INFO:tensorflow:global_step/sec: 557.784
INFO:tensorflow:step = 401, loss = 3.24807e+11 (0.172 sec)
INFO:tensorflow:global_step/sec: 512.762
INFO:tensorflow:step = 501, loss = 1.23775e+11 (0.195 sec)
INFO:tensorflow:global_step/sec: 518.149
INFO:tensorflow:step = 601, loss = 2.87612e+11 (0.192 sec)
INFO:tensorflow:global_step/sec: 537.433
INFO:tensorflow:step = 701, loss = 1.18713e+11 (0.190 sec)
INFO:tensorflow:global_step/sec: 529.244
INFO:tensorflow:step = 801, loss = 1.35017e+11 (0.186 sec)
INFO:tensorflow:global_step/sec: 503

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x7faa4c7f02e8>

Create a prediction input function and then use the predict method of your estimator model to create a prediction list on your test data

In [171]:
pred_input_func = tf.estimator.inputs.pandas_input_fn(x=X_test,
                                                     batch_size=10,
                                                     num_epochs=1,
                                                     shuffle=False)
# for prediction we don't have any Y values

In [172]:
predictions = model.predict(pred_input_func)

In [173]:
preds = list(predictions)

INFO:tensorflow:Restoring parameters from /tmp/tmp9y5deniv/model.ckpt-1000


In [174]:
preds

[{'predictions': array([ 80984.265625], dtype=float32)},
 {'predictions': array([ 46881.10546875], dtype=float32)},
 {'predictions': array([ 72943.21875], dtype=float32)},
 {'predictions': array([ 79432.0625], dtype=float32)},
 {'predictions': array([ 141055.78125], dtype=float32)},
 {'predictions': array([ 189253.984375], dtype=float32)},
 {'predictions': array([ 109635.40625], dtype=float32)},
 {'predictions': array([ 99766.6875], dtype=float32)},
 {'predictions': array([ 101096.15625], dtype=float32)},
 {'predictions': array([ 4820.18554688], dtype=float32)},
 {'predictions': array([ 126914.828125], dtype=float32)},
 {'predictions': array([ 266212.5625], dtype=float32)},
 {'predictions': array([ 86429.796875], dtype=float32)},
 {'predictions': array([ 53200.1484375], dtype=float32)},
 {'predictions': array([ 106629.21875], dtype=float32)},
 {'predictions': array([ 35320.3984375], dtype=float32)},
 {'predictions': array([ 39428.87109375], dtype=float32)},
 {'predictions': array([ 112

Calculate RMS error

In [177]:
final_preds = []
for pred in preds:
    final_preds.append(pred['predictions'])

In [178]:
from sklearn.metrics import mean_squared_error

In [179]:
mean_squared_error(y_test, final_preds)**0.5

163699.72266837597