In [30]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
%matplotlib inline

In [31]:
housing_data = pd.read_csv('housing.csv')

In [32]:
housing_data.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [36]:
housing_data.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
longitude,20640.0,-119.569704,2.003532,-124.35,-121.8,-118.49,-118.01,-114.31
latitude,20640.0,35.631861,2.135952,32.54,33.93,34.26,37.71,41.95
housing_median_age,20640.0,28.639486,12.585558,1.0,18.0,29.0,37.0,52.0
total_rooms,20640.0,2635.763081,2181.615252,2.0,1447.75,2127.0,3148.0,39320.0
total_bedrooms,20433.0,537.870553,421.38507,1.0,296.0,435.0,647.0,6445.0
population,20640.0,1425.476744,1132.462122,3.0,787.0,1166.0,1725.0,35682.0
households,20640.0,499.53968,382.329753,1.0,280.0,409.0,605.0,6082.0
median_income,20640.0,3.870671,1.899822,0.4999,2.5634,3.5348,4.74325,15.0001
median_house_value,20640.0,206855.816909,115395.615874,14999.0,119600.0,179700.0,264725.0,500001.0


In [4]:
housing_data.isnull().sum()

longitude               0
latitude                0
housing_median_age      0
total_rooms             0
total_bedrooms        207
population              0
households              0
median_income           0
median_house_value      0
ocean_proximity         0
dtype: int64

In [5]:
y_val = housing_data['median_house_value']

In [6]:
x_val = housing_data.drop(['median_house_value','longitude','latitude','ocean_proximity'], axis=1)

In [7]:
x_val.head()

Unnamed: 0,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
0,41.0,880.0,129.0,322.0,126.0,8.3252
1,21.0,7099.0,1106.0,2401.0,1138.0,8.3014
2,52.0,1467.0,190.0,496.0,177.0,7.2574
3,52.0,1274.0,235.0,558.0,219.0,5.6431
4,52.0,1627.0,280.0,565.0,259.0,3.8462


In [8]:
x_val = x_val.fillna(x_val.mean())

In [9]:
X_train, X_test, y_train, y_test = train_test_split(x_val, y_val, test_size=0.3, random_state=101)

In [10]:
scaler = MinMaxScaler()

In [11]:
scaler.fit(X_train)

MinMaxScaler(copy=True, feature_range=(0, 1))

In [12]:
X_train.isnull().sum()

housing_median_age    0
total_rooms           0
total_bedrooms        0
population            0
households            0
median_income         0
dtype: int64

In [13]:
scaler.transform(X_train)

array([[0.35294118, 0.06968818, 0.11716325, 0.04876939, 0.11544154,
        0.14250838],
       [0.60784314, 0.01124167, 0.01567349, 0.00836747, 0.01414241,
        0.04502697],
       [0.66666667, 0.02523017, 0.03134699, 0.02097119, 0.03025818,
        0.21286603],
       ...,
       [0.09803922, 0.08962816, 0.08659218, 0.0481392 , 0.08057885,
        0.38940153],
       [0.68627451, 0.02110992, 0.03258845, 0.0189756 , 0.03453379,
        0.19181804],
       [0.43137255, 0.08538074, 0.11235258, 0.0650842 , 0.10689031,
        0.14979104]])

In [14]:
X_train = pd.DataFrame(data=scaler.transform(X_train), columns=X_train.columns, index=X_train.index)

In [15]:
X_test = pd.DataFrame(data=scaler.transform(X_test), columns=X_test.columns, index=X_test.index)

In [16]:
housing_data.columns

Index(['longitude', 'latitude', 'housing_median_age', 'total_rooms',
       'total_bedrooms', 'population', 'households', 'median_income',
       'median_house_value', 'ocean_proximity'],
      dtype='object')

In [17]:
age = tf.feature_column.numeric_column('housing_median_age')
rooms = tf.feature_column.numeric_column('total_rooms')
bedrooms = tf.feature_column.numeric_column('total_bedrooms')
population = tf.feature_column.numeric_column('population')
households = tf.feature_column.numeric_column('households')
income = tf.feature_column.numeric_column('median_income')

In [18]:
feat_cols = [age, rooms, bedrooms, population, households, income]

In [19]:
input_func = tf.estimator.inputs.pandas_input_fn(x=X_train,
                                                 y=y_train,
                                                batch_size=10,
                                                num_epochs=1000,
                                                shuffle=True)

In [20]:
model = tf.estimator.DNNRegressor(hidden_units=[6,6,6], feature_columns=feat_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\ASUSPC~1\\AppData\\Local\\Temp\\tmpd0szvnld', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000019D13C49240>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [21]:
model.train(input_fn=input_func, steps=20000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\ASUSPC~1\AppData\Local\Temp\tmpd0szvnld\model.ckpt.
INFO:tensorflow:loss = 264027140000.0, step = 1
INFO:tensorflow:global_step/sec: 208.89
INFO:tensorflow:loss = 904005300000.0, step = 101 (0.481 sec)
INFO:tensorflow:global_step/sec: 283.993
INFO:tensorflow:loss = 548934250000.0, step = 201 (0.352 sec)
INFO:tensorflow:global_step/sec: 282.472
INFO:tensorflow:loss = 263664160000.0, step = 301 (0.359 sec)
INFO:tensorflow:global_step/sec: 337.601
INFO:tensorflow:loss = 487012430000.0, step = 401 (0.291 sec)
INFO:tensorflow:global_step/sec: 360.632
INFO:tensorflow:loss = 185789430000.0, step = 501 (0.281 sec)
INFO:tensorflow:global_step/sec: 379.109
INFO:tensorflow:loss = 365587140000.0, step = 601 (0.

INFO:tensorflow:global_step/sec: 682.09
INFO:tensorflow:loss = 216316540000.0, step = 7701 (0.146 sec)
INFO:tensorflow:global_step/sec: 721.351
INFO:tensorflow:loss = 98374820000.0, step = 7801 (0.142 sec)
INFO:tensorflow:global_step/sec: 731.878
INFO:tensorflow:loss = 134092956000.0, step = 7901 (0.137 sec)
INFO:tensorflow:global_step/sec: 716.199
INFO:tensorflow:loss = 101125290000.0, step = 8001 (0.139 sec)
INFO:tensorflow:global_step/sec: 731.879
INFO:tensorflow:loss = 103415450000.0, step = 8101 (0.138 sec)
INFO:tensorflow:global_step/sec: 634.604
INFO:tensorflow:loss = 73152225000.0, step = 8201 (0.156 sec)
INFO:tensorflow:global_step/sec: 696.303
INFO:tensorflow:loss = 103452050000.0, step = 8301 (0.145 sec)
INFO:tensorflow:global_step/sec: 691.501
INFO:tensorflow:loss = 107932760000.0, step = 8401 (0.146 sec)
INFO:tensorflow:global_step/sec: 668.449
INFO:tensorflow:loss = 92640330000.0, step = 8501 (0.150 sec)
INFO:tensorflow:global_step/sec: 691.501
INFO:tensorflow:loss = 1267

INFO:tensorflow:global_step/sec: 686.763
INFO:tensorflow:loss = 148870710000.0, step = 15601 (0.146 sec)
INFO:tensorflow:global_step/sec: 706.108
INFO:tensorflow:loss = 26804427000.0, step = 15701 (0.144 sec)
INFO:tensorflow:global_step/sec: 726.577
INFO:tensorflow:loss = 92948300000.0, step = 15801 (0.138 sec)
INFO:tensorflow:global_step/sec: 677.482
INFO:tensorflow:loss = 68974160000.0, step = 15901 (0.146 sec)
INFO:tensorflow:global_step/sec: 711.118
INFO:tensorflow:loss = 146341450000.0, step = 16001 (0.143 sec)
INFO:tensorflow:global_step/sec: 731.879
INFO:tensorflow:loss = 46368710000.0, step = 16101 (0.136 sec)
INFO:tensorflow:global_step/sec: 753.893
INFO:tensorflow:loss = 143419160000.0, step = 16201 (0.133 sec)
INFO:tensorflow:global_step/sec: 696.298
INFO:tensorflow:loss = 135435000000.0, step = 16301 (0.142 sec)
INFO:tensorflow:global_step/sec: 721.35
INFO:tensorflow:loss = 59739947000.0, step = 16401 (0.140 sec)
INFO:tensorflow:global_step/sec: 701.11
INFO:tensorflow:loss 

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x19d12f0bd30>

In [22]:
predict_input_func = tf.estimator.inputs.pandas_input_fn(x= X_test, 
                                                         batch_size=10, 
                                                         num_epochs=1,
                                                         shuffle=False)

In [23]:
pred_gen = model.predict(predict_input_func)

In [24]:
predictions = list(pred_gen)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\ASUSPC~1\AppData\Local\Temp\tmpd0szvnld\model.ckpt-20000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [25]:
predictions

[{'predictions': array([236890.72], dtype=float32)},
 {'predictions': array([307226.84], dtype=float32)},
 {'predictions': array([215906.81], dtype=float32)},
 {'predictions': array([186672.84], dtype=float32)},
 {'predictions': array([275857.25], dtype=float32)},
 {'predictions': array([199930.34], dtype=float32)},
 {'predictions': array([227354.02], dtype=float32)},
 {'predictions': array([207226.97], dtype=float32)},
 {'predictions': array([218448.53], dtype=float32)},
 {'predictions': array([192458.44], dtype=float32)},
 {'predictions': array([206195.19], dtype=float32)},
 {'predictions': array([224442.9], dtype=float32)},
 {'predictions': array([193742.22], dtype=float32)},
 {'predictions': array([179328.7], dtype=float32)},
 {'predictions': array([261481.03], dtype=float32)},
 {'predictions': array([178740.47], dtype=float32)},
 {'predictions': array([202177.84], dtype=float32)},
 {'predictions': array([188593.69], dtype=float32)},
 {'predictions': array([182105.36], dtype=float3

In [26]:
final_pred = []
for pred in predictions:
    final_pred.append(pred['predictions'])

In [29]:
mean_squared_error(y_test, final_pred)**0.5

99790.234721068