In [9]:
# import necessary libraries and packages

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [10]:
# Read the input CSV into a DataFrame, then print its first rows followed by
# its column index so the feature and label columns can be checked by eye.

housing_data = pd.read_csv('cal_housing_clean.csv')
print(housing_data.head(), housing_data.columns, sep='\n')

   housingMedianAge  totalRooms  totalBedrooms  population  households  \
0              41.0       880.0          129.0       322.0       126.0   
1              21.0      7099.0         1106.0      2401.0      1138.0   
2              52.0      1467.0          190.0       496.0       177.0   
3              52.0      1274.0          235.0       558.0       219.0   
4              52.0      1627.0          280.0       565.0       259.0   

   medianIncome  medianHouseValue  
0        8.3252          452600.0  
1        8.3014          358500.0  
2        7.2574          352100.0  
3        5.6431          341300.0  
4        3.8462          342200.0  
Index(['housingMedianAge', 'totalRooms', 'totalBedrooms', 'population',
       'households', 'medianIncome', 'medianHouseValue'],
      dtype='object')


In [11]:
# Show summary statistics (count, mean, std, min, quartiles, max) for each column;
# as the last expression of the cell, the result is rendered as the cell output.
housing_data.describe()

Unnamed: 0,housingMedianAge,totalRooms,totalBedrooms,population,households,medianIncome,medianHouseValue
count,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0
mean,28.639486,2635.763081,537.898014,1425.476744,499.53968,3.870671,206855.816909
std,12.585558,2181.615252,421.247906,1132.462122,382.329753,1.899822,115395.615874
min,1.0,2.0,1.0,3.0,1.0,0.4999,14999.0
25%,18.0,1447.75,295.0,787.0,280.0,2.5634,119600.0
50%,29.0,2127.0,435.0,1166.0,409.0,3.5348,179700.0
75%,37.0,3148.0,647.0,1725.0,605.0,4.74325,264725.0
max,52.0,39320.0,6445.0,35682.0,6082.0,15.0001,500001.0


In [12]:
# Features are every column except the last; the label is the last column.
# Hold out 30% of the rows for testing, with a fixed random_state so the
# same partition is produced on every run.

X, y = housing_data.iloc[:, :-1], housing_data.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [13]:
# The features are on very different scales, so rescale each one to [0, 1] with MinMaxScaler.
# The scaler is fitted on the training split only and then applied to both splits,
# so the test split does not influence the fitted minimum/maximum values.
# transform() returns a NumPy array, so each result is wrapped back into a DataFrame
# that reuses the column labels and row index of the split it was computed from
# (taken from the frame itself rather than a hand-typed list, so both splits stay consistent).

Scaler = MinMaxScaler()
Scaler.fit(X_train)
X_train = pd.DataFrame(Scaler.transform(X_train), columns = X_train.columns, index = X_train.index)
X_test = pd.DataFrame(Scaler.transform(X_test), columns = X_test.columns, index = X_test.index)

In [14]:
# Declare one numeric feature column per input feature, keyed by the
# DataFrame column name it reads from, and collect them for the estimator.

hmedage, totalR, totalB, popul, hhold, medinc = (
    tf.feature_column.numeric_column(column_key)
    for column_key in (
        'housingMedianAge',
        'totalRooms',
        'totalBedrooms',
        'population',
        'households',
        'medianIncome',
    )
)

feat_cols = [hmedage, totalR, totalB, popul, hhold, medinc]

In [15]:
# Training input function: feeds the scaled training features and their labels
# to the estimator in shuffled batches of 2 rows, for up to 1000 passes over the data.

input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=2,
    num_epochs=1000,
    shuffle=True,
)


# DNNRegressor estimator with five hidden layers of 25 units each.

model = tf.estimator.DNNRegressor(
    hidden_units=[25] * 5,
    feature_columns=feat_cols,
)

# Train with steps=45000 (the original notes mention step counts in the
# range 10000-45000 being tried; this cell uses 45000).

model.train(input_fn=input_func, steps=45000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_every_n_hours': 10000, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_save_summary_steps': 100, '_log_step_count_steps': 100, '_model_dir': 'C:\\Users\\GuptaMoh\\AppData\\Local\\Temp\\tmp5idlbyi_', '_tf_random_seed': 1}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\GuptaMoh\AppData\Local\Temp\tmp5idlbyi_\model.ckpt.
INFO:tensorflow:loss = 307937400000.0, step = 1
INFO:tensorflow:global_step/sec: 344.221
INFO:tensorflow:loss = 10506132000.0, step = 101 (0.291 sec)
INFO:tensorflow:global_step/sec: 328.395
INFO:tensorflow:loss = 35287730000.0, step = 201 (0.289 sec)
INFO:tensorflow:global_step/sec: 396.494
INFO:tensorflow:loss = 12097672000.0, step = 301 (0.252 sec)
INFO:tensorflow:global_step/sec: 378.202
INFO:tensorflow:loss = 7607733000.0, step = 401 (0.264 sec)
INFO:tensorflow:

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x6b1f828>

In [16]:
# Build a prediction input function over the test split and use the estimator's
# .predict method to create a list of predictions for the test data.
# A single unshuffled pass (num_epochs=1, shuffle=False) keeps the predictions
# in the same row order as y_test, which the error computation below relies on.
# NOTE(review): y_test is also passed here; prediction presumably needs only the
# features - confirm before removing it.

prediction = tf.estimator.inputs.pandas_input_fn(X_test, y_test, num_epochs=1, shuffle=False)

pred = list(model.predict(prediction))

# Each yielded item is a dict; keep only the value stored under 'predictions'.
final_preds = [val['predictions'] for val in pred]

# Root mean squared error on the test split, used as the measure of model accuracy
# (mean_squared_error is imported with the other imports at the top of the notebook).

mean_squared_error(y_test, final_preds)**0.5

INFO:tensorflow:Restoring parameters from C:\Users\GuptaMoh\AppData\Local\Temp\tmp5idlbyi_\model.ckpt-45000


79188.45369178959