# California Housing Price Prediction using Tensorflow

### Importing the libraries required for the prediction

In [5]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [6]:
housing = pd.read_csv("housing2.csv")

In [7]:
housing.head()

Unnamed: 0,housingMedianAge,totalRooms,totalBedrooms,population,households,medianIncome,medianHouseValue
0,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0
1,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0
2,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0
3,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0
4,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0


In [8]:
housing.head()

Unnamed: 0,housingMedianAge,totalRooms,totalBedrooms,population,households,medianIncome,medianHouseValue
0,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0
1,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0
2,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0
3,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0
4,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0


In [9]:
housing.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
housingMedianAge,20640.0,28.639486,12.585558,1.0,18.0,29.0,37.0,52.0
totalRooms,20640.0,2635.763081,2181.615252,2.0,1447.75,2127.0,3148.0,39320.0
totalBedrooms,20640.0,537.898014,421.247906,1.0,295.0,435.0,647.0,6445.0
population,20640.0,1425.476744,1132.462122,3.0,787.0,1166.0,1725.0,35682.0
households,20640.0,499.53968,382.329753,1.0,280.0,409.0,605.0,6082.0
medianIncome,20640.0,3.870671,1.899822,0.4999,2.5634,3.5348,4.74325,15.0001
medianHouseValue,20640.0,206855.816909,115395.615874,14999.0,119600.0,179700.0,264725.0,500001.0


In [11]:
y_val = housing['medianHouseValue']

In [12]:
x_data = housing.drop("medianHouseValue", axis=1)

### Splitting the data into the training and testing

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
X_train, X_test, y_train, y_test = train_test_split(x_data, y_val, test_size=0.3, random_state=42)

### Scaling the feature data

In [15]:
from sklearn.preprocessing import MinMaxScaler

In [16]:
scaler = MinMaxScaler()

In [17]:
scaler.fit(X_train)

MinMaxScaler(copy=True, feature_range=(0, 1))

In [18]:
X_train = pd.DataFrame(data= scaler.transform(X_train), columns= X_train.columns, index=X_train.index)

In [19]:
X_test = pd.DataFrame(data= scaler.transform(X_test), columns= X_test.columns, index=X_test.index)

### Creating the feature columns for the regression model

In [20]:
housing.columns

Index(['housingMedianAge', 'totalRooms', 'totalBedrooms', 'population',
       'households', 'medianIncome', 'medianHouseValue'],
      dtype='object')

In [21]:
age = tf.feature_column.numeric_column('housingMedianAge')
rooms = tf.feature_column.numeric_column('totalRooms')
bedrooms = tf.feature_column.numeric_column('totalBedrooms')
pop = tf.feature_column.numeric_column('population')
households = tf.feature_column.numeric_column('households')
income = tf.feature_column.numeric_column('medianIncome')

In [22]:
feat_cols = [age, rooms, bedrooms, pop, households, income]

In [23]:
input_func = tf.estimator.inputs.pandas_input_fn(x= X_train, y= y_train, 
                                                   batch_size=10, num_epochs= 1000,
                                                  shuffle = True)

### Deep Neural Network (DNN) regression model creation in which 3 hidden layers of 6 neurons each

In [24]:
model = tf.estimator.DNNRegressor(hidden_units= [6,6,6], feature_columns= feat_cols)

W0925 22:23:40.479834  8080 estimator.py:1811] Using temporary folder as model directory: C:\Users\ARPIT\AppData\Local\Temp\tmp_b8g0lo9


### Training the model by providing the feature data

In [25]:
model.train(input_fn=input_func, steps=10000)

W0925 22:23:41.491019  8080 deprecation.py:323] From C:\Users\ARPIT\Anaconda3\lib\site-packages\tensorflow\python\training\training_util.py:236: Variable.initialized_value (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
W0925 22:23:41.521934  8080 deprecation.py:323] From C:\Users\ARPIT\Anaconda3\lib\site-packages\tensorflow_estimator\python\estimator\inputs\queues\feeding_queue_runner.py:62: QueueRunner.__init__ (from tensorflow.python.training.queue_runner_impl) is deprecated and will be removed in a future version.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
W0925 22:23:41.524925  8080 deprecation.py:323] From C:\Users\ARPIT\Anaconda3\lib\site-packages\tensorflow_estimator\python\estimator\inputs\queues\feeding_functions.py:500: add_queue_runner (from

<tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor at 0x17647631e08>

### Predicting the result using the testing data and converting it into the List data type

In [26]:
predict_input_func = tf.estimator.inputs.pandas_input_fn(x= X_test, 
                                                         batch_size=10,
                                                        num_epochs=1, shuffle = False)

In [27]:
pred_gen = model.predict(predict_input_func)

In [28]:
predictions = list(pred_gen)

W0925 22:24:05.397456  8080 deprecation.py:323] From C:\Users\ARPIT\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


In [29]:
predictions

[{'predictions': array([172558.2], dtype=float32)},
 {'predictions': array([203677.28], dtype=float32)},
 {'predictions': array([267357.12], dtype=float32)},
 {'predictions': array([215820.94], dtype=float32)},
 {'predictions': array([216856.06], dtype=float32)},
 {'predictions': array([209145.62], dtype=float32)},
 {'predictions': array([227509.12], dtype=float32)},
 {'predictions': array([216073.23], dtype=float32)},
 {'predictions': array([180701.28], dtype=float32)},
 {'predictions': array([286041.25], dtype=float32)},
 {'predictions': array([179463.31], dtype=float32)},
 {'predictions': array([215881.03], dtype=float32)},
 {'predictions': array([213169.16], dtype=float32)},
 {'predictions': array([201724.75], dtype=float32)},
 {'predictions': array([232858.7], dtype=float32)},
 {'predictions': array([239025.53], dtype=float32)},
 {'predictions': array([217793.52], dtype=float32)},
 {'predictions': array([191054.62], dtype=float32)},
 {'predictions': array([219574.16], dtype=float3

### Calculating the Root Mean Square Error (RMSE)

In [30]:
final_pred = []

for pred in predictions:
    final_pred.append(pred['predictions'])

In [31]:
from sklearn.metrics import mean_squared_error

In [32]:
mean_squared_error(y_test, final_pred)**0.5

98836.19907279986