# Classification of House Prices using Tensorflow

In [2]:
# For working with data as tables
import pandas as pd
# For number matrices
import numpy as np
# For plotting
import matplotlib.pyplot as plot
# Tensorflow
import tensorflow as tf

In [3]:
# Load data
dataframe = pd.read_csv("../Datasets/house_pricing.csv")
# Remove unnecessary features
dataframe = dataframe.drop(['index', 'price', 'sq_price'], axis=1)
dataframe = dataframe[0:10]
dataframe

Unnamed: 0,area,bathrooms
0,2104,3
1,1600,3
2,2400,3
3,1416,2
4,3000,4
5,1985,4
6,1534,3
7,1427,3
8,1380,3
9,1494,3


In [4]:
# Label the data
# 1 is a good buy and 0 is a bad buy
dataframe.loc[:, ('y1')] = [1, 1, 1, 0, 0, 1, 0, 1, 1, 1]
# y2 is a negation of y1
dataframe.loc[:, ('y2')] = dataframe['y1'] == 0
# Converting TRUE/FALSE to 1 and 0
dataframe.loc[:, ('y2')] = dataframe['y2'].astype(int)
dataframe

Unnamed: 0,area,bathrooms,y1,y2
0,2104,3,1,0
1,1600,3,1,0
2,2400,3,1,0
3,1416,2,0,1
4,3000,4,0,1
5,1985,4,1,0
6,1534,3,0,1
7,1427,3,1,0
8,1380,3,1,0
9,1494,3,1,0


In [5]:
# Prepare data for Tensorflow (tensors)
# tensors - generic version of vectors and matrices
# vector - list of numbers (1D tensor)
# matrix - list of list of numbers (2D tensor)
# list of list of list of numbers - 3D tensor
# and the list continues
# tensors are how we represent data in tensorflow

# Convert features into input tensor
inputX = dataframe.loc[:, ['area', 'bathrooms']].as_matrix()
# Convert labels into input tensor
inputY = dataframe.loc[:, ['y1', 'y2']].as_matrix()

In [6]:
inputX

array([[  2.10400000e+03,   3.00000000e+00],
       [  1.60000000e+03,   3.00000000e+00],
       [  2.40000000e+03,   3.00000000e+00],
       [  1.41600000e+03,   2.00000000e+00],
       [  3.00000000e+03,   4.00000000e+00],
       [  1.98500000e+03,   4.00000000e+00],
       [  1.53400000e+03,   3.00000000e+00],
       [  1.42700000e+03,   3.00000000e+00],
       [  1.38000000e+03,   3.00000000e+00],
       [  1.49400000e+03,   3.00000000e+00]])

In [7]:
inputY

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0]])

In [47]:
# Hyperparamets
learning_rate = 0.000001
training_epochs = 2000
# how often we want to display the process of training
display_step = 50
n_samples = inputY.size

<img src="../Assets/softmaxnn.png" />

In [23]:
# Create our computational graph/nueral network
# In tensorflow placeholders are gateways for data into our computation graph
# Placeholder for feature input tensor
# None => any number of example
# 2 => number of features
x = tf.placeholder(tf.float32, [None, 2])

# Define weights
# 2x2 float martix, starting with zeros and we will keep updating this through the training process
# Variables in tf hold and update parameters (in memory buffers holding tensors)
w = tf.Variable(tf.zeros([2,2]))

# define bias
# bias is like 'b' in y = mx + b equation in gradient descent
# we update values of bias through 'back propagation'
b = tf.Variable(tf.zeros([2]))

# Multiply our weights by inputs
# Weights govern the flow of data in our computation graph
# multiply inputs with weights and add biases
y_values = tf.add(tf.matmul(x, w), b)

# Applying softmax function to the above y_values
# Softmax function is our activation function - it normalizes our values
y = tf.nn.softmax(y_values)

# Feed in a matrix of labels
y_ = tf.placeholder(tf.float32, [None, 2])
y

<tf.Tensor 'Softmax_10:0' shape=(?, 2) dtype=float32>

In [27]:
# Perform Training

# Create our cost function - Mean squared error
# reduce_sum: Computes the sum of elements across dimensions of a tensor.
cost = tf.reduce_sum(tf.pow(y_ - y, 2))/(2*n_samples)

# Applying Gradient Descent
# GradientDescentOptimizer: Optimizer that implements the gradient descent algorithm.
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
optimizer

<tf.Operation 'GradientDescent_2' type=NoOp>

In [42]:
# Initialize all variables and start Tensorflow session
init = tf.initialize_all_variables()
# print(init)
session = tf.Session()
session.run(init)

Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [49]:
for i in range(training_epochs):  
    # Take a gradient descent step using our inputs and labels
    session.run(optimizer, feed_dict={x: inputX, y_: inputY})

    # That's all! The rest of the cell just outputs debug messages. 
    # Display logs per epoch step
    if (i) % display_step == 0:
        cc = session.run(cost, feed_dict={x: inputX, y_:inputY})
        print "Training step:", '%04d' % (i), "cost=", "{:.9f}".format(cc) 
        #, \"W=", sess.run(W), "b=", sess.run(b)

print "Optimization Finished!"
training_cost = session.run(cost, feed_dict={x: inputX, y_: inputY})
print "Training cost=", training_cost, "W=", session.run(w), "b=", session.run(b), '\n'

Training step: 0000 cost= 0.109530188
Training step: 0050 cost= 0.109530114
Training step: 0100 cost= 0.109530047
Training step: 0150 cost= 0.109529972
Training step: 0200 cost= 0.109529898
Training step: 0250 cost= 0.109529831
Training step: 0300 cost= 0.109529778
Training step: 0350 cost= 0.109529711
Training step: 0400 cost= 0.109529637
Training step: 0450 cost= 0.109529570
Training step: 0500 cost= 0.109529495
Training step: 0550 cost= 0.109529421
Training step: 0600 cost= 0.109529376
Training step: 0650 cost= 0.109529302
Training step: 0700 cost= 0.109529234
Training step: 0750 cost= 0.109529160
Training step: 0800 cost= 0.109529093
Training step: 0850 cost= 0.109529033
Training step: 0900 cost= 0.109528944
Training step: 0950 cost= 0.109528899
Training step: 1000 cost= 0.109528817
Training step: 1050 cost= 0.109528758
Training step: 1100 cost= 0.109528683
Training step: 1150 cost= 0.109528616
Training step: 1200 cost= 0.109528542
Training step: 1250 cost= 0.109528467
Training ste

In [55]:
# So It's guessing they're all good houses. 
# That makes it get 7/10 correct. Not terribly impressive. 
# A model with a hidden layer should do better, I guess.
# Btw, this is how I calculated the softmax values in the post:
session.run(tf.nn.softmax([1., 2.]))

array([ 0.26894143,  0.7310586 ], dtype=float32)