In [None]:
# This notebook is a simple guide to use neural networks to learn to determine if a house is a good or a bad buy 
# by classfication using tensorflow.
# Credits: https://github.com/llSourcell/How_to_use_Tensorflow_for_classification-LIVE

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
# Step 1 - Load data

dataframe = pd.read_csv('../data/etc/house_prices.csv') # Dataframe object

In [3]:
dataframe.head() # Sample dataframe

Unnamed: 0,index,area,bathrooms,price,sq_price
0,0,2104.0,3.0,399900.0,190.06654
1,1,1600.0,3.0,329900.0,206.1875
2,2,2400.0,3.0,369000.0,153.75
3,3,1416.0,2.0,232000.0,163.841808
4,4,3000.0,4.0,539900.0,179.966667


In [4]:
dataframe = dataframe.drop(['index','price','sq_price'], axis=1) # Drop features that I do not want to use
dataframe = dataframe[0:10] # Only use the first 10 rows
dataframe

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


In [5]:
# Step 2 - Add labels

# 1 = good buy, 0 = bad buy
dataframe.loc[:, 'y1'] = [1, 1, 1, 0, 0, 1, 0, 1, 1, 1] 
# y2 = oposite of y1
dataframe.loc[:, 'y2'] = dataframe['y1'] == 0 # If y1 == 0: y2 == True; else: y2 == False
dataframe.loc[:, 'y2'] = dataframe['y2'].astype(int)# Turn True/False values to 1s and 0s
dataframe

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


In [6]:
# Step 3 - Prepare data for tensorflow

# Tensors are a generic version of vectors and matrices
# Vector = list of numbers (1D Tensor)
# Matrix = list of list of numbers (2D Tensor)
# List of list of list of numbers (3D Tensor)
# ...

# Convert features to input tensor
inputX = dataframe.loc[:, ['area', 'bathrooms']].as_matrix()
# Convert labels to input tensor
inputY = dataframe.loc[:, ['y1','y2']].as_matrix()

In [7]:
inputX, type(inputX)

(array([[2.104e+03, 3.000e+00],
        [1.600e+03, 3.000e+00],
        [2.400e+03, 3.000e+00],
        [1.416e+03, 2.000e+00],
        [3.000e+03, 4.000e+00],
        [1.985e+03, 4.000e+00],
        [1.534e+03, 3.000e+00],
        [1.427e+03, 3.000e+00],
        [1.380e+03, 3.000e+00],
        [1.494e+03, 3.000e+00]]), numpy.ndarray)

In [8]:
inputY, type(inputY)

(array([[1, 0],
        [1, 0],
        [1, 0],
        [0, 1],
        [0, 1],
        [1, 0],
        [0, 1],
        [1, 0],
        [1, 0],
        [1, 0]]), numpy.ndarray)

In [9]:
# Step 4 - Write out hyperparameters

learning_rate = 0.000001 # Learning rate defines how fast the model converge
training_epochs = 2000
display_step = 50
n_samples = inputY.size

In [10]:
# Step 5 - Create computation graph/neural network model (see the image ../data/house_prices_nn.jpg)

# For feature input tensors, None means any numbers of examples (blank card)
x = tf.placeholder(tf.float32, [None, 2]) # placeholders are gateways for data into computaton graphs

# Create weights
# Variables in tf hold and update parameters
W = tf.Variable(tf.zeros([2, 2])) # 2 x 2 float matrix that will be updated through the training process

# Add biases (example: in Y = mX + b, b is the bias)
b = tf.Variable(tf.zeros([2]))

# Multiply weights by inputs = first calculation
# Weights states how the data will flow in the newral network
# Multiply input by weights and add biases
y_values = tf.add(tf.matmul(x, W), b) # Y = mX + b

# Apply softmax = activation function that normalizes the value
# Softmax takes the values recieved and convert into probabilities
y = tf.nn.softmax(y_values)

# Feed in a matrix of labels
y_ = tf.placeholder(tf.float32, [None, 2])

In [11]:
# Step 6 perform training

# Create cost function = Mean Squared Error
# The function reduce_sum computes the sum of elements across dimensions of a tensor
cost = tf.reduce_sum(tf.pow(y_ - y, 2))/(2 * n_samples)

# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [12]:
# Initialize variables and tensorflow session
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [13]:
# Training loop

for i in range(training_epochs):
    sess.run(optimizer, feed_dict = {x:inputX, y_:inputY})
    
    # Write out logs of training
    if i % display_step == 0:
        cc = sess.run(cost, feed_dict = {x:inputX, y_:inputY})
        print('training step','%04d' % i, 'cost = ', '{:.9f}'.format(cc))
        
print('Optimization Finished!')
training_cost = sess.run(cost, feed_dict = {x:inputX, y_: inputY})
print('Training cost = ', training_cost, 'W = ',sess.run(W), 'b = ', sess.run(b))
        
        

training step 0000 cost =  0.114958666
training step 0050 cost =  0.109539948
training step 0100 cost =  0.109539881
training step 0150 cost =  0.109539807
training step 0200 cost =  0.109539740
training step 0250 cost =  0.109539665
training step 0300 cost =  0.109539606
training step 0350 cost =  0.109539531
training step 0400 cost =  0.109539464
training step 0450 cost =  0.109539405
training step 0500 cost =  0.109539330
training step 0550 cost =  0.109539263
training step 0600 cost =  0.109539188
training step 0650 cost =  0.109539129
training step 0700 cost =  0.109539054
training step 0750 cost =  0.109538995
training step 0800 cost =  0.109538913
training step 0850 cost =  0.109538853
training step 0900 cost =  0.109538786
training step 0950 cost =  0.109538712
training step 1000 cost =  0.109538652
training step 1050 cost =  0.109538570
training step 1100 cost =  0.109538510
training step 1150 cost =  0.109538451
training step 1200 cost =  0.109538391
training step 1250 cost =

In [14]:
# Prediction
sess.run(y, feed_dict = {x:inputX})

array([[0.7112522 , 0.2887478 ],
       [0.66498977, 0.33501023],
       [0.73657656, 0.26342347],
       [0.6471879 , 0.3528121 ],
       [0.78335613, 0.2166439 ],
       [0.70069474, 0.29930523],
       [0.6586633 , 0.34133676],
       [0.6482863 , 0.35171372],
       [0.6436828 , 0.35631716],
       [0.65480113, 0.3451989 ]], dtype=float32)

In [15]:
# It is saying all houses are a good buy 7/10
# To improve it, add a hidden layer