In [13]:
print('yo, the goal here is to predict a \'good\' and \'bad\' house purchase')

yo, the goal here is to predict a 'good' and 'bad' house purchase


In [5]:
import pandas as pd #work with data as tables
import numpy as np #use number matrices
import matplotlib.pyplot as plt
import tensorflow as tf

In [15]:
#Step 1 Load the data
dataframe = pd.read_csv('data.csv') #Dataframe object
#Remove the features we don't care about
dataframe = dataframe.drop(['index', 'price', 'sq_price'], axis=1)
dataframe = dataframe[0:10] #We are only going to use the first 10 rows
#print the data
dataframe

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


In [20]:
#Step 2 - Add labels
#1 is a good buy and 0 is a bad buy
dataframe.loc[:, ('y1')] = [1,1,1,0,0,1,0,1,1,1]
#y2 is a negation of y1 (opposite)
dataframe.loc[:, ('y2')] = dataframe['y1'] == 0
#convert true/false into 1/0
dataframe.loc[:, ('y2')] = dataframe['y2'].astype(int)
#print the data
dataframe

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


In [26]:
#Step 3 - Now we are going to start bringing in tensorflow
#First we need to prepare the data (tensors)

#Some quick concept
#tensors are a generic version of vectors and matrices
#vector - is a list of numbers (1D Tensor)
#matrix - is a list of numbers (2D Tensor)
#list of list of list of numbers (3D Tensor)

#convert the features into input tensors
inputX = dataframe.loc[:, ['area', 'bathrooms']].as_matrix()
#convert labels into input tensors
inputY = dataframe.loc[:, ['y1', 'y2']].as_matrix()

In [27]:
inputX #Printing out our input matrix (sample inputs)

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0]])

In [30]:
inputY #Printing out our label matrix (sample results)

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0]])

In [31]:
#Step 4 - Write out our hyperparameters
learning_rate = 0.000001 #How fast we reach convergence (optimal fit)
training_epoch = 2000 #Number of iterations
display_step = 50
n_samples = inputY.size


In [47]:
#Step 5 - Create our computation graph / neural network
#for feature input tensor, none means any number of examples
#2 is the size of the input tensor (# of data features)
x = tf.placeholder(tf.float32, [None,2])

#create weights
#2x2 float matrix, this will be updated throughout the training process
#Variables in tf hold and update paramters
#in memory buffers containing tensors
W = tf.Variable(tf.zeros([2,2]))

#add biases (example is b in the y = mx + b, b is the bias) - helps fitting
b = tf.Variable(tf.zeros([2]))

#We are now doing our first calculation
#Multiply our weights by our inputs, and add biases 
#weights are how we govern how data flows in our computation graph
y_values = tf.add(tf.matmul(x, W), b)

#apply softmax function to value we just created
#softmax is our activation function
#converts into probabilities
y = tf.nn.softmax(y_values)

#feed in a matrix of labels
y_ = tf.placeholder(tf.float32, [None, 2])


In [63]:
#Step 5 perform training
#create our cost function, mean squared error
#reduce sum computes the sum of elements across dimensions of a tensor
cost = tf.reduce_sum(tf.pow(y_ - y, 2))/(2*n_samples)
#Gradient descient, computing partial derrivative with respect to our input variables (weights and biases)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [52]:
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [65]:
#training loop
for i in range(training_epoch):
    sess.run(optimizer, feed_dict={x: inputX, y_: inputY})
    
    #write out logs of training
    if (i) % display_step == 0:
        cc = sess.run(cost, feed_dict={x: inputX, y_ :inputY})
        print("Training step:", '%04d' % (i), 'cost=', '{:.9f}'.format(cc))
        
print("optimization Finished!")
training_cost = sess.run(cost, feed_dict={x: inputX, y_: inputY})
print("Training Cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')

Training step: 0000 cost= 0.109537281
Training step: 0050 cost= 0.109537207
Training step: 0100 cost= 0.109537147
Training step: 0150 cost= 0.109537080
Training step: 0200 cost= 0.109537005
Training step: 0250 cost= 0.109536931
Training step: 0300 cost= 0.109536871
Training step: 0350 cost= 0.109536804
Training step: 0400 cost= 0.109536745
Training step: 0450 cost= 0.109536670
Training step: 0500 cost= 0.109536603
Training step: 0550 cost= 0.109536529
Training step: 0600 cost= 0.109536454
Training step: 0650 cost= 0.109536394
Training step: 0700 cost= 0.109536313
Training step: 0750 cost= 0.109536268
Training step: 0800 cost= 0.109536193
Training step: 0850 cost= 0.109536126
Training step: 0900 cost= 0.109536052
Training step: 0950 cost= 0.109535977
Training step: 1000 cost= 0.109535910
Training step: 1050 cost= 0.109535851
Training step: 1100 cost= 0.109535791
Training step: 1150 cost= 0.109535709
Training step: 1200 cost= 0.109535649
Training step: 1250 cost= 0.109535575
Training ste

In [68]:
#This will output data between 0-1
#1 is considered a good buy, 0 is considered a bad buy
sess.run(y, feed_dict = {x: inputX})

array([[ 0.71123087,  0.28876919],
       [ 0.66498965,  0.33501032],
       [ 0.73654455,  0.26345551],
       [ 0.64717317,  0.3528268 ],
       [ 0.78332442,  0.21667559],
       [ 0.70069939,  0.29930058],
       [ 0.65866619,  0.34133381],
       [ 0.64829433,  0.3517057 ],
       [ 0.64369309,  0.35630691],
       [ 0.65480596,  0.34519401]], dtype=float32)

In [None]:
#It's saying all houses are a good buy, this is 7/10 correct
