In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [88]:
# step 1: load data
data = pd.read_csv('data.csv')

## remove unwanted features
data = data.drop(['index', 'price', 'sq_price'], axis=1)

# only use first 10 lines
data = data[:10] # could have written data[0:10] also
data

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


Probably want to normalize the area data.

In [90]:
# the dataset has no truth labels, so we need to make some up and add them
# step 2: add labels (to the 10 we're using)
# 1 is good buy, 0 is bad buy
data.loc[:, ('y1')] = [1,1,1,0,0,1,0,1,1,1]

# adding column negating the y1 column (this seems totally unnecessary)
data.loc[:, ('y2')] = data['y1'] == 0

# True/False => 1/0
data.loc[:, ('y2')] = data['y2'].astype(int)
data

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


In [91]:
# step 3: prepare data for Tensorflow
# this means we need to make tensors

#input
inputX = data.loc[:, ['area', 'bathrooms']].as_matrix()
print('inputX shape: ', inputX.shape)

# labels
inputY = data.loc[:, ['y1', 'y2']].as_matrix()
print('inputY shape: ', inputY.shape)

inputX shape:  (10, 2)
inputY shape:  (10, 2)


In [86]:
inputX

array([[  2.10400000e+03,   3.00000000e+00],
       [  1.60000000e+03,   3.00000000e+00],
       [  2.40000000e+03,   3.00000000e+00],
       [  1.41600000e+03,   2.00000000e+00],
       [  3.00000000e+03,   4.00000000e+00],
       [  1.98500000e+03,   4.00000000e+00],
       [  1.53400000e+03,   3.00000000e+00],
       [  1.42700000e+03,   3.00000000e+00],
       [  1.38000000e+03,   3.00000000e+00],
       [  1.49400000e+03,   3.00000000e+00]])

In [92]:
inputY

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0]])

In [93]:
# step 4: hyperparameters
learning_rate = 0.000001
epochs = 2000
display_every = 50
n_samples = inputY.size
n_features = inputX.shape[1]
n_outputs = inputY.shape[1]

In [94]:
# step 5: define network graph

# input feature tensor, None = any number of examples
# 2 is number of features
x = tf.placeholder(tf.float32, [None, n_features])

# weights
# 2x2 float matrix, 2 because number of features
W = tf.Variable(tf.zeros([n_features,n_features]))
tf.zeros

# bias
b = tf.Variable(tf.zeros([n_features]))

# forward pass
y = tf.add(tf.matmul(x, W), b)

# activation: softmax 
# softmax converts output to a probability
y = tf.nn.softmax(y)

# labels, needed for training loss
y_target = tf.placeholder(tf.float32, [None, n_outputs])

In [95]:
# step 6: training

# MSE cost function
cost = tf.reduce_sum(tf.pow(y_target - y, 2) / (2 * n_samples))

# gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [96]:
# step 7: initialize session
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [97]:
# step 8: define training loop

for i in range(epochs):
    sess.run(optimizer, feed_dict={x: inputX, y_target: inputY})
    
    # progress of training
    if (i % display_every) == 0:
        cc = sess.run(cost, feed_dict={x: inputX, y_target: inputY})
        print('epoch: ', i, ', cost: ', "{:.9f}".format(cc))
        
print('training complete')
training_cost = sess.run(cost, feed_dict={x: inputX, y_target: inputY})
print('training cost: ', training_cost, ', W: ', sess.run(W), ', b: ', sess.run(b))

epoch:  0 , cost:  0.114958666
epoch:  50 , cost:  0.109539941
epoch:  100 , cost:  0.109539881
epoch:  150 , cost:  0.109539807
epoch:  200 , cost:  0.109539732
epoch:  250 , cost:  0.109539673
epoch:  300 , cost:  0.109539606
epoch:  350 , cost:  0.109539539
epoch:  400 , cost:  0.109539472
epoch:  450 , cost:  0.109539405
epoch:  500 , cost:  0.109539330
epoch:  550 , cost:  0.109539263
epoch:  600 , cost:  0.109539196
epoch:  650 , cost:  0.109539129
epoch:  700 , cost:  0.109539062
epoch:  750 , cost:  0.109538995
epoch:  800 , cost:  0.109538920
epoch:  850 , cost:  0.109538853
epoch:  900 , cost:  0.109538779
epoch:  950 , cost:  0.109538719
epoch:  1000 , cost:  0.109538652
epoch:  1050 , cost:  0.109538570
epoch:  1100 , cost:  0.109538503
epoch:  1150 , cost:  0.109538451
epoch:  1200 , cost:  0.109538369
epoch:  1250 , cost:  0.109538317
epoch:  1300 , cost:  0.109538242
epoch:  1350 , cost:  0.109538175
epoch:  1400 , cost:  0.109538093
epoch:  1450 , cost:  0.109538034
epo

In [99]:
# looking at probabilitites of predictions
# the softmax converts these from probability to classification label
sess.run(y, feed_dict = {x:inputX})

array([[ 0.71125221,  0.28874779],
       [ 0.66498977,  0.33501023],
       [ 0.73657656,  0.26342347],
       [ 0.64718789,  0.35281211],
       [ 0.78335613,  0.2166439 ],
       [ 0.70069474,  0.29930523],
       [ 0.65866327,  0.34133676],
       [ 0.64828628,  0.35171372],
       [ 0.64368278,  0.35631716],
       [ 0.65480113,  0.3451989 ]], dtype=float32)

The predictions are not so great, so the next step would be to add a hidden layer.