In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

In [2]:
# Step 1 load data
dataframe = pd.read_csv('data.csv')
# remove the features we dont care about
dataframe = dataframe.drop(['index', 'price', 'sq_price'], axis=1)
# we only use the first 10 rows
dataframe = dataframe[0:10]
dataframe

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


In [6]:
# Step 2 - add labels
# 1 is good and 0 is bad buy
dataframe.loc[:, ('y1')] = [1,1,1,0,0,1,0,1,1,1]
# y2 is a negation of y1, opposite
dataframe.loc[:, ('y2')] = dataframe['y1'] == 0
# turn TRUE/FALSE values to 1s and 0s
dataframe.loc[:, ('y2')] = dataframe['y2'].astype(int)
dataframe

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


In [8]:
# Step 3 - prepare data for tensprflow (tensors)
# convert features to input tensor
inputX = dataframe.loc[:, ['area', 'bathrooms']].as_matrix()
# convert labels to input tensors
inputY = dataframe.loc[:, ['y1', 'y2']].as_matrix()

  This is separate from the ipykernel package so we can avoid doing imports until
  """


In [9]:
inputX

array([[2.104e+03, 3.000e+00],
       [1.600e+03, 3.000e+00],
       [2.400e+03, 3.000e+00],
       [1.416e+03, 2.000e+00],
       [3.000e+03, 4.000e+00],
       [1.985e+03, 4.000e+00],
       [1.534e+03, 3.000e+00],
       [1.427e+03, 3.000e+00],
       [1.380e+03, 3.000e+00],
       [1.494e+03, 3.000e+00]])

In [10]:
inputY

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0]])

In [29]:
# Step 4 write out our hyperparameters
learning_rate = 0.00001
training_epochs = 2000
display_step = 50
n_samples = inputY.size

In [30]:
# Step 5 - Create our computation graph/neural network
# for feature input tensors, none means any numbers of examples
# placeholders are gateways for data into our computation graph
x = tf.placeholder(tf.float32, [None, 2])

# create weights
# 2x2 float matrix, that we'll keep updating through the
# training process
# variables in tf hold and update parameters
# in memory buffers containing tensors
W = tf.Variable(tf.zeros([2,2]))

# add biases (axample is b in y = mx + b b is the bias, like that)
b = tf.Variable(tf.zeros([2]))

# multiply our weights by our inputs, first calculation
# weights are how we govern how data flows in our computation graph
# multiply input by weights and biases
y_values = tf.add(tf.matmul(x, W), b)

# apply softmax to value we just created
# softmax is our activation function
y = tf.nn.softmax(y_values)

# feed in a matrix of labels
y_ = tf.placeholder(tf.float32, [None, 2])

In [31]:
# Step 6 perform training
# create out cost function, mean squared error
# reduce sum computes the sum of elements across dimensions of a tensor
cost = tf.reduce_sum(tf.pow(y_ - y, 2)) / (2 * n_samples)
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [32]:
# initialize variables and tensorflow session
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

In [33]:
# training loop
for i in range(training_epochs):
    sess.run(optimizer, feed_dict={x: inputX, y_: inputY})
    
    # write out logs of training
    if (i) % display_step == 0:
        cc = sess.run(cost, feed_dict={x: inputX, y_:inputY})
        print "Training step:", '%04d' % (i), "cost=", "{:.9f}".format(cc) 

print "Optimization Finished!"
training_cost = sess.run(cost, feed_dict={x: inputX, y_: inputY})
print "Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n'


Training step: 0000 cost= 0.134503588
Training step: 0050 cost= 0.112963296
Training step: 0100 cost= 0.112963341
Training step: 0150 cost= 0.112963371
Training step: 0200 cost= 0.112963371
Training step: 0250 cost= 0.112963393
Training step: 0300 cost= 0.112963438
Training step: 0350 cost= 0.112963490
Training step: 0400 cost= 0.112963490
Training step: 0450 cost= 0.112963513
Training step: 0500 cost= 0.112963557
Training step: 0550 cost= 0.112963572
Training step: 0600 cost= 0.112963609
Training step: 0650 cost= 0.112963632
Training step: 0700 cost= 0.112963654
Training step: 0750 cost= 0.112963699
Training step: 0800 cost= 0.112963714
Training step: 0850 cost= 0.112963729
Training step: 0900 cost= 0.112963758
Training step: 0950 cost= 0.112963796
Training step: 1000 cost= 0.112963811
Training step: 1050 cost= 0.112963848
Training step: 1100 cost= 0.112963870
Training step: 1150 cost= 0.112963893
Training step: 1200 cost= 0.112963915
Training step: 1250 cost= 0.112963937
Training ste

In [37]:
sess.run(y, feed_dict = {x: inputX})

array([[0.64457554, 0.3554244 ],
       [0.61144364, 0.38855633],
       [0.6634294 , 0.3365706 ],
       [0.5988487 , 0.40115133],
       [0.70024157, 0.29975843],
       [0.6370743 , 0.36292574],
       [0.6070201 , 0.39297998],
       [0.59981126, 0.40018877],
       [0.5966308 , 0.40336913],
       [0.6043304 , 0.39566955]], dtype=float32)

In [None]:
# its saying all houses are good buy 7/10
# how to improve? add a hidden layer