## Tensorflow for Classification

Classify is good buy or bad buy based on area and bathrooms. Based on https://www.youtube.com/watch?v=4urPuRoT1sE&list=PL2-dafEMk2A7YdKv4XfKpfbTH5z6rEEj3&index=4

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [28]:
# Step 1 is to load the data

dataframe = pd.read_csv('data.csv')
print("Shape is :",dataframe.shape)
print("Dataframe columns :",dataframe.columns)


modified_dataframe = dataframe.drop(['index', 'price', 'sq_price'], axis = 1)

modified_dataframe = modified_dataframe[0:10]

('Shape is :', (47, 5))
('Dataframe columns :', Index([u'index', u'area', u'bathrooms', u'price', u'sq_price'], dtype='object'))


In [40]:
# Step 2 is to add labels
# 1 is good buy, 0 is bad buy
modified_dataframe['y1'] = [1,1,1,0,0,1,0,1,1,1]
# y2 is negation of y1
modified_dataframe['y2'] = modified_dataframe['y1'] == 0
# convert boolean type to int
modified_dataframe['y2'] = modified_dataframe['y2'].astype(int)

modified_dataframe

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


In [49]:
# Step 3 is to prepare data for tensorflow 
# (tensors are a generic version of vectors and matrices)
# vector - list of numbers (1D tensor)
# matrices - list of list of numbers (2D tensor)
# list of list of list of numbers (3D tensors)
# ......

# convert features to input tensor
inputX = modified_dataframe.loc[:, ['area', 'bathrooms']].as_matrix()

#convert labels to input tensor
inputY = modified_dataframe.loc[:, ['y1', 'y2']].as_matrix()


In [51]:
inputX


array([[  2.10400000e+03,   3.00000000e+00],
       [  1.60000000e+03,   3.00000000e+00],
       [  2.40000000e+03,   3.00000000e+00],
       [  1.41600000e+03,   2.00000000e+00],
       [  3.00000000e+03,   4.00000000e+00],
       [  1.98500000e+03,   4.00000000e+00],
       [  1.53400000e+03,   3.00000000e+00],
       [  1.42700000e+03,   3.00000000e+00],
       [  1.38000000e+03,   3.00000000e+00],
       [  1.49400000e+03,   3.00000000e+00]])

In [52]:
inputY

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0]])

In [53]:
# Step 4 is to write out hyperparameters
learning_rate = 0.00001
training_epochs = 2000
display_step = 50
n_samples =inputY.size

In [64]:
# Step 5 is to create our computation graph/neural network
# placeholders are gateways for data into our computation graph
x = tf.placeholder(tf.float32, [None, 2]) #None means any number of examples

# create weights
# 2x2 float matrix that we'll keep updating through the training process
# variables in tf hold and update parameters
# in memory buffers containing tensors
W = tf.Variable(tf.zeros([2, 2]), name = "weights")

# add biases
b = tf.Variable(tf.zeros([2]), name = "biases")

# multiply weights by the input, first calculation
# weights are how we govern how data flows in our computation graph
# multiply inputs by weights and add biases
y_values = tf.add(tf.matmul(x, W), b)

# apply softmax to values we just created
# softmax is the activation function
y = tf.nn.softmax(y_values)

#feed in a matrix of labels
y_ = tf.placeholder(tf.float32, [None, 2])


In [65]:
# Step 6 is to perform training
# create our cost function , mean squared error
cost = tf.reduce_sum(tf.pow(y_ - y, 2))/(2*n_samples)

# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [66]:
# initialize variables and tensorflow session
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [68]:
# training loop
for i in range(training_epochs):
    sess.run(optimizer, feed_dict={x: inputX, y_: inputY}) 
    
    if (i) % display_step == 0:
        cc = sess.run(cost, feed_dict={x: inputX, y_:inputY})
        print "Training step:", '%04d' % (i), "cost=", "{:.9f}".format(cc) #, \"W=", sess.run(W), "b=", sess.run(b)

print "Optimization Finished!"
training_cost = sess.run(cost, feed_dict={x: inputX, y_: inputY})
print "Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n'


Training step: 0000 cost= 0.126285717
Training step: 0050 cost= 0.110974386
Training step: 0100 cost= 0.110973932
Training step: 0150 cost= 0.110973477
Training step: 0200 cost= 0.110973045
Training step: 0250 cost= 0.110972598
Training step: 0300 cost= 0.110972144
Training step: 0350 cost= 0.110971712
Training step: 0400 cost= 0.110971257
Training step: 0450 cost= 0.110970818
Training step: 0500 cost= 0.110970378
Training step: 0550 cost= 0.110969923
Training step: 0600 cost= 0.110969469
Training step: 0650 cost= 0.110969029
Training step: 0700 cost= 0.110968590
Training step: 0750 cost= 0.110968135
Training step: 0800 cost= 0.110967681
Training step: 0850 cost= 0.110967256
Training step: 0900 cost= 0.110966802
Training step: 0950 cost= 0.110966362
Training step: 1000 cost= 0.110965922
Training step: 1050 cost= 0.110965475
Training step: 1100 cost= 0.110965036
Training step: 1150 cost= 0.110964581
Training step: 1200 cost= 0.110964142
Training step: 1250 cost= 0.110963702
Training ste

In [70]:
sess.run(y, feed_dict={x: inputX })

array([[ 0.8154093 ,  0.18459074],
       [ 0.75591207,  0.24408792],
       [ 0.84476656,  0.15523341],
       [ 0.73102421,  0.26897579],
       [ 0.89262539,  0.10737454],
       [ 0.80259722,  0.19740283],
       [ 0.74722916,  0.25277081],
       [ 0.73272437,  0.2672756 ],
       [ 0.7261886 ,  0.27381143],
       [ 0.74186832,  0.25813165]], dtype=float32)