## Tensorflow Classification 

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline               

In [2]:
## Step 1 - Load the data 

#dataFrame = pd.read_csv("data/data.csv", usecols = ['area', 'bathrooms'])
dataFrame = pd.read_csv("data/data.csv")
dataFrame = dataFrame.drop(['index','price', 'sq_price'],axis= 1)
dataFrame = dataFrame[:10]
dataFrame

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


In [5]:
## Step 2 - Add the labels
dataFrame.loc[:, ("y1")] = [1, 1, 1, 0, 0, 1, 0, 1, 1, 1] 

dataFrame.loc[:, ("y2")] = dataFrame["y1"] == 0
dataFrame.loc[:, ("y2")] =  dataFrame.loc[:, ("y2")].astype("int")
dataFrame

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


In [6]:
## Step 3 - Preparing the data for tensorflow
### Creating the input tensors
# Tensors can be considered as the generic version of vectors and matrices
inputX = dataFrame.loc[:,["area","bathrooms"]].as_matrix()
inputY  = dataFrame.loc[:, ["y1","y2"]].as_matrix()

In [7]:
inputX

array([[  2.10400000e+03,   3.00000000e+00],
       [  1.60000000e+03,   3.00000000e+00],
       [  2.40000000e+03,   3.00000000e+00],
       [  1.41600000e+03,   2.00000000e+00],
       [  3.00000000e+03,   4.00000000e+00],
       [  1.98500000e+03,   4.00000000e+00],
       [  1.53400000e+03,   3.00000000e+00],
       [  1.42700000e+03,   3.00000000e+00],
       [  1.38000000e+03,   3.00000000e+00],
       [  1.49400000e+03,   3.00000000e+00]])

In [8]:
inputY

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0]])

In [9]:
## Step 4 - Write out the hyperparameters
# If these don't work out, we will have to change them and try again
learning_rate = 0.000001
training_epochs = 2000
display_step = 50 
n_samples = inputY.size

In [10]:
## Step 5 - Building the computation graph
### Creating the placeholders
### These act as gateways for the data into the computation graph
    # None - batch size 
    ## 2 - number of features
x= tf.placeholder(tf.float32, [None, 2])


## Create the weights using Variables
## Variables in tf hold and update parameters 
# in in memory buffers containing data
# For transfer learning these are pre initialized

W = tf.Variable(tf.zeros([2,2]))

## Add biases
b = tf.Variable(tf.zeros([2]))

## Multiplying weights to the input  
## Weights are how we govern the way data flows in the computation graph
y_values = tf.add(tf.matmul(x,W),b)

## Applying Softmax function to Y_values to generate a normalized probability as our output
# Softmax is the activation function
y = tf.nn.softmax(y_values)

# Feed in matrix of layers
y_ = tf.placeholder(tf.float32, [None, 2])

In [11]:
## Step 6 - Perform training 
# Create a cost function 
# reduce sum computes sum across dimensions of a tensor
cost = tf.reduce_sum(tf.pow(y_- y, 2))/(2*n_samples)

## Gradient descent

optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [12]:
## Initialize variables and tensorflow session
init = tf.initialize_all_variables()
session = tf.Session()
# initializing varaibles happens here
session.run(init)

Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [13]:
## Running our training loop
# We use feed dict to supply values to the placeholders we created earlier

for i in range(training_epochs):
    session.run(optimizer,feed_dict={x: inputX, y_: inputY})
    
    if (i) % display_step == 0:
        cc = session.run(cost, feed_dict={x: inputX, y_: inputY})
        print "Training step:", '%04d' % (i), "cost=", "{:.9f}".format(cc) #, \"W=", sess.run(W), "b=", sess.run(b)

print "Optimization Finished!"
training_cost = session.run(cost, feed_dict={x: inputX, y_: inputY})
print "Training cost=", training_cost, "W=", session.run(W), "b=", session.run(b), '\n'

Training step: 0000 cost= 0.114958666
Training step: 0050 cost= 0.109539941
Training step: 0100 cost= 0.109539881
Training step: 0150 cost= 0.109539807
Training step: 0200 cost= 0.109539732
Training step: 0250 cost= 0.109539673
Training step: 0300 cost= 0.109539606
Training step: 0350 cost= 0.109539531
Training step: 0400 cost= 0.109539464
Training step: 0450 cost= 0.109539405
Training step: 0500 cost= 0.109539330
Training step: 0550 cost= 0.109539248
Training step: 0600 cost= 0.109539196
Training step: 0650 cost= 0.109539129
Training step: 0700 cost= 0.109539054
Training step: 0750 cost= 0.109538987
Training step: 0800 cost= 0.109538913
Training step: 0850 cost= 0.109538853
Training step: 0900 cost= 0.109538779
Training step: 0950 cost= 0.109538712
Training step: 1000 cost= 0.109538652
Training step: 1050 cost= 0.109538577
Training step: 1100 cost= 0.109538510
Training step: 1150 cost= 0.109538436
Training step: 1200 cost= 0.109538376
Training step: 1250 cost= 0.109538302
Training ste

In [15]:
session.run(y, feed_dict={x: inputX })

array([[ 0.71125221,  0.28874779],
       [ 0.66498977,  0.33501023],
       [ 0.73657656,  0.26342347],
       [ 0.64718789,  0.35281211],
       [ 0.78335613,  0.2166439 ],
       [ 0.70069474,  0.29930523],
       [ 0.65866327,  0.34133676],
       [ 0.64828628,  0.35171372],
       [ 0.64368278,  0.35631716],
       [ 0.65480113,  0.3451989 ]], dtype=float32)