In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the cleaned Iris table from a CSV in the current working directory.
data = pd.read_csv(filepath_or_buffer='./Iris-cleaned.csv')

In [3]:
# Preview the first five rows of the loaded table.
data.head(n=5)

Unnamed: 0,sepal length,sepal width,petal length,petal width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [4]:
# Inspect the dtype of every column in the loaded frame.
data.dtypes

sepal length    float64
sepal width     float64
petal length    float64
petal width     float64
species          object
dtype: object

In [5]:
# Re-encode the species labels as a pandas categorical, then re-check the dtypes.
data['species'] = data['species'].astype('category')
data.dtypes

sepal length     float64
sepal width      float64
petal length     float64
petal width      float64
species         category
dtype: object

In [6]:
# Targets: one indicator column per species value.
species = pd.get_dummies(data['species'])
# Inputs: the first four columns of the table (the float measurements).
features = data.iloc[:, :4]

In [7]:
# Preview the first five rows of the feature columns.
features.head(n=5)

Unnamed: 0,sepal length,sepal width,petal length,petal width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [8]:
# Preview the first ten rows of the full table, label column included.
data.head(n=10)

Unnamed: 0,sepal length,sepal width,petal length,petal width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


In [9]:
# Count how many rows belong to each species.
data['species'].value_counts()

virginica     50
versicolor    50
setosa        50
Name: species, dtype: int64

In [10]:
# Preview the first five rows of the one-hot encoded labels.
species.head(n=5)

Unnamed: 0,setosa,versicolor,virginica
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0


In [11]:
# Standardize each feature column: subtract its mean, then divide by its
# standard deviation.
feature_s = features.sub(features.mean()).div(features.std())

In [12]:
# Show the shapes of the standardized features and of the one-hot labels,
# one per line.
print(feature_s.shape, species.shape, sep='\n')

(150, 4)
(150, 3)


In [13]:
# x and y are fed at run time and are not trainable; w and b are the
# trainable parameters of the linear layer.

# Fix the graph-level seed so the random initial values of w and b are the
# same on every Restart & Run All.
tf.set_random_seed(0)

# The batch dimension is left as None so the same graph accepts any number
# of rows (the full table, a mini-batch, or a held-out split) instead of
# being locked to exactly 150 samples.
x = tf.placeholder(dtype='float32', shape=[None, 4])
y = tf.placeholder(dtype='float32', shape=[None, 3])

# 4 input features -> 3 class scores; the [1, 3] bias broadcasts over rows.
w = tf.Variable(tf.truncated_normal([4, 3], stddev=1))
b = tf.Variable(tf.truncated_normal([1, 3], stddev=1))

In [14]:
# Linear scores are turned into class probabilities with softmax.
logits = tf.matmul(x, w) + b
y1 = tf.nn.softmax(logits)

# Mean squared error between the one-hot targets and the probabilities.
residual = y - y1
loss = tf.reduce_mean(tf.square(residual))

In [15]:
# Training op: Adam, constructed with no arguments, minimizing `loss`.
adam = tf.train.AdamOptimizer()
optimizer = adam.minimize(loss)

# Built after minimize() so the initializer is created once the full graph,
# including whatever the optimizer added, is in place.
init = tf.global_variables_initializer()

In [16]:
# Training schedule: the last step index, and how often the loss is reported.
MAXSTEPS = 200000
MOD = 10000

# Open a session and run the initializer op so all variables have values.
sess = tf.Session()
sess.run(init)

In [17]:
# Full-batch training: every step feeds the whole table. The loss is printed
# whenever the step index is a multiple of MOD (step 0 included).
for step in range(MAXSTEPS + 1):
    _, loss_opt = sess.run([optimizer, loss], feed_dict={x: feature_s, y: species})
    if step % MOD:
        continue
    print(step, loss_opt)

0 0.520878
10000 0.0103918
20000 0.00816703
30000 0.00668424
40000 0.00572412
50000 0.00515889
60000 0.00483517
70000 0.00465384
80000 0.00455473
90000 0.00450173
100000 0.00447398
110000 0.00445965
120000 0.00445232
130000 0.00444861
140000 0.00444673
150000 0.00444578
160000 0.00444529
170000 0.00444503
180000 0.00444487
190000 0.00444477
200000 0.00444471


In [18]:
# Evaluate the softmax output on the same standardized features used for
# training; only x needs to be fed because y1 does not depend on y.
yp = sess.run(fetches=y1, feed_dict={x: feature_s})

In [19]:
# Display the predicted class probabilities, one row per fed sample.
yp

array([[  1.00000000e+00,   5.30079915e-31,   0.00000000e+00],
       [  1.00000000e+00,   1.46121889e-18,   0.00000000e+00],
       [  1.00000000e+00,   0.00000000e+00,   0.00000000e+00],
       [  1.00000000e+00,   0.00000000e+00,   0.00000000e+00],
       [  1.00000000e+00,   0.00000000e+00,   0.00000000e+00],
       [  1.00000000e+00,   2.14407066e-31,   0.00000000e+00],
       [  1.00000000e+00,   0.00000000e+00,   0.00000000e+00],
       [  1.00000000e+00,   1.74366073e-33,   0.00000000e+00],
       [  1.00000000e+00,   0.00000000e+00,   0.00000000e+00],
       [  1.00000000e+00,   2.49283166e-27,   0.00000000e+00],
       [  1.00000000e+00,   5.95534763e-24,   0.00000000e+00],
       [  1.00000000e+00,   0.00000000e+00,   0.00000000e+00],
       [  1.00000000e+00,   3.72418306e-27,   0.00000000e+00],
       [  1.00000000e+00,   0.00000000e+00,   0.00000000e+00],
       [  1.00000000e+00,   1.96875474e-10,   0.00000000e+00],
       [  1.00000000e+00,   4.15613132e-35,   0.0000000

In [20]:
# Variance of each one-hot label column, averaged over the columns.
# NOTE(review): presumably a reference point for the MSE loss -- confirm.
species.var(axis=0).mean()

0.22371364653243822

In [21]:
# Count the samples whose highest-probability class differs from the class
# marked in the one-hot labels.
predicted_class = yp.argmax(axis=1)
true_class = species.values.argmax(axis=1)
err = (predicted_class != true_class).sum()

In [22]:
# Boolean mask over the samples: True where the predicted class is wrong.
np.not_equal(yp.argmax(axis=1), species.values.argmax(axis=1))

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [23]:
# Misclassification rate over the rows that were fed for prediction.
# The rate is recomputed from the predictions rather than rescaling `err`
# in place: `err = err / n` divides again each time the cell is re-run, so
# the reported rate silently shrinks. The mean of the boolean mismatch mask
# is the error count divided by the number of rows, so the value is the same.
err = (yp.argmax(axis=1) != species.values.argmax(axis=1)).mean()
print("error_rate: ", err)

error_rate:  0.00666666666667
