## TensorFlow Linear

In [1]:
import tensorflow as tf
import numpy as np

csvFile = 'pima-indians-diabetes.csv'

In [2]:
def clip(x, vmin = 1e-7, vmax = 1-1e-7):
    return tf.clip_by_value(x, clip_value_max= vmax, clip_value_min= vmin)

In [3]:
learning_rate = 0.01
epoch = 1
xy = np.loadtxt(csvFile, delimiter=',',dtype='float32')
x_data = xy[:,:-1]
y_data = xy[:,-1:]

In [4]:
x_data[0]

array([   6.        ,  148.        ,   72.        ,   35.        ,
          0.        ,   33.59999847,    0.62699997,   50.        ], dtype=float32)

In [5]:
def normalize(array):
    mu_x = np.mean(array)
    sigma_x = 0
    for i in range(len(array)):
        sigma_x+=(array[i]-mu_x)*(array[i]-mu_x)
    sigma_x/=len(array)
    sigma_x = np.sqrt(sigma_x)
    return (array-mu_x)/sigma_x

In [43]:
x_data_transpose = np.transpose(x_data, [1, 0])
for i in range(len(x_data_transpose)):
    x_data_transpose[i] = normalize(x_data_transpose[i])
x_data_normalize = np.transpose(x_data_transpose)
print(x_data_transpose, x_data)

[[ 0.63994718 -0.84488505  1.23388028 ...,  0.34298074 -0.84488505
  -0.84488505]
 [ 0.84832382 -1.1233964   1.94372392 ...,  0.00330089  0.1597866
  -0.87301922]
 [ 0.14964075 -0.16054575 -0.26394126 ...,  0.14964075 -0.47073224
   0.04624525]
 ..., 
 [ 0.20401259 -0.6844219  -1.10325563 ..., -0.7351895  -0.24020453
  -0.20212884]
 [ 0.46849188 -0.36506072  0.60439724 ..., -0.68519336 -0.37110099
  -0.47378504]
 [ 1.42599535 -0.19067189 -0.10558418 ..., -0.27575964  1.17073214
  -0.87137389]] [[ 0.63994718  0.84832382  0.14964075 ...,  0.20401259  0.46849188
   1.42599535]
 [-0.84488505 -1.1233964  -0.16054575 ..., -0.6844219  -0.36506072
  -0.19067189]
 [ 1.23388028  1.94372392 -0.26394126 ..., -1.10325563  0.60439724
  -0.10558418]
 ..., 
 [ 0.34298074  0.00330089  0.14964075 ..., -0.7351895  -0.68519336
  -0.27575964]
 [-0.84488505  0.1597866  -0.47073224 ..., -0.24020453 -0.37110099
   1.17073214]
 [-0.84488505 -0.87301922  0.04624525 ..., -0.20212884 -0.47378504
  -0.87137389]]


In [10]:
x_data_normalize.shape

(768, 8)

In [11]:
x_data_normalize[0]

array([ 0.63994724,  0.84832382,  0.14964074,  0.9072699 , -0.69289052,
        0.20401253,  0.46849191,  1.42599523], dtype=float32)

In [13]:
train_data = {}
test_data = {}
train_data['input'] = x_data_normalize[:600]
train_data['output'] = y_data[:600]
test_data['input'] = x_data_normalize[600:]
test_data['output'] = y_data[600:]

In [14]:
def print_shape(dataset):
    print("input data shape : {}, output_data shape : {}".format(dataset['input'].shape, dataset['output'].shape))

In [15]:
print_shape(train_data)
print_shape(test_data)

input data shape : (600, 8), output_data shape : (600, 1)
input data shape : (168, 8), output_data shape : (168, 1)


In [16]:
nfeatures = x_data.shape[1]
nclasses = y_data.shape[1]
# mean, var = tf.nn.moments(x=tf.constant(x_data[0]), axes=[0])

In [17]:
X = tf.placeholder(tf.float32, [None, nfeatures], name="input")
Y = tf.placeholder(tf.float32, [None, nclasses], name="output")

W = tf.Variable(tf.random_normal([nfeatures, nclasses]), name = 'weights')
b = tf.Variable(tf.random_normal([nclasses], name='bias'))

hypothesis = tf.sigmoid(tf.matmul(X, W) + b)

In [18]:
hypothesis

<tf.Tensor 'Sigmoid:0' shape=(?, 1) dtype=float32>

In [19]:
cost = -tf.reduce_mean(Y * tf.log(clip(hypothesis)) + (1 - Y) * tf.log(clip(1 - hypothesis)))
train = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

In [20]:
correct_prediction = tf.equal(tf.cast(hypothesis>0.5, tf.float32), Y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [21]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
atemp = sess.run(accuracy, feed_dict = {X : test_data['input'], Y : test_data['output']})
print("accuracy {}".format(atemp))

for i in range(epoch):
    for step in range(100000):
        opt, cc = sess.run([train,cost], feed_dict={X:train_data['input'], Y:train_data['output']})
        if step%500 ==0:
            atemp = sess.run(accuracy, feed_dict = {X : test_data['input'], Y : test_data['output']})
            print("{} step cost {} accuracy {}".format(step, cc, atemp))

accuracy 0.4345238208770752
0 step cost 1.2426375150680542 accuracy 0.4345238208770752
500 step cost 0.5898807644844055 accuracy 0.7321428656578064
1000 step cost 0.498197078704834 accuracy 0.7797619104385376
1500 step cost 0.48015451431274414 accuracy 0.7797619104385376
2000 step cost 0.4758443236351013 accuracy 0.7678571343421936
2500 step cost 0.4746570885181427 accuracy 0.7678571343421936
3000 step cost 0.47427690029144287 accuracy 0.7678571343421936
3500 step cost 0.4741356372833252 accuracy 0.7678571343421936
4000 step cost 0.4740765392780304 accuracy 0.773809552192688
4500 step cost 0.47404998540878296 accuracy 0.7797619104385376
5000 step cost 0.47403737902641296 accuracy 0.7797619104385376
5500 step cost 0.4740314185619354 accuracy 0.7797619104385376
6000 step cost 0.47402846813201904 accuracy 0.7797619104385376
6500 step cost 0.4740268886089325 accuracy 0.7797619104385376
7000 step cost 0.47402623295783997 accuracy 0.773809552192688
7500 step cost 0.4740258753299713 accuracy 

66000 step cost 0.4740256369113922 accuracy 0.773809552192688
66500 step cost 0.4740256369113922 accuracy 0.773809552192688
67000 step cost 0.4740256369113922 accuracy 0.773809552192688
67500 step cost 0.4740256369113922 accuracy 0.773809552192688
68000 step cost 0.4740256369113922 accuracy 0.773809552192688
68500 step cost 0.4740256369113922 accuracy 0.773809552192688
69000 step cost 0.4740256369113922 accuracy 0.773809552192688
69500 step cost 0.4740256369113922 accuracy 0.773809552192688
70000 step cost 0.4740256369113922 accuracy 0.773809552192688
70500 step cost 0.4740256369113922 accuracy 0.773809552192688
71000 step cost 0.4740256369113922 accuracy 0.773809552192688
71500 step cost 0.4740256369113922 accuracy 0.773809552192688
72000 step cost 0.4740256369113922 accuracy 0.773809552192688
72500 step cost 0.4740256369113922 accuracy 0.773809552192688
73000 step cost 0.4740256369113922 accuracy 0.773809552192688
73500 step cost 0.4740256369113922 accuracy 0.773809552192688
74000 st

KeyboardInterrupt: 

In [22]:
def run_hypothesis(input_):
    return sess.run(hypothesis, feed_dict = {X : np.reshape(input_, [-1, nfeatures])})

In [35]:
run_hypothesis(test_data['input'][8])

array([[ 0.51050395]], dtype=float32)

In [36]:
test_data['output'][8]

array([ 0.], dtype=float32)