## XOR with Logistic Regression (Binary classification)

### XOR data set

In [2]:
import numpy as np
import tensorflow as tf

x_data = np.array([[0,0],[0,1],[1,0],[1,1]], dtype=np.float32)
y_data = np.array([[0],[1],[1],[0]], dtype=np.float32)

  from ._conv import register_converters as _register_converters


### $ H(X)=sigmoid(XW)=\frac{1}{1+e^{-XW}}$
### $ cost(W)=-\frac{1}{m}\sum{ylog(H(x)) + (1-y)(log(1-H(x)))} $

In [8]:
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
W = tf.Variable(tf.random_normal([2,1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis using sigmoid
hypothesis = tf.sigmoid(tf.matmul(X,W)+b)

# cost/loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1-Y) * tf.log(1-hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else false
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
    
    for step in range(10001):
        sess.run(train, feed_dict={X:x_data, Y:y_data})
        if step%1000==0:
            print(step, sess.run(cost, feed_dict={X:x_data, Y:y_data}), sess.run(W))
    
    # Accuracy report
    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict={X:x_data, Y:y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)

0 0.8881937 [[-1.5906348]
 [-0.7871109]]
1000 0.6931636 [[-0.01641939]
 [-0.01485808]]
2000 0.6931472 [[-0.00030838]
 [-0.00030541]]
3000 0.6931472 [[-6.0149696e-06]
 [-6.0068373e-06]]
4000 0.6931472 [[-1.4240860e-07]
 [-1.4172647e-07]]
5000 0.6931472 [[-1.3346786e-07]
 [-1.3278574e-07]]
6000 0.6931472 [[-1.3346786e-07]
 [-1.3278574e-07]]
7000 0.6931472 [[-1.3346786e-07]
 [-1.3278574e-07]]
8000 0.6931472 [[-1.3346786e-07]
 [-1.3278574e-07]]
9000 0.6931472 [[-1.3346786e-07]
 [-1.3278574e-07]]
10000 0.6931472 [[-1.3346786e-07]
 [-1.3278574e-07]]

Hypothesis:  [[0.5]
 [0.5]
 [0.5]
 [0.5]] 
Correct:  [[0.]
 [0.]
 [0.]
 [0.]] 
Accuracy:  0.5


#### One logistic function unit cannot seperate XOR

## Nerual Net
### 2 layers

In [12]:
W1 = tf.Variable(tf.random_normal([2,2], name='weight1'))
b1 = tf.Variable(tf.random_normal([2], name='bias1'))
layer1 = tf.sigmoid(tf.matmul(X,W1)+b1)

W2 = tf.Variable(tf.random_normal([2,1], name='weight2'))
b2 = tf.Variable(tf.random_normal([1]), name='bias2')
hypothesis = tf.sigmoid(tf.matmul(layer1,W2)+b2)

In [15]:
# cost/loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1-Y) * tf.log(1-hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else false
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
    
    for step in range(10001):
        sess.run(train, feed_dict={X:x_data, Y:y_data})
        if step%1000==0:
            print(step, sess.run(cost, feed_dict={X:x_data, Y:y_data}), sess.run([W1,W2]))
    
    # Accuracy report
    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict={X:x_data, Y:y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)

0 0.77594507 [array([[ 0.7347666 ,  0.66437376],
       [-0.02470546,  0.07916614]], dtype=float32), array([[-0.0431173],
       [-1.6716932]], dtype=float32)]
1000 0.69338965 [array([[0.7161172 , 0.18289429],
       [0.05405389, 0.04264087]], dtype=float32), array([[-0.06648836],
       [-1.8869497 ]], dtype=float32)]
2000 0.6930875 [array([[0.7174946 , 0.03820392],
       [0.11313386, 0.02665523]], dtype=float32), array([[-0.09593342],
       [-1.8912709 ]], dtype=float32)]
3000 0.6927582 [array([[ 0.7373584 , -0.0222228 ],
       [ 0.22822186, -0.00304299]], dtype=float32), array([[-0.20983917],
       [-1.8883146 ]], dtype=float32)]
4000 0.6911157 [array([[ 0.8248022 , -0.11552987],
       [ 0.47638288, -0.0793582 ]], dtype=float32), array([[-0.46977276],
       [-1.8798715 ]], dtype=float32)]
5000 0.6747848 [array([[ 1.1150212 , -0.60414153],
       [ 0.9689868 , -0.57034194]], dtype=float32), array([[-1.103569 ],
       [-1.9634396]], dtype=float32)]
6000 0.42444843 [array([[ 2.1

### 4 layers

In [22]:
W1 = tf.Variable(tf.random_normal([2,10], name='weight1'))
b1 = tf.Variable(tf.random_normal([10], name='bias1'))
layer1 = tf.sigmoid(tf.matmul(X,W1)+b1)

W2 = tf.Variable(tf.random_normal([10,10], name='weight2'))
b2 = tf.Variable(tf.random_normal([10]), name='bias2')
layer2 = tf.sigmoid(tf.matmul(layer1,W2)+b2)

W3 = tf.Variable(tf.random_normal([10,10], name='weight3'))
b3 = tf.Variable(tf.random_normal([10]), name='bias3')
layer3 = tf.sigmoid(tf.matmul(layer2,W3)+b3)

W4 = tf.Variable(tf.random_normal([10,1], name='weight4'))
b4 = tf.Variable(tf.random_normal([1]), name='bias4')
hypothesis = tf.sigmoid(tf.matmul(layer3,W4)+b4)

In [23]:
# cost/loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1-Y) * tf.log(1-hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else false
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
    
    for step in range(1001):
        sess.run(train, feed_dict={X:x_data, Y:y_data})
        if step%100==0:
            print(step, sess.run(cost, feed_dict={X:x_data, Y:y_data}))
    
    # Accuracy report
    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict={X:x_data, Y:y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)

0 0.72602177
100 0.6512417
200 0.61226815
300 0.56935006
400 0.5139572
500 0.4377269
600 0.33946592
700 0.23931846
800 0.1605539
900 0.10927792
1000 0.078076124

Hypothesis:  [[0.05227613]
 [0.90632373]
 [0.9487957 ]
 [0.10209578]] 
Correct:  [[0.]
 [1.]
 [1.]
 [0.]] 
Accuracy:  1.0


### Formal defination
### $ W:=W-\alpha \frac{\partial }{\partial W} \frac{1}{2m} \sum _{i=1}^m{(Wx^{(i)}-y^{(i)})^2} $
### $ W:=W-\alpha \frac{1}{2m} \sum_{i=1}^m{2(Wx^{(i)}-y^{(i)})x^{(i)}} $
### $ W:=W-\alpha \frac{1}{m} \sum_{i=1}^m{(Wx^{(i)}-y^{(i)})x^{(i)}} $

 ### Gradient descent algorithm
 ### $ W:=W-\alpha \frac{1}{m} \sum_{i=1}^m{(Wx^{(i)}-y^{(i)})x^{(i)}} $
 ### $ w = w - \alpha \frac{\partial{E}}{\partial{W}} $
 #### train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

### Go deep & wide

In [36]:
# 9 hidden layers!
# 11 Weights
W1 = tf.Variable(tf.random_uniform([2,5], -1.0, 1.0), name='Weight1')
W2 = tf.Variable(tf.random_uniform([5,5], -1.0, 1.0), name='Weight2')
W3 = tf.Variable(tf.random_uniform([5,5], -1.0, 1.0), name='Weight3')
W4 = tf.Variable(tf.random_uniform([5,5], -1.0, 1.0), name='Weight4')
W5 = tf.Variable(tf.random_uniform([5,5], -1.0, 1.0), name='Weight5')
W6 = tf.Variable(tf.random_uniform([5,5], -1.0, 1.0), name='Weight6')
W7 = tf.Variable(tf.random_uniform([5,5], -1.0, 1.0), name='Weight7')
W8 = tf.Variable(tf.random_uniform([5,5], -1.0, 1.0), name='Weight8')
W9 = tf.Variable(tf.random_uniform([5,5], -1.0, 1.0), name='Weight9')
W10 = tf.Variable(tf.random_uniform([5,5], -1.0, 1.0), name='Weight10')
W11 = tf.Variable(tf.random_uniform([5,1], -1.0, 1.0), name='Weight11')

# 11 Bias
b1 = tf.Variable(tf.zeros([5], name='Bias1'))
b2 = tf.Variable(tf.zeros([5], name='Bias2'))
b3 = tf.Variable(tf.zeros([5], name='Bias3'))
b4 = tf.Variable(tf.zeros([5], name='Bias4'))
b5 = tf.Variable(tf.zeros([5], name='Bias5'))
b6 = tf.Variable(tf.zeros([5], name='Bias6'))
b7 = tf.Variable(tf.zeros([5], name='Bias7'))
b8 = tf.Variable(tf.zeros([5], name='Bias8'))
b9 = tf.Variable(tf.zeros([5], name='Bias9'))
b10 = tf.Variable(tf.zeros([5], name='Bias10'))
b11 = tf.Variable(tf.zeros([1], name='Bias11'))

# 11 Layers
with tf.name_scope('layer1') as scope:
    L1 = tf.sigmoid(tf.matmul(X,W1)+b1)
with tf.name_scope('layer2') as scope:
    L2 = tf.sigmoid(tf.matmul(L1,W2)+b2)
with tf.name_scope('layer3') as scope:
    L3 = tf.sigmoid(tf.matmul(L2,W3)+b3)
with tf.name_scope('layer4') as scope:
    L4 = tf.sigmoid(tf.matmul(L3,W4)+b4)
with tf.name_scope('layer5') as scope:
    L5 = tf.sigmoid(tf.matmul(L4,W5)+b5)
with tf.name_scope('layer6') as scope:
    L6 = tf.sigmoid(tf.matmul(L5,W6)+b6)
with tf.name_scope('layer7') as scope:
    L7 = tf.sigmoid(tf.matmul(L6,W7)+b7)
with tf.name_scope('layer8') as scope:
    L8 = tf.sigmoid(tf.matmul(L7,W8)+b8)
with tf.name_scope('layer9') as scope:
    L9 = tf.sigmoid(tf.matmul(L8,W9)+b9)
with tf.name_scope('layer10') as scope:
    L10 = tf.sigmoid(tf.matmul(L9,W10)+b10)
with tf.name_scope('last') as scope:
    hypothesis = tf.sigmoid(tf.matmul(L10,W11)+b11)

In [38]:
# cost/loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1-Y) * tf.log(1-hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else false
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
    
    for step in range(401):
        sess.run(train, feed_dict={X:x_data, Y:y_data})
        if step%100==0:
            print(step, sess.run(cost, feed_dict={X:x_data, Y:y_data}))
    
    # Accuracy report
    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict={X:x_data, Y:y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)

0 0.85092825
100 0.69314986
200 0.6931472
300 0.6931472
400 0.6931472

Hypothesis:  [[0.5000001 ]
 [0.5000006 ]
 [0.49999952]
 [0.5       ]] 
Correct:  [[1.]
 [1.]
 [0.]
 [0.]] 
Accuracy:  0.5


##### Not work!

### ReLU: Rectified Linear Unit
#### L1 = tf.sigmoid(tf.matmul(X,W1)+b1)
#### L1 = tf.nn.relu(tf.matmul(X,W1)+b1)

In [41]:
# 11 Layers with ReLU
with tf.name_scope('layer1') as scope:
    L1 = tf.nn.relu(tf.matmul(X,W1)+b1)
with tf.name_scope('layer2') as scope:
    L2 = tf.nn.relu(tf.matmul(L1,W2)+b2)
with tf.name_scope('layer3') as scope:
    L3 = tf.nn.relu(tf.matmul(L2,W3)+b3)
with tf.name_scope('layer4') as scope:
    L4 = tf.nn.relu(tf.matmul(L3,W4)+b4)
with tf.name_scope('layer5') as scope:
    L5 = tf.nn.relu(tf.matmul(L4,W5)+b5)
with tf.name_scope('layer6') as scope:
    L6 = tf.nn.relu(tf.matmul(L5,W6)+b6)
with tf.name_scope('layer7') as scope:
    L7 = tf.nn.relu(tf.matmul(L6,W7)+b7)
with tf.name_scope('layer8') as scope:
    L8 = tf.nn.relu(tf.matmul(L7,W8)+b8)
with tf.name_scope('layer9') as scope:
    L9 = tf.nn.relu(tf.matmul(L8,W9)+b9)
with tf.name_scope('layer10') as scope:
    L10 = tf.nn.relu(tf.matmul(L9,W10)+b10)
with tf.name_scope('last') as scope:
    hypothesis = tf.sigmoid(tf.matmul(L10,W11)+b11)

In [51]:
# cost/loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1-Y) * tf.log(1-hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate=0.05).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else false
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

writer = tf.summary.FileWriter("logs/", sess.graph)

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
    
    for step in range(10001):
        sess.run(train, feed_dict={X:x_data, Y:y_data})
        if step%1000==0:
            print(step, sess.run(cost, feed_dict={X:x_data, Y:y_data}))
    
    # Accuracy report
    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict={X:x_data, Y:y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)

0 0.6848593
1000 0.023016274
2000 0.010894345
3000 0.007102621
4000 0.0052616266
5000 0.0041755983
6000 0.0034604608
7000 0.0029537505
8000 0.002576195
9000 0.002284031
10000 0.0020512838

Hypothesis:  [[0.00408929]
 [0.99999166]
 [0.99999857]
 [0.00408929]] 
Correct:  [[0.]
 [1.]
 [1.]
 [0.]] 
Accuracy:  1.0


##### Works very well!

## Nerual Network tips

### 1. Initializing weights
** W = tf.Variable(tf.random_normal([1]), name='weight') **

** W = tf.get_variable("W", shape=[784, 256], initializer=tf.contrib.layers.xavier_initializer()) **

### 2. Activation functions
** tf.sigmoid **

** tf.tanh **

** tf.nn.relu **

### 3. Regularization
#### Solutions for overfitting
* More training data
* Reduce the number of features
* Regularization

#### $ L(W)=\frac{1}{N}\sum_{i=1}^N{L_i(f(x_i,W),y_i})+\lambda R(W) $
#### $ L=\frac{1}{N}\sum_{i=1}^N \sum_{j\neq y_i} max \left[ 0,f(x_i;W)_j - f(x_i;W)_{y_i}+1 \right] + \lambda R(W) $
#### λ =  regularization strength (hyperparameter)

### In common use:
#### L2 regularization   $ R(W)=\sum_k \sum_l W_{k,l}^2 $
#### L1 regularization   $ R(W)=\sum_k \sum_l \left|W_{k,l}\right| $
#### Elastic net (L1+L2)    $ R(W)=\sum_k \sum_l \beta W_{k,l}^2 + \left|W_{k.l}\right| $
#### Max norm rgularization
#### Dropout
#### Fancier: Batch normalization, stochastic depth

In [117]:
# from tensorflow.examples.tutorials.mnist import input_data
# tf.get_variable_scope().reuse_variables()

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

with tf.variable_scope(tf.get_variable_scope()) as scope:
    # dropout (keep_prob) rate 0.7 on training, but should be 1 for testing
    keep_prob = tf.placeholder(tf.float32)

    X = tf.placeholder(tf.float32, [None, 784])
    Y = tf.placeholder(tf.float32, [None, 10])

    W1 = tf.get_variable("W1", shape=[784,512], initializer=tf.contrib.layers.xavier_initializer())
    b1 = tf.Variable(tf.random_normal([512]))
    L1 = tf.nn.relu(tf.matmul(X,W1)+b1)
    L1 = tf.nn.dropout(L1, keep_prob=keep_prob)

    W2 = tf.get_variable("W2", shape=[512,512], initializer=tf.contrib.layers.xavier_initializer())
    b2 = tf.Variable(tf.random_normal([512]))
    L2 = tf.nn.relu(tf.matmul(L1,W2)+b2)
    L2 = tf.nn.dropout(L2, keep_prob=keep_prob)

    # parameters
    learning_rate = 0.001
    training_epochs = 15
    batch_size = 100
    total_batch = int(mnist.test.num_examples / batch_size)

    # define cost/loss & optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # train model
    for epoch in range(training_epochs):

        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            feed_dict = {X:batch_xs, Y:batch_ys, keep_prob:0.7}
            c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
            avg_cost += c/total_batch

    # Test model and check accuracy
    correct_prediction = tf.equal(tf.argmax(hypothesis,1), tf.argmax(Y,1))
    accuracy = tf.reduce_mean(tf.cast(correction_prediction, tf.float32))
    print('Accuracy:', sess.run(accuracy, feed_dict={X:mnist.test.images, Y:mnist.test.labels, keep_prob:1}))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


ValueError: Variable Weight1_6/Adam_2/ does not exist, or was not created with tf.get_variable(). Did you mean to set reuse=tf.AUTO_REUSE in VarScope?

In [2]:
# Lab 10 MNIST and Dropout
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import random
# import matplotlib.pyplot as plt

tf.set_random_seed(777)  # reproducibility

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Check out https://www.tensorflow.org/get_started/mnist/beginners for
# more information about the mnist dataset

# tf.get_variable_scope().reuse_variables()

# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
total_batch = int(mnist.train.num_examples / batch_size)

# input place holders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# dropout (keep_prob) rate  0.7 on training, but should be 1 for testing
keep_prob = tf.placeholder(tf.float32)

# weights & bias for nn layers
# http://stackoverflow.com/questions/33640581/how-to-do-xavier-initialization-on-tensorflow
W1 = tf.get_variable("W1", shape=[784, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([512]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
L1 = tf.nn.dropout(L1, keep_prob=keep_prob)

W2 = tf.get_variable("W2", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([512]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
L2 = tf.nn.dropout(L2, keep_prob=keep_prob)

W3 = tf.get_variable("W3", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([512]))
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)
L3 = tf.nn.dropout(L3, keep_prob=keep_prob)

W4 = tf.get_variable("W4", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([512]))
L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)

W5 = tf.get_variable("W5", shape=[512, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L4, W5) + b5

# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# train my model
for epoch in range(training_epochs):
    avg_cost = 0

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: 0.7}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning Finished!')

# Test model and check accuracy
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print('Accuracy:', sess.run(accuracy, feed_dict={
      X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1}))

# Get one and predict
r = random.randint(0, mnist.test.num_examples - 1)
print("Label: ", sess.run(tf.argmax(mnist.test.labels[r:r + 1], 1)))
print("Prediction: ", sess.run(
    tf.argmax(hypothesis, 1), feed_dict={X: mnist.test.images[r:r + 1], keep_prob: 1}))

# plt.imshow(mnist.test.images[r:r + 1].
#           reshape(28, 28), cmap='Greys', interpolation='nearest')
# plt.show()

'''
Epoch: 0001 cost = 0.447322626
Epoch: 0002 cost = 0.157285590
Epoch: 0003 cost = 0.121884535
Epoch: 0004 cost = 0.098128681
Epoch: 0005 cost = 0.082901778
Epoch: 0006 cost = 0.075337573
Epoch: 0007 cost = 0.069752543
Epoch: 0008 cost = 0.060884363
Epoch: 0009 cost = 0.055276413
Epoch: 0010 cost = 0.054631256
Epoch: 0011 cost = 0.049675195
Epoch: 0012 cost = 0.049125314
Epoch: 0013 cost = 0.047231930
Epoch: 0014 cost = 0.041290121
Epoch: 0015 cost = 0.043621063
Learning Finished!
Accuracy: 0.9804
'''


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

Epoch: 0001 cost = 0.451240691
Epoch: 0002 cost = 0.174481454
Epoch: 0003 cost = 0.128587236
Epoch: 0004 cost = 0.109119285
Epoch: 0005 cost = 0.092670795
Epoch: 0006 cost = 0.082088990
Epoch: 0007 cost = 0.077748027
Epoch: 0008 cost = 0.070568659
Epoch: 0009 cost = 0.063686291
Epoch: 0010 cost = 0.057611010
Epoch: 0011 cost = 0.062052387
Epoch: 0012 cost = 0.052562773
Epoch: 0013 cost = 0.051294356
Epoch: 0014 cost = 0.046317724
Epoch: 0015 cost = 0.047256546
Learning Finished!
Accuracy: 0.9812
Label:  [1]
Prediction:  [1]


'\nEpoch: 0001 cost = 0.447322626\nEpoch: 0002 cost = 0.157285590\nEpoch: 0003 cost = 0.121884535\nEpoch: 0004 cost = 0.098128681\nEpoch: 0005 cost = 0.082901778\nEpoch: 0006 cost = 0.075337573\nEpoch: 0007 cost = 0.069752543\nEpoch: 0008 cost = 0.060884363\nEpoch: 0009 cost = 0.055276413\nEpoch: 0010 cost = 0.054631256\nEpoch: 0011 cost = 0.049675195\nEpoch: 0012 cost = 0.049125314\nEpoch: 0013 cost = 0.047231930\nEpoch: 0014 cost = 0.041290121\nEpoch: 0015 cost = 0.043621063\nLearning Finished!\nAccuracy: 0.9804\n'

### 4. Optimizers
** tf.train.GradientDescentOptimizer **

** tf.train.AdadeltaOptimizer **

** tf.train.AdagradOptimizer **

** tf.train.AdagradDAOptimizer **

** tf.train.MomentumOptimer **

** tf.train.AdamOptimizer **

** tf.train.FtrlOptimer **

** tf.train.ProximalGradientDescentOptimizer **

** tf.train.ProxiamlAdagradOptimizer **

** tf.train.RMSPropOptimizer **

In [None]:
#define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

## TensorBoard

### 1. From TF graph, decide which tensors you want to log
w2_hist = tf.summary.histogram("weight2", w2) <br> cost_summ = tf.summary.scalar("cost", cost)

### 2. Merge all summaries
summary = tf.summary.merge_all()

### 3. Create writer and add graph
writer =tf.summary.FileWriter('./logs') <br>
writer.add_graph(sess.graph)

### 4. Run summary merge and add_summary
s, _ = sess.run([summary, optimizer], feed_dict=feed_dict) <br>
writer.add_summary(s, global_step=global_step)

### 5. Launch TensorBoard
tensorboard --logdir=./logs

In [None]:
# Scalar tensors
cost_summ = tf.summary.scalar("cost", cost)

# Histogram(muti-dimensional tensors)
W2 = tf.Variable(tf.random_normal([2,1]), name='weight2')
b2 = tf.Variable(tf.random_normal([1]), name='bias2')
hypothesis = tf.sigmoid(tf.matmul(layer1,W2) + b2)

W2_hist = tf.summary.histogram('weight2', W2)
b2_hist = tf.summary.histogram('bias2', b2)
hypothesis_hist = tf.summary.histogram('hypothesis', hypothesis)

# Add scope for better graph hierarchy
with tf.name_scope('layer1') as scope:
    W1 = tf.Variable(tf.random_normal([2,2]), name='weight1')
    b1 = tf.Variable(tf.random_normal([2]), name='bias1')
    hypothesis = tf.sigmoid(tf.matmul(X,W1) + b1)
    
    W1_hist = tf.summary.histogram('weight1', W1)
    b1_hist = tf.summary.histogram('bias1', b1)
    layer1 = tf.summary.histogram('layer1', layer1)
    
with tf.name_scope('layer2') as scope:
    W2 = tf.Variable(tf.random_normal([2,1]), name='weight2')
    b2 = tf.Variable(tf.random_normal([1]), name='bias2')
    hypothesis = tf.sigmoid(tf.matmul(layer1,W2) + b2)
    
    W2_hist = tf.summary.histogram('weight2', W2)
    b2_hist = tf.summary.histogram('bias2', b2)
    hypothesis_hist = tf.summary.histogram('hypothesis', hypothesis)

### 2/3. Merge summaries and create writer after creating session

In [None]:
# Summaray
summary = tf.summary.merge_all()

# Initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Crate summary writer
writer = tf.summary.FileWriter(TB_SUMMARY_DIR)
writer.add_graph(sess.graph)

### 4. Run merged summary and write (add summary)

In [None]:
s, _ = sess.run([summary, optimizer], feed_dict=feed_dict)
writer.add_summary(s, global_step=global_step)
global_step += 1

### 5. Launch tensorboard
#### Local

In [None]:
writer = tf.summary.FileWriter("./logs/xor_logs")

$ tensorboard -logdir=./logs/xor_logs

#### Remote server

$ ssh -L local_port:127.0.0.1:remote_port username@server.com

$ tensorboard -logdir=./logs/xor_logs

### P1 矩阵相乘

In [54]:
with tf.name_scope('graph') as scope:
    matrix1 = tf.constant([[3., 3.]],name ='matrix1')  #1 row by 2 column
    matrix2 = tf.constant([[2.],[2.]],name ='matrix2') # 2 row by 1 column
    product = tf.matmul(matrix1, matrix2, name='product')

sess = tf.Session()
writer = tf.summary.FileWriter("logs/matmul", sess.graph)
init = tf.global_variables_initializer()
sess.run(init)

### P2 线性拟合1

In [80]:
# Prepare the original data
with tf.name_scope('data'):
    x_data = np.random.rand(100).astype(np.float32)
    y_data = 0.3 * x_data + 0.1

# Create parameters
with tf.name_scope('paramters'):
    with tf.name_scope('weights'):
        weight = tf.Variable(tf.random_uniform([1],-1.0,1.0))
        tf.summary.histogram('weight', weight)
    with tf.name_scope('biases'):
        bias = tf.Variable(tf.zeros([1]))
        tf.summary.histogram('bias', bias)

# Get y_prediction
with tf.name_scope('y_prediction'):
    y_prediction = weight * x_data + bias
    
# Compute the loss
with tf.name_scope('loss'):
    loss = tf.reduce_mean(tf.square(y_data - y_prediction))
    tf.summary.scalar('loss', loss)
    
# Create optimizer
optimizer = tf.train.GradientDescentOptimizer(0.5)

# Create train, minimize the loss
with tf.name_scope('train'):
    train = optimizer.minimize(loss)

# Create init
with tf.name_scope('init'):
    init = tf.global_variables_initializer()
    
# Create a session
sess = tf.Session()

# Merged
merged = tf.summary.merge_all()

# Initialize
writer = tf.summary.FileWriter("logs/lr1", sess.graph)
sess.run(init)

# Loop
for step in range(101):
    sess.run(train)
    # rs = sess.run(merged)
    # writer.add_summary(rs, step)
    if step%10 == 0:
        print(step, 'weight:', sess.run(weight), 'bias:', sess.run(bias))


0 weight: [-0.0441124] bias: [0.37764052]
10 weight: [0.10195768] bias: [0.20252737]
20 weight: [0.19805713] bias: [0.15277623]
30 weight: [0.24752462] bias: [0.1271667]
40 weight: [0.27298817] bias: [0.11398415]
50 weight: [0.28609556] bias: [0.10719839]
60 weight: [0.29284266] bias: [0.1037054]
70 weight: [0.29631573] bias: [0.10190737]
80 weight: [0.29810348] bias: [0.10098183]
90 weight: [0.29902375] bias: [0.10050541]
100 weight: [0.2994975] bias: [0.10026016]
