# Using tf.data

https://www.tensorflow.org/programmers_guide/datasets

In [1]:
from PIL import Image
import numpy as np
import tensorflow as tf

In [3]:
# Dataset built from range(10).
dataset = tf.data.Dataset.range(10)
# Report the element dtype first, then the element shape.
for element_property in (dataset.output_types, dataset.output_shapes):
    print(element_property)

# No initializer is run for this iterator before elements are pulled from it.
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()
with tf.Session() as sess:
    # Every run() of next_element yields the next value of the dataset.
    for step in range(10):
        print(sess.run(next_element))

<dtype: 'int64'>
()
0
1
2
3
4
5
6
7
8
9


## Q
計算 $1+2+...+10$

### make_initializable_iterator

In [4]:
tf.reset_default_graph()
# Scalar placeholders: the bounds of the range are supplied when the
# iterator is initialized, not when the dataset is defined.
min_value = tf.placeholder(tf.int64, shape=[])
max_value = tf.placeholder(tf.int64, shape=[])
dataset = tf.data.Dataset.range(min_value, max_value)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    # Re-initialize the same iterator over two different 5-element ranges:
    # first 0..4, then 100..104.
    for start, stop in [(0, 5), (100, 105)]:
        sess.run(iterator.initializer,
                 feed_dict={min_value: start, max_value: stop})
        # Pull exactly as many elements as the fed range contains.
        for i in range(stop - start):
            print(sess.run(next_element))

0
1
2
3
4
100
101
102
103
104


### Reinitializable iterator (one iterator with different datasets)

In [5]:
def add_uniform_noise(x):
    """Return x plus a scalar int64 drawn by tf.random_uniform([], -10, 10)."""
    return x + tf.random_uniform([], minval=-10, maxval=10, dtype=tf.int64)

# Training and validation datasets are defined with the same structure.
training_dataset = tf.data.Dataset.range(10).map(add_uniform_noise)
validation_dataset = tf.data.Dataset.range(5)

# The two datasets must be compatible: identical element types and shapes.
assert training_dataset.output_types == validation_dataset.output_types
assert training_dataset.output_shapes == validation_dataset.output_shapes

# The iterator is created from the structure alone, not from either dataset.
iterator = tf.data.Iterator.from_structure(
    training_dataset.output_types,
    training_dataset.output_shapes,
)
next_element = iterator.get_next()

# One initializer op per dataset, both bound to the same iterator.
training_init_op = iterator.make_initializer(training_dataset)
validation_init_op = iterator.make_initializer(validation_dataset)

def loop_through_dataset(ds_name, n, session=None):
    """Fetch ``n`` elements from ``next_element`` and print each one.

    Every printed line has the form ``<ds_name> <index> <value>``.

    Args:
        ds_name: Label printed at the start of every line.
        n: Number of elements to fetch.
        session: Session used to evaluate ``next_element``. When omitted,
            the current default session is used, so the function can be
            called as-is inside a ``with tf.Session():`` block.

    Raises:
        RuntimeError: If no session is passed and no default session exists.
    """
    if session is None:
        # Avoids depending on a global variable that happens to be named `sess`.
        session = tf.get_default_session()
    if session is None:
        raise RuntimeError(
            "loop_through_dataset needs a session: pass one explicitly or "
            "call it inside a `with tf.Session():` block")
    for step in range(n):
        print(ds_name, step, session.run(next_element))

# Each phase: (initializer op, label to print, number of elements to read).
phases = [
    (training_init_op, "train", 10),
    (validation_init_op, "validation", 5),
]

with tf.Session() as sess:
    for epoch in range(3):
        print("epoch", epoch)
        # Training pass first, then validation; the shared iterator is
        # re-initialized on the matching dataset before each pass.
        for phase_init_op, phase_name, phase_size in phases:
            sess.run(phase_init_op)
            loop_through_dataset(phase_name, phase_size)


epoch 0
train 0 1
train 1 4
train 2 -3
train 3 6
train 4 -4
train 5 5
train 6 -2
train 7 7
train 8 14
train 9 5
validation 0 0
validation 1 1
validation 2 2
validation 3 3
validation 4 4
epoch 1
train 0 -9
train 1 -9
train 2 8
train 3 -3
train 4 -3
train 5 2
train 6 7
train 7 11
train 8 4
train 9 7
validation 0 0
validation 1 1
validation 2 2
validation 3 3
validation 4 4
epoch 2
train 0 8
train 1 -6
train 2 0
train 3 -5
train 4 -4
train 5 6
train 6 8
train 7 16
train 8 6
train 9 0
validation 0 0
validation 1 1
validation 2 2
validation 3 3
validation 4 4


In [None]:
import lzma
import pickle
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# open an archive that comes from a trusted source.
with lzma.open("mnist.pkl.xz", 'rb') as mnist_file:
    mnist_splits = pickle.load(mnist_file, encoding='latin1')
# The pickled object unpacks into the three data splits.
train_set, validation_set, test_set = mnist_splits

In [None]:
# Each split is a pair: inputs (X) and labels (y).
train_X, train_y = train_set
validation_X, validation_y = validation_set
test_X, test_y = test_set

# Row k of the 10x10 identity matrix is the one-hot vector for label k, so
# indexing the matrix with a label array one-hot encodes every label at once.
one_hot_rows = np.eye(10)
train_Y, test_Y, validation_Y = (
    one_hot_rows[labels] for labels in (train_y, test_y, validation_y)
)

In [None]:
from IPython.display import display
def showX(X):
    """Display the samples in ``X`` next to each other as one wide image.

    ``X`` is multiplied by 255, clipped to [0, 255] and cast to uint8, then
    reshaped into 28x28 tiles that are laid out left to right.
    """
    scaled = X * 255
    pixels = scaled.clip(0, 255).astype('uint8')
    # (N, 784) -> (N, 28, 28) -> (28, N, 28) -> (28, 28*N)
    tiles = pixels.reshape(-1, 28, 28)
    strip = tiles.swapaxes(0, 1).reshape(28, -1)
    display(Image.fromarray(strip))
# Training data: show the first 20 samples of X, then the labels of those
# same 20 samples so each image can be matched to its label.
showX(train_X[:20])
print(train_y[:20])

## 開始 Tensorflow

In [None]:
import tensorflow as tf
from tfdot import tfdot

# Multilayer Convolutional Network

In [None]:
# Use gfile to read the file.
from tensorflow.python.platform import gfile
# Read the serialized graph and parse it into a GraphDef.
# GFile replaces the deprecated FastGFile; it is opened and read the same way.
with gfile.GFile("mnist_simple.pb", 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

In [None]:
# Use the previously saved model: import graph_def and pick out the tensors
# and ops the rest of the notebook needs, in the order they are unpacked.
saved_element_names = [
    "X:0", "Y_:0", "prediction:0",
    "accuracy:0", "Adam", 'dropout/keep_prob:0', "init",
]
X, Y_, prediction, accuracy, train_step, keep_prob, init_op = tf.import_graph_def(
    graph_def,
    name="",
    return_elements=saved_element_names,
)

In [None]:
# Open an interactive session and run the imported "init" op in it.
sess = tf.InteractiveSession()
sess.run(init_op)

# Record accuracy as a scalar summary tagged with the accuracy op's name,
# then merge all summaries into a single op.
accuracy_tag = accuracy.op.name
tf.summary.scalar(accuracy_tag, accuracy)
summary_op = tf.summary.merge_all()

# Events (and the session graph) are written to the "log1" directory.
summary_writer = tf.summary.FileWriter("log1", graph=sess.graph)

In [None]:
# Feed for the periodic check: the first 1000 validation samples, with
# keep_prob fed as 1.0.
validation_feed = {
    X: validation_X[:1000],
    Y_: validation_Y[:1000],
    keep_prob: 1.0,
}

for i in range(5000):
    # Mini-batch of 50 distinct training indices.
    rnd_idx = np.random.choice(train_X.shape[0], 50, replace=False)
    if i % 250 == 0:
        # Every 250 steps: log the summary and report validation accuracy.
        summary_str, validation_accuracy = sess.run(
            [summary_op, accuracy], validation_feed)
        summary_writer.add_summary(summary_str, i)
        print("step %d, validation accuracy: %g"%(i, validation_accuracy))
    train_feed = {X: train_X[rnd_idx], Y_: train_Y[rnd_idx], keep_prob: 0.5}
    train_step.run(train_feed)

In [None]:
# Close the summary writer first, then the session.
for closable in (summary_writer, sess):
    closable.close()

Run `tensorboard --logdir=log1` in a terminal and open http://localhost:6006



In [None]:
# Launch TensorBoard on the "log1" directory from inside the notebook.
# NOTE(review): presumably this cell keeps running until it is interrupted — confirm.
!tensorboard --logdir=log1

### 同時紀錄三種準確度

In [None]:
# Open a new interactive session and run the imported "init" op in it.
sess = tf.InteractiveSession()
sess.run(init_op)
acc_summary = tf.summary.scalar("accuracy", accuracy)

# One event writer per data split, each with its own directory under log2.
training_summary_writer, validation_summary_writer, testing_summary_writer = [
    tf.summary.FileWriter(log_dir, graph=sess.graph)
    for log_dir in ("log2/training", "log2/validation", "log2/testing")
]


In [None]:
# One entry per data split: (event writer, inputs, one-hot labels).
# Only the first 1000 samples of each split are evaluated.
eval_splits = [
    (training_summary_writer, train_X[:1000], train_Y[:1000]),
    (validation_summary_writer, validation_X[:1000], validation_Y[:1000]),
    (testing_summary_writer, test_X[:1000], test_Y[:1000]),
]

for i in range(5000):
    # Mini-batch of 50 distinct training indices.
    rnd_idx = np.random.choice(train_X.shape[0], 50, replace=False)
    if i % 50 == 0:
        # Every 50 steps: evaluate each split (keep_prob fed as 1.0) and log
        # its accuracy summary to that split's own writer.
        split_accuracies = []
        for split_writer, split_X, split_Y in eval_splits:
            summary_str, split_acc = sess.run(
                [acc_summary, accuracy],
                {X: split_X, Y_: split_Y, keep_prob: 1.0})
            split_writer.add_summary(summary_str, i)
            split_accuracies.append(split_acc)
        training_acc, validation_acc, testing_acc = split_accuracies
        if i % 250 == 0:
            # Print only every 250 steps to keep the cell output short.
            print("step %d, train: %g, validation: %g, test: %g"
                  % (i, training_acc, validation_acc, testing_acc))
    train_step.run({X: train_X[rnd_idx], Y_: train_Y[rnd_idx], keep_prob: 0.5})

In [None]:
# Close the three summary writers (testing, validation, training) and then
# the session, in that order.
open_resources = (
    testing_summary_writer,
    validation_summary_writer,
    training_summary_writer,
    sess,
)
for closable in open_resources:
    closable.close()

In [None]:
# Launch TensorBoard on the "log2" directory, which holds the training,
# validation and testing sub-directories written by the cells above.
# NOTE(review): presumably this cell keeps running until it is interrupted — confirm.
!tensorboard --logdir=log2