# Using tf.data

https://www.tensorflow.org/programmers_guide/datasets

In [None]:
from PIL import Image
import numpy as np
import tensorflow as tf

In [None]:
# Dataset built from range(10).
dataset = tf.data.Dataset.range(10)
# Show the element dtype and the element shape, one per line.
print(dataset.output_types, dataset.output_shapes, sep="\n")

# No initializer op is run for a one-shot iterator; it is used directly.
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()
with tf.Session() as sess:
    # Every run of `next_element` advances the iterator by one element.
    for step in range(10):
        print(sess.run(next_element))

### make_initializable_iterator

In [None]:
# Start from an empty default graph before building this example.
tf.reset_default_graph()

# Scalar int64 placeholders: the range bounds are supplied when the
# iterator is initialized rather than when the graph is built.
min_value = tf.placeholder(tf.int64, shape=[])
max_value = tf.placeholder(tf.int64, shape=[])
dataset = tf.data.Dataset.range(min_value, max_value)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    # Initialize the same iterator twice, each time over a dataset with
    # 5 elements: first range(0, 5), then range(100, 105).
    for lower, upper in ((0, 5), (100, 105)):
        sess.run(iterator.initializer,
                 feed_dict={min_value: lower, max_value: upper})
        for step in range(5):
            value = sess.run(next_element)
            print(value)

## from_tensor_slices

In [None]:
# Build a dataset from the slices of a random [10, 3] tensor.
dataset = tf.data.Dataset.from_tensor_slices(tf.random_uniform(shape=[10, 3]))

# Element dtype and element shape, one per line.
print(dataset.output_types, dataset.output_shapes, sep="\n")

iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    # The iterator's initializer is run before the first element is fetched.
    sess.run(iterator.initializer)
    for row in range(10):
        print(sess.run(next_element))

## Q
計算 $1+2+\cdots+10$

### Reinitializable (one iterator with different datasets)

In [None]:
# Training data: range(10) with a random int64 offset (minval=-10,
# maxval=10) added to every element.
training_dataset = tf.data.Dataset.range(10).map(
    lambda value: value + tf.random_uniform(
        shape=[], minval=-10, maxval=10, dtype=tf.int64))
# Validation data: plain range(5), same element structure as training.
validation_dataset = tf.data.Dataset.range(5)

# Both datasets must agree on element dtype and shape so that a single
# iterator can be re-pointed at either of them.
assert training_dataset.output_types == validation_dataset.output_types
assert training_dataset.output_shapes == validation_dataset.output_shapes

# The iterator is defined by structure only; it is bound to a concrete
# dataset when one of the initializer ops below is run.
iterator = tf.data.Iterator.from_structure(
    output_types=training_dataset.output_types,
    output_shapes=training_dataset.output_shapes)
next_element = iterator.get_next()

training_init_op = iterator.make_initializer(training_dataset)
validation_init_op = iterator.make_initializer(validation_dataset)

def loop_through_dataset(ds_name, n):
    """Fetch and print `n` elements from the shared iterator.

    Each printed line contains `ds_name`, the step index and the fetched
    element. The function reads the module-level `sess` and
    `next_element`, so it must be called while that session is open.
    """
    for step in range(n):
        print(ds_name, step, sess.run(next_element))

# (initializer op, label to print, number of elements to read) per phase.
phases = (
    (training_init_op, "train", 10),
    (validation_init_op, "validation", 5),
)

with tf.Session() as sess:
    for epoch in range(3):
        print("epoch", epoch)
        # Training first, then validation: re-point the iterator at the
        # phase's dataset and read it through.
        for init_op, ds_name, n_elements in phases:
            sess.run(init_op)
            loop_through_dataset(ds_name, n_elements)


## MNIST Dataset

In [None]:
import lzma
import pickle
# Read the xz-compressed pickle and unpack its three parts.
# NOTE(review): pickle.load can execute arbitrary code; only use this
# with a "mnist.pkl.xz" obtained from a trusted source.
with lzma.open("mnist.pkl.xz", mode='rb') as f:
    (train_set,
     validation_set,
     test_set) = pickle.load(f, encoding='latin1')

In [None]:
# For each split: unpack the (X, y) pair, then build Y by using y to
# index the rows of a 10x10 identity matrix (a one-hot encoding, assuming
# y holds integer class ids in 0..9 -- TODO confirm).
train_X, train_y = train_set
train_Y = np.eye(10)[train_y]

validation_X, validation_y = validation_set
validation_Y = np.eye(10)[validation_y]

test_X, test_y = test_set
test_Y = np.eye(10)[test_y]

In [None]:
from IPython.display import display
def showX(X):
    """Display the rows of `X` side by side as one image.

    `X` is scaled by 255, clipped to [0, 255] and converted to uint8.
    Each row is then treated as a 28x28 picture and the pictures are laid
    out horizontally in a single 28-pixel-high strip.
    """
    pixels = np.clip(X * 255, 0, 255).astype('uint8')
    # (N, 784) -> (N, 28, 28) -> (28, N, 28) -> (28, 28*N)
    strip = pixels.reshape(-1, 28, 28).transpose(1, 0, 2).reshape(28, -1)
    display(Image.fromarray(strip))
# Training data: display the first 20 rows of X, then print the labels of
# those same 20 rows so the output lines up with the displayed images.
showX(train_X[:20])
print(train_y[:20])

In [None]:
# Dataset of (train_X row, train_Y row) pairs.
train_data = tf.data.Dataset.from_tensor_slices((train_X, train_Y))

# Iterate over the pairs in mini-batches of 4.
iterator = train_data.batch(4).make_initializable_iterator()
next_minibatch = iterator.get_next()

with tf.Session() as sess:
    sess.run(iterator.initializer)
    # Print only the second component (the train_Y part) of the first
    # three mini-batches.
    for batch_index in range(3):
        minibatch = sess.run(next_minibatch)
        print(minibatch[1])

# Multilayer Convolutional Network

In [None]:
# Mini-batches of 32 drawn from train_data; X and Y_ are the two
# components of one batch and serve as the model's inputs and labels.
batched_train_data = train_data.batch(32)
iterator = batched_train_data.make_initializable_iterator()
X, Y_ = iterator.get_next()

In [None]:
# Helpers that create the weight and bias variables
def weight_variable(shape):
    """Return a variable named 'W' of the given shape.

    The initial value is drawn from a truncated normal distribution with
    standard deviation 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='W')
def bias_variable(shape):
    """Return a variable named 'b' of the given shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape), name='b')

# Helpers that build the CNN layers
def conv2d(X, W):
    """Convolve `X` with the filter `W` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(X, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(X):
    """Max-pool `X` with ksize [1,2,2,1], strides [1,2,2,1], 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(X, ksize=window, strides=step, padding='SAME')

# First layer: 3x3 convolution (filter shape [3,3,1,32]), ReLU, then
# 2x2 max pooling.
with tf.name_scope('conv1'):
    ## variables
    W_conv1 = weight_variable([3,3,1,32])
    b_conv1 = bias_variable([32])
    ## build the layer
    # Reshape each input row into a 28x28 image with a single channel.
    X_image = tf.reshape(X, [-1, 28, 28, 1])
    h_conv1 = tf.nn.relu(conv2d(X_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

# Second layer: 3x3 convolution (filter shape [3,3,32,64]), ReLU, then
# 2x2 max pooling.
with tf.name_scope('conv2'):
    ## variables
    W_conv2 = weight_variable([3,3,32,64])
    b_conv2 = bias_variable([64])
    ## build the layer
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

# Fully-connected layer: flatten the pooled features to 7*7*64 values per
# example and map them to 1024 ReLU units.
with tf.name_scope('full'):
    W_fc1 = weight_variable([7*7*64, 1024])
    b_fc1 = bias_variable([1024])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1)+b_fc1)

# Dropout: A Simple Way to Prevent Neural Networks from Overfitting
# https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf
with tf.name_scope('dropout'):
    # keep_prob is a placeholder without a default, so it has to be fed
    # whenever an op that depends on h_fc1_drop is run.
    keep_prob = tf.placeholder("float", name="keep_prob")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout: linear layer producing 10 logits per example.
with tf.name_scope('readout'):
    W_fc2 = weight_variable([1024,10])
    b_fc2 = bias_variable([10])
    Y = tf.matmul(h_fc1_drop, W_fc2)+b_fc2

# Loss: softmax cross-entropy between the labels Y_ and the logits Y,
# averaged with reduce_mean.
cross_entropy =  tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y_, logits=Y))
# One Adam update step (learning rate 1e-4) that minimizes the loss.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# Predicted class = index of the largest logit along axis 1.
prediction = tf.argmax(Y, 1, name="prediction")
# Compare predictions with the argmax of the labels; accuracy is the mean
# of the resulting booleans cast to float32.
correct_prediction = tf.equal(prediction, tf.argmax(Y_, 1), name="correction")
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")

In [None]:
# The session is created without a `with` block and is not closed here.
sess = tf.Session()

# Initialize the model variables first, then the input iterator.
init_variables = tf.global_variables_initializer()
sess.run(init_variables)
sess.run(iterator.initializer)

# Run 50 training steps with a dropout keep probability of 0.5, printing
# the step index before each one.
for step in range(50):
    print(step)
    sess.run(train_step, feed_dict={keep_prob: 0.5})