## 참고

이거 `inputpipe.ipynb` 파일처럼 바꾸다가 귀찮아서 중간에 그만둠. 실행해보려면 마저 바꿔야함... 귀찮으면 그냥 `inputpipe.ipynb` 를 참고하자.

# Problems

* 행(hang)이 걸리는 경우가 있음
    * tfrecords 를 새로 만들 필요가 없을 때에도 writer_io 로 파일을 열고 그냥 넘어갔음
    * 이렇게 되면 기존 데이터가 다 지워짐
    * 이 상태에서 실행하면 행(hang)이 걸림!
* TFRecords 파일을 하나로 만들면 shuffle 이 제대로 안됨
    * 이걸 여러개로 만들어줘야하는듯
    * https://stackoverflow.com/questions/35657015/tfrecords-and-record-shuffling
        * sharding your input
        * 전체를 다 로드하지 않으면 full-shuffle 은 안됨
    * https://stackoverflow.com/questions/34258043/getting-good-mixing-with-many-input-datafiles-in-tensorflow
        * `tf.train.shuffle_batch` 를 쓰게 되면 `min_after_dequeue` 를 높게 잡아줘야 함. 그래야 셔플이 제대로 된다
        * `tf.train.shuffle_batch_join` 을 쓰면 여러 파일에서 인풋을 받을 수 있다. 이걸 쓰는 게 더 좋은 듯하다

## Prepare data

In [1]:
%matplotlib inline
import tensorflow as tf
import scipy
import matplotlib.pyplot as plt
slim = tf.contrib.slim

In [2]:
import os, sys, glob, shutil
import urllib
import tarfile
import numpy as np
from scipy.io import loadmat
import time

In [3]:
def download_file(url, dest=None):
    """Download ``url`` to a local file.

    Args:
        url: Address of the file to fetch.
        dest: Target path. When falsy, the file is stored under ``data/``
            using the last path component of ``url`` as its name.

    Returns:
        The path the file was written to.
    """
    # Function-scope import keeps this runnable on Python 2 and Python 3:
    # ``urlretrieve`` lives in ``urllib.request`` on Python 3.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    if not dest:
        dest = 'data/' + url.split('/')[-1]

    # urlretrieve opens the target file directly, so the directory that
    # holds it has to exist beforehand.
    dest_dir = os.path.dirname(dest)
    if dest_dir and not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)

    urlretrieve(url, dest)
    return dest

### Download TF Flower dataset

In [4]:
# Class names of the flower dataset; the position of a name in this list is
# used as its integer label when the TFRecords files are written.
LABELS = [
    "daisy",
    "dandelion",
    "roses",
    "sunflowers",
    "tulips",
]

# Location of the flower photo archive.
url = "http://download.tensorflow.org/example_images/flower_photos.tgz"

In [5]:
# Fetch and unpack the flower photos unless they are already on disk.
if not os.path.exists("data/flower_photos"):
    # The archive is written to data/<file name>, so the directory must exist.
    if not os.path.isdir("data"):
        os.makedirs("data")

    print("Download flower dataset..")
    download_file(url)

    print("Extracting dataset..")
    # The context manager closes the archive handle instead of leaking it.
    with tarfile.open("data/flower_photos.tgz", "r:gz") as archive:
        archive.extractall(path="data/")
    # The downloaded archive is deliberately left in place (not removed).

### Split dataset into train/test

In [6]:
# Fraction of every class's images that goes into the training split.
train_ratio = 0.9
# Set to True to redo the split even when both split directories exist.
remake = False
parent_dir = "data/flower_photos"
train_dir = os.path.join(parent_dir, "train")
test_dir = os.path.join(parent_dir, "test")

if not os.path.exists(train_dir) or not os.path.exists(test_dir) or remake:
    # Start from empty split directories: tf.gfile.Copy refuses to overwrite
    # existing files, and leftovers from an earlier split could otherwise end
    # up in both train and test. Only the copies are removed; the originals
    # stay in parent_dir/<label>.
    for split_dir in (train_dir, test_dir):
        if tf.gfile.Exists(split_dir):
            tf.gfile.DeleteRecursively(split_dir)

    for label in LABELS:
        # tf.gfile.MakeDirs creates directories recursively and ignores
        # directories that already exist.
        tf.gfile.MakeDirs(os.path.join(train_dir, label))
        tf.gfile.MakeDirs(os.path.join(test_dir, label))

    for label in LABELS:
        # glob returns files in arbitrary order; sorting makes the split
        # reproducible from run to run.
        paths = sorted(glob.glob(os.path.join(parent_dir, label) + "/*.jpg"))
        train_limit = len(paths) * train_ratio
        for j, path in enumerate(paths):
            # The first train_ratio share of each class is the training set.
            split_dir = train_dir if j < train_limit else test_dir
            to_path = os.path.join(split_dir, label, os.path.basename(path))
            tf.gfile.Copy(path, to_path)

In [7]:
!find ./data/flower_photos/test ./data/flower_photos/train -type f | cut -d/ -f4 | uniq -c

    364 test
   3306 train


### Convert to `TFRecords` format

In [8]:
def _bytes_features(value):
    """Wrap ``value`` in a ``tf.train.Feature`` holding a ``BytesList``."""
    bytes_list = tf.train.BytesList(value=value)
    return tf.train.Feature(bytes_list=bytes_list)


def _int64_features(value):
    """Wrap ``value`` in a ``tf.train.Feature`` holding an ``Int64List``."""
    int64_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int64_list)

In [9]:
def dir_to_tfrecords(dir_name, tfrecords_path):
    '''Convert an image-containing dir to a TFRecords file without exist check.

    ``dir_name`` is searched for ``<label>/*.jpg`` for every entry of
    ``LABELS``. Each image is resized to 64x64 and written as one example
    with three features: ``shape`` (the array shape), ``image`` (the raw
    array bytes) and ``label`` (index of the class in ``LABELS``).

    ``tfrecords_path`` is opened for writing unconditionally, so a file that
    already exists at that path is replaced.

    return: # of image files
    '''
    # ``import scipy`` alone does not reliably expose the ``misc`` submodule.
    import scipy.misc

    num_files = 0
    with tf.python_io.TFRecordWriter(tfrecords_path) as writer:
        for label_id, label in enumerate(LABELS):
            pattern = os.path.join(dir_name, label) + "/*.jpg"
            for path in glob.glob(pattern):
                # Force three channels: the reader reshapes every record to
                # [64, 64, 3], which a grayscale or CMYK file would break.
                im = scipy.misc.imread(path, mode='RGB')
                im = scipy.misc.imresize(im, [64, 64])

                features = {
                    "shape": _int64_features(im.shape),
                    "image": _bytes_features([im.tobytes()]),
                    "label": _int64_features([label_id])
                }
                example = tf.train.Example(features=tf.train.Features(feature=features))

                writer.write(example.SerializeToString())
                num_files += 1

    return num_files

In [10]:
print("Convert dataset to TFRecord format..")

tfrecords_train_fn = "data/flower_photos_train.tfrecords"
tfrecords_test_fn = "data/flower_photos_test.tfrecords"

# Set to True to rebuild the TFRecords files even when both already exist.
remake_tfrecords = False


def _count_records(tfrecords_path):
    """Return the number of serialized records stored in ``tfrecords_path``."""
    return sum(1 for _ in tf.python_io.tf_record_iterator(tfrecords_path))


tfrecords_exist = (tf.gfile.Exists(tfrecords_train_fn)
                   and tf.gfile.Exists(tfrecords_test_fn))

if remake_tfrecords or not tfrecords_exist:
    num_train = dir_to_tfrecords('data/flower_photos/train/', tfrecords_train_fn)
    num_test = dir_to_tfrecords('data/flower_photos/test/', tfrecords_test_fn)
else:
    # Count the records in the existing files rather than hard-coding the
    # dataset sizes, which silently go stale when the split changes.
    num_train = _count_records(tfrecords_train_fn)
    num_test = _count_records(tfrecords_test_fn)

# Single-argument print call: same output on Python 2 and Python 3.
print("{} {}".format(num_train, num_test))

Convert dataset to TFRecord format..
3306 364


## Read data from TFRecords

In [12]:
def read_data(filename_queue):
    """Build the ops that read and decode one example from TFRecords files.

    Args:
        filename_queue: Queue of file names handed to ``TFRecordReader.read``.

    Returns:
        A tuple ``(image, label)``: ``image`` is a float32 tensor reshaped to
        ``[64, 64, 3]`` and standardized per image, ``label`` is a one-hot
        tensor of depth 5.
    """
    with tf.variable_scope('read_data'):
        reader = tf.TFRecordReader()
        # The record key returned by the reader is not needed.
        _, serialized = reader.read(filename_queue)

        # "shape" stays in the spec so that every record is still required to
        # carry it, even though its value is not used below.
        features = tf.parse_single_example(
            serialized,
            features={
                "shape": tf.FixedLenFeature([3], tf.int64),
                "image": tf.FixedLenFeature([], tf.string),
                "label": tf.FixedLenFeature([], tf.int64)
            }
        )

        image = tf.decode_raw(features["image"], tf.uint8)
        label = tf.cast(features["label"], tf.int32)

        # Preprocessing. The static reshape gives the batching ops a fully
        # defined shape.
        image = tf.reshape(image, [64, 64, 3])
        # NOTE(review): resizing to the size just set by the reshape looks
        # redundant - confirm before removing.
        image = tf.image.resize_images(images=image, size=[64, 64])
        image = tf.cast(image, tf.float32)
        image = tf.image.per_image_standardization(image)

        label = tf.one_hot(label, depth=5)

        return image, label

In [16]:
# https://www.tensorflow.org/programmers_guide/reading_data

def get_batch_join(tfrecords_path_list, batch_size, shuffle=False, num_threads=5, num_epochs=None):
    """Build a queue-based input pipeline that yields batches from TFRecords.

    Args:
        tfrecords_path_list: List of TFRecords file paths. A single path
            string is accepted as well and treated as a one-element list.
        batch_size: Number of examples per batch.
        shuffle: When True, file names are shuffled and batches are drawn with
            ``tf.train.shuffle_batch_join``; otherwise ``tf.train.batch_join``
            is used.
        num_threads: Number of ``read_data`` readers joined into the batch
            queue.
        num_epochs: Passed to ``tf.train.string_input_producer``. When it is
            not None the producer keeps an epoch counter in a local variable,
            so the caller has to run ``tf.local_variables_initializer()``.

    Returns:
        A tuple ``(images, labels)`` of batched tensors. The final batch may
        be smaller than ``batch_size`` (``allow_smaller_final_batch=True``).
    """
    # Accept a bare path (byte or unicode string on Python 2 and 3) so that a
    # caller passing one file name does not hand a scalar to the producer.
    if isinstance(tfrecords_path_list, (bytes, type(u""))):
        tfrecords_path_list = [tfrecords_path_list]

    with tf.variable_scope("get_batch_join"):
        # make input pipeline
        filename_queue = tf.train.string_input_producer(tfrecords_path_list, shuffle=shuffle, num_epochs=num_epochs)
        example_list = [read_data(filename_queue) for _ in range(num_threads)]

        # A larger min_after_dequeue gives better mixing when shuffling; the
        # same capacity is used for the non-shuffling queue.
        min_after_dequeue = batch_size*10
        capacity = min_after_dequeue + 3*batch_size
        if shuffle:
            # train case (shuffle)
            images, labels = tf.train.shuffle_batch_join(tensors_list=example_list, batch_size=batch_size,
                                                         capacity=capacity, min_after_dequeue=min_after_dequeue,
                                                         allow_smaller_final_batch=True)
        else:
            images, labels = tf.train.batch_join(example_list, batch_size, capacity=capacity,
                                                 allow_smaller_final_batch=True)

        return images, labels

In [17]:
def build_nets(name, X, y, training, batch_shuffle=True):
    """Build a small convolutional classifier and its training ops.

    Args:
        name: Variable scope under which the whole network is created.
        X: Input batch fed to the first convolution.
        y: Label batch; its argmax along axis 1 is compared with the argmax
            of the logits, and it is the target of the cross-entropy loss.
        training: Forwarded as ``is_training`` to batch norm and dropout.
        batch_shuffle: Not used by this function.

    Returns:
        A tuple ``(accuracy, loss, train_op, summary_op)``.
    """
    batch_norm_params = {'is_training': training, 'scale': True, 'decay': 0.99}

    with tf.variable_scope(name):
        hidden = X
        with slim.arg_scope([slim.conv2d], kernel_size=[3,3],
                            normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params):
            # Three conv-conv-pool-dropout stages; the filter count doubles
            # from one stage to the next.
            for width in (32, 64, 128):
                hidden = slim.conv2d(hidden, width)
                hidden = slim.conv2d(hidden, width)
                hidden = slim.max_pool2d(hidden, kernel_size=[2,2], padding='same')
                hidden = slim.dropout(hidden, 0.7, is_training=training)

        flattened = slim.flatten(hidden)
        logits = slim.fully_connected(flattened, 5, activation_fn=None)
        with tf.variable_scope('softmax'):
            prob = tf.nn.softmax(logits)

        with tf.variable_scope('accuracy'):
            predicted = tf.argmax(logits, axis=1)
            expected = tf.argmax(y, axis=1)
            accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, expected), tf.float32))
        with tf.variable_scope('loss'):
            per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
            loss = tf.reduce_mean(per_example_loss)

        # Needed even with slim: make the train op depend on the ops collected
        # under UPDATE_OPS so they run with every training step.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
            train_op = optimizer.minimize(loss)

        tf.summary.scalar("loss", loss)
        tf.summary.scalar("accuracy", accuracy)

        # Per-variable histograms are left out on purpose (heavy operation).
        summary_op = tf.summary.merge_all()

        return accuracy, loss, train_op, summary_op

SyntaxError: non-default argument follows default argument (<ipython-input-17-f6ee51bd68f0>, line 1)

In [15]:
# Summaries of the training and evaluation runs go to sibling directories
# below one common root.
summary_root_dir = './summary/comparison/'
summary_train_dir, summary_test_dir = (
    os.path.join(summary_root_dir, 'train'),
    os.path.join(summary_root_dir, 'test'),
)
# Name handed to build_nets as the variable scope of the network.
model_name = 'full-shuffle'

In [33]:
tf.reset_default_graph()

# Author's open question: is this the right place to fix batch_size?
n_epoch = 60
batch_size = 128

# The input pipeline expects a list of file names, and build_nets takes the
# batched tensors (X, y) - not the TFRecords path or a batch size.
X, y = get_batch_join([tfrecords_train_fn], batch_size=batch_size, shuffle=True, num_epochs=n_epoch)
accuracy, loss, train_op, summary_op = build_nets(model_name, X, y, training=True)

In [34]:
# Train for n_epoch epochs, logging a summary per step and saving a
# checkpoint per epoch.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # The input pipeline is built with num_epochs, which keeps its epoch
    # counter in a local variable; it must be initialized before the queue
    # runners start.
    sess.run(tf.local_variables_initializer())

    # Make sure the directory that receives the checkpoints exists.
    tf.gfile.MakeDirs('checkpoints')

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    # Author's note: max_to_keep must not be set to None here.
    saver = tf.train.Saver(max_to_keep=100)
    summary_writer = tf.summary.FileWriter(logdir=summary_train_dir, graph=sess.graph, flush_secs=10)

    iter_per_epoch = num_train // batch_size
    global_step = 0

    try:
        for i in range(n_epoch):
            avg_acc = 0.
            avg_loss = 0.
            st = time.time()
            for _ in range(iter_per_epoch):
                _, cur_summary, cur_acc, cur_loss = sess.run([train_op, summary_op, accuracy, loss])
                avg_acc += cur_acc
                avg_loss += cur_loss
                summary_writer.add_summary(cur_summary, global_step=global_step)
                global_step += 1

            avg_acc /= iter_per_epoch
            avg_loss /= iter_per_epoch
            print("epoch: {}, acc: {:.2%}, loss: {:.4f} - {:.0f}s".format(i, avg_acc, avg_loss, time.time()-st))
            saver.save(sess, 'checkpoints/flower', global_step=global_step)
    finally:
        # Stop the queue threads and flush summaries even when a step fails,
        # so that an error does not leave the reader threads running.
        summary_writer.close()
        coord.request_stop()
        coord.join(threads)

epoch: 0, acc: 39.97%, loss: 2.7265 - 2s
epoch: 1, acc: 51.06%, loss: 1.3215 - 2s
epoch: 2, acc: 56.12%, loss: 1.2440 - 2s
epoch: 3, acc: 58.50%, loss: 1.1427 - 2s
epoch: 4, acc: 60.62%, loss: 1.0805 - 2s
epoch: 5, acc: 60.97%, loss: 1.0710 - 2s
epoch: 6, acc: 64.00%, loss: 0.9791 - 2s
epoch: 7, acc: 64.66%, loss: 0.9756 - 2s
epoch: 8, acc: 67.69%, loss: 0.9193 - 2s
epoch: 9, acc: 66.22%, loss: 0.9532 - 2s
epoch: 10, acc: 69.88%, loss: 0.8479 - 2s
epoch: 11, acc: 71.97%, loss: 0.7725 - 2s
epoch: 12, acc: 72.91%, loss: 0.7360 - 2s
epoch: 13, acc: 76.97%, loss: 0.6455 - 2s
epoch: 14, acc: 76.19%, loss: 0.6664 - 2s
epoch: 15, acc: 75.66%, loss: 0.6717 - 2s
epoch: 16, acc: 77.56%, loss: 0.6189 - 2s
epoch: 17, acc: 78.41%, loss: 0.5700 - 2s
epoch: 18, acc: 77.75%, loss: 0.6266 - 2s
epoch: 19, acc: 79.03%, loss: 0.5840 - 2s
epoch: 20, acc: 80.78%, loss: 0.5279 - 2s
epoch: 21, acc: 80.12%, loss: 0.5348 - 2s
epoch: 22, acc: 79.47%, loss: 0.5476 - 2s
epoch: 23, acc: 81.22%, loss: 0.5066 - 2s
ep

## Evaluation

In [35]:
# build evaluation graph
tf.reset_default_graph()
# One reader, no shuffling and batch_size == num_test: with several readers
# each one would read the single test file on its own and a batch could
# contain duplicated examples.
X, y = get_batch_join([tfrecords_test_fn], batch_size=num_test, shuffle=False, num_threads=1)
# build_nets takes the batched tensors (X, y), not a path or a batch size.
accuracy, loss, train_op, summary_op = build_nets(model_name, X, y, training=False,
                                                  batch_shuffle=False)

In [36]:
# Evaluate every saved checkpoint on one batch of the evaluation pipeline.
with tf.Session() as sess:
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state("checkpoints/")
    print("=== checkpoints ===")
    print(ckpt)
    # No checkpoint state means there is nothing to evaluate; iterate over an
    # empty list instead of failing on the missing attribute.
    checkpoint_paths = ckpt.all_model_checkpoint_paths if ckpt else []

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    summary_writer = tf.summary.FileWriter(logdir=summary_test_dir, graph=sess.graph, flush_secs=10)

    try:
        for v in checkpoint_paths:
            saver.restore(sess, v)
            # Checkpoint files are named "<prefix>-<step>"; recover the step
            # as an integer for the summary writer.
            global_step = int(v.split('/')[-1].split('-')[-1])

            cur_summary, cur_acc, cur_loss = sess.run([summary_op, accuracy, loss])
            summary_writer.add_summary(cur_summary, global_step=global_step)

            # %s formatting keeps the str() rendering of the values.
            print("%s %s %s" % (global_step, cur_acc, cur_loss))
    finally:
        # Always release the queue threads and flush the summaries.
        summary_writer.close()
        coord.request_stop()
        coord.join(threads)

=== checkpoints ===
model_checkpoint_path: "checkpoints/flower-1500"
all_model_checkpoint_paths: "checkpoints/flower-25"
all_model_checkpoint_paths: "checkpoints/flower-50"
all_model_checkpoint_paths: "checkpoints/flower-75"
all_model_checkpoint_paths: "checkpoints/flower-100"
all_model_checkpoint_paths: "checkpoints/flower-125"
all_model_checkpoint_paths: "checkpoints/flower-150"
all_model_checkpoint_paths: "checkpoints/flower-175"
all_model_checkpoint_paths: "checkpoints/flower-200"
all_model_checkpoint_paths: "checkpoints/flower-225"
all_model_checkpoint_paths: "checkpoints/flower-250"
all_model_checkpoint_paths: "checkpoints/flower-275"
all_model_checkpoint_paths: "checkpoints/flower-300"
all_model_checkpoint_paths: "checkpoints/flower-325"
all_model_checkpoint_paths: "checkpoints/flower-350"
all_model_checkpoint_paths: "checkpoints/flower-375"
all_model_checkpoint_paths: "checkpoints/flower-400"
all_model_checkpoint_paths: "checkpoints/flower-425"
all_model_checkpoint_paths: "chec

1450 0.739011 1.04018
INFO:tensorflow:Restoring parameters from checkpoints/flower-1475
1475 0.730769 1.17497
INFO:tensorflow:Restoring parameters from checkpoints/flower-1500
1500 0.763736 1.03598


In [None]:
# [n.name for n in tf.get_default_graph().as_graph_def().node]

# Why does the TF version perform worse than the Keras version?

* 먼저 데이터셋을 확인해보자.

In [None]:
# Pull a single shuffled training batch to inspect the data fed to the model.
tf.reset_default_graph()
# The batching helper defined in this notebook is get_batch_join (there is no
# get_batch); it expects a list of TFRecords paths.
X, y = get_batch_join([tfrecords_train_fn], batch_size=128, shuffle=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        cur_X, cur_y = sess.run([X, y])
    finally:
        # Stop the queue threads even when fetching the batch fails.
        coord.request_stop()
        coord.join(threads)

In [None]:
# Display the shapes of the fetched image batch and label batch.
cur_X.shape, cur_y.shape

In [None]:
# Display the largest and smallest value in the fetched image batch.
np.max(cur_X), np.min(cur_X)

In [None]:
# Display the fetched label batch.
cur_y