# Problems

* 행(hang)이 걸리는 경우가 있음
    * tfrecords 를 다시 만들 필요가 없을 때에도 `tf.python_io.TFRecordWriter` 로 파일을 열기만 하고 그냥 넘어갔음
    * 이렇게 되면 기존 파일이 비워져서 데이터가 다 지워짐
    * 이 상태에서 실행하면 읽을 레코드가 없어서 행이 걸림!

## Prepare data

In [1]:
%matplotlib inline
import tensorflow as tf
import scipy
import matplotlib.pyplot as plt
slim = tf.contrib.slim

In [23]:
import os, sys, glob, shutil
import urllib
import tarfile
import numpy as np
from scipy.io import loadmat
import time

In [3]:
def download_file(url, dest=None):
    """Download `url` and store it as a local file.

    Args:
        url: Address of the file to fetch.
        dest: Path to write to. When falsy, the file is saved under ``data/``
            using the last ``/``-separated component of `url` as its name.
    """
    # `urlretrieve` is `urllib.urlretrieve` on Python 2 and
    # `urllib.request.urlretrieve` on Python 3; accept either.
    try:
        from urllib import urlretrieve
    except ImportError:
        from urllib.request import urlretrieve

    if not dest:
        dest = 'data/' + url.split('/')[-1]

    # The target directory (e.g. a fresh `data/`) may not exist yet; create it
    # first so the download has somewhere to be written.
    dest_dir = os.path.dirname(dest)
    if dest_dir and not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)

    urlretrieve(url, dest)

### Download TF Flower dataset

In [4]:
# Class names of the flower dataset. Each name is also a sub-directory of the
# extracted archive, and its position in this list is used as the integer
# class label when the TFRecords are written.
LABELS = ["daisy", "dandelion", "roses", "sunflowers", "tulips"]
# Location of the gzipped tar archive holding the flower photos.
url = "http://download.tensorflow.org/example_images/flower_photos.tgz"

In [5]:
# Fetch and unpack the dataset only when the extracted directory is missing.
if not os.path.exists("data/flower_photos"):
    print("Download flower dataset..")
    download_file(url)
    print("Extracting dataset..")
    # Use the archive as a context manager so its file handle is closed even
    # when extraction fails.
    with tarfile.open("data/flower_photos.tgz", "r:gz") as archive:
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only use it with archives from a trusted source.
        archive.extractall(path="data/")
    # The downloaded .tgz is deliberately left on disk (no need to remove it).

### Split dataset into train/test

In [6]:
# Fraction of each class that goes to the training split; the rest is test.
train_ratio = 0.9
# Set to True to force the split to be rebuilt even if it already exists.
remake = False
parent_dir = "data/flower_photos"
train_dir = os.path.join(parent_dir, "train")
test_dir = os.path.join(parent_dir, "test")

if remake or not (os.path.exists(train_dir) and os.path.exists(test_dir)):
    # Start from empty split directories. Leftovers from an earlier (or
    # partial) split would otherwise stay behind and could end up in both the
    # train and the test set.
    for split_dir in (train_dir, test_dir):
        if tf.gfile.Exists(split_dir):
            tf.gfile.DeleteRecursively(split_dir)

    # tf.gfile.MakeDirs creates directories recursively and ignores existing ones.
    for label in LABELS:
        tf.gfile.MakeDirs(os.path.join(train_dir, label))
        tf.gfile.MakeDirs(os.path.join(test_dir, label))

    # Copy every image of each class into its train or test sub-directory.
    for label in LABELS:
        # glob returns files in arbitrary order; sort so that the same files
        # land in the same split on every run.
        paths = sorted(glob.glob(os.path.join(parent_dir, label) + "/*.jpg"))
        num_examples = len(paths)
        for j, path in enumerate(paths):
            # The first `train_ratio` share of the class goes to train.
            split_dir = train_dir if j < (num_examples * train_ratio) else test_dir
            to_path = os.path.join(split_dir, label, os.path.basename(path))
            tf.gfile.Copy(path, to_path)

In [7]:
!find ./data/flower_photos/test ./data/flower_photos/train -type f | cut -d/ -f4 | uniq -c

    364 test
   3306 train


### Convert to `TFRecords` format

In [8]:
def _bytes_features(value):
    """Wrap `value` (a list of byte strings, e.g. ``[raw]``) in a tf.train.Feature."""
    bytes_list = tf.train.BytesList(value=value)
    return tf.train.Feature(bytes_list=bytes_list)


def _int64_features(value):
    """Wrap `value` (a sequence of integers) in a tf.train.Feature."""
    int64_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int64_list)

In [9]:
def dir_to_tfrecords(dir_name, tfrecords_path):
    '''Convert a directory of per-label image folders into one TFRecords file.

    For every label in `LABELS`, all `dir_name/<label>/*.jpg` files are loaded
    as RGB, resized to 64x64 and written as a tf.train.Example with features:
        "shape": int64 list holding the array shape of the resized image
        "image": the raw bytes of the resized image array
        "label": index of the label within `LABELS`

    No existence check is done: `tfrecords_path` is opened for writing right
    away, so only call this when the file really has to be (re)built.

    return: # of image files written
    '''
    # `import scipy` alone does not guarantee that the `misc` submodule is loaded.
    import scipy.misc

    num_files = 0
    with tf.python_io.TFRecordWriter(tfrecords_path) as writer:
        for label_idx, label in enumerate(LABELS):
            pattern = os.path.join(dir_name, label) + "/*.jpg"
            # Sorted so that the record order is reproducible across runs.
            for path in sorted(glob.glob(pattern)):
                # Force three channels: a grayscale JPEG would otherwise give
                # a 2-D array, which the reader (fixed 3-element "shape",
                # reshape to [64, 64, 3]) cannot handle.
                im = scipy.misc.imread(path, mode='RGB')
                im = scipy.misc.imresize(im, [64, 64])

                features = {
                    "shape": _int64_features(im.shape),
                    "image": _bytes_features([im.tobytes()]),
                    "label": _int64_features([label_idx])
                }
                example = tf.train.Example(features=tf.train.Features(feature=features))

                writer.write(example.SerializeToString())
                num_files += 1

    return num_files

In [10]:
print("Convert dataset to TFRecord format..")

tfrecords_train_fn = "data/flower_photos_train.tfrecords"
tfrecords_test_fn = "data/flower_photos_test.tfrecords"

num_train = 0
num_test = 0

# Set to True to force both TFRecords files to be rewritten.
remake_tfrecords = False

tfrecords_ready = tf.gfile.Exists(tfrecords_train_fn) and tf.gfile.Exists(tfrecords_test_fn)

if remake_tfrecords or not tfrecords_ready:
    num_train = dir_to_tfrecords('data/flower_photos/train/', tfrecords_train_fn)
    num_test = dir_to_tfrecords('data/flower_photos/test/', tfrecords_test_fn)
else:
    # The files already exist: count their records instead of relying on
    # hard-coded numbers that go stale as soon as the dataset changes.
    num_train = sum(1 for _ in tf.python_io.tf_record_iterator(tfrecords_train_fn))
    num_test = sum(1 for _ in tf.python_io.tf_record_iterator(tfrecords_test_fn))

# Single-string print() gives the same output on Python 2 and Python 3.
print("{} {}".format(num_train, num_test))

Convert dataset to TFRecord format..
3306 364


## Read data from TFRecords

In [33]:
def get_batch(tfrecords_fn, batch_size, shuffle=False, image_shape=(64, 64, 3), num_classes=5):
    """Build a queue-based input pipeline that yields batches from a TFRecords file.

    Args:
        tfrecords_fn: Path of a TFRecords file whose examples carry the
            features "shape" (3 x int64), "image" (raw uint8 bytes) and
            "label" (int64).
        batch_size: Number of examples per batch.
        shuffle: When True batches come from `tf.train.shuffle_batch`,
            otherwise from `tf.train.batch`.
        image_shape: Static [height, width, channels] every decoded image is
            reshaped to. The default matches the 64x64 RGB records used in
            this notebook.
        num_classes: Depth of the one-hot label encoding.

    Returns:
        (images, labels): batched float32 images divided by 255, and the
        matching one-hot labels.
    """
    image_shape = list(image_shape)

    with tf.variable_scope("get_batch"):
        # Input pipeline: filename queue -> reader -> decoder -> preprocessing.
        filename_queue = tf.train.string_input_producer([tfrecords_fn])
        reader = tf.TFRecordReader()
        _, records = reader.read(filename_queue)

        features = tf.parse_single_example(
            records,
            features={
                "shape": tf.FixedLenFeature([3], tf.int64),
                "image": tf.FixedLenFeature([], tf.string),
                "label": tf.FixedLenFeature([], tf.int64)
            }
        )

        image = tf.decode_raw(features["image"], tf.uint8)
        label = tf.cast(features["label"], tf.int32)

        image = tf.reshape(image, image_shape)
        resized_image = tf.image.resize_images(images=image, size=image_shape[:2])
        resized_image = tf.cast(resized_image, tf.float32)
        # Strictly speaking this normalization does not have to happen here,
        # but the TF docs place preprocessing right after the decoder, so
        # this is the conventional spot for it.
        # Alternatives that were tried: x / 127.5 - 1.0 and
        # tf.image.per_image_standardization.
        resized_image = resized_image / 255.0

        one_hot_label = tf.one_hot(label, depth=num_classes)

        # Every image entering the batching op must have the same fixed size,
        # since a mini-batch is assembled into one tensor.
        # Open question from the author: how would FCN-style variable-size
        # inputs be handled?
        min_after_dequeue = batch_size * 10  # recommended by cs20si
        capacity = min_after_dequeue + batch_size * 3  # recommended by the official TF docs
        params = {
            'tensors': [resized_image, one_hot_label],
            'batch_size': batch_size,
            'capacity': capacity,
            'num_threads': 1,
            'allow_smaller_final_batch': True
        }

        if shuffle:
            # min_after_dequeue is only accepted by shuffle_batch.
            params['min_after_dequeue'] = min_after_dequeue
            images, labels = tf.train.shuffle_batch(**params)
        else:
            images, labels = tf.train.batch(**params)

        return images, labels

In [34]:
def build_nets(tfrecords_fn, training, batch_size, batch_shuffle=True):
    """Build the input pipeline, the conv net and its train/eval ops.

    Args:
        tfrecords_fn: TFRecords file passed on to `get_batch`.
        training: Passed as `is_training` to batch norm and dropout.
        batch_size: Batch size passed on to `get_batch`.
        batch_shuffle: Passed to `get_batch` as its `shuffle` argument.

    Returns:
        (accuracy, loss, train_op, summary_op)
    """
    with tf.variable_scope("build_nets"):
        images, targets = get_batch(tfrecords_fn, batch_size=batch_size, shuffle=batch_shuffle)

        batch_norm_kwargs = {'is_training': training, 'scale': True, 'decay': 0.99}

        # Three stages of conv-conv-pool-dropout; the filter count doubles
        # from one stage to the next.
        hidden = images
        with slim.arg_scope([slim.conv2d],
                            kernel_size=[3, 3],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_kwargs):
            for width in (32, 64, 128):
                hidden = slim.conv2d(hidden, width)
                hidden = slim.conv2d(hidden, width)
                hidden = slim.max_pool2d(hidden, kernel_size=[2, 2], padding='same')
                hidden = slim.dropout(hidden, 0.7, is_training=training)

        # Linear classifier head with one output per class.
        logits = slim.fully_connected(slim.flatten(hidden), 5, activation_fn=None)

        with tf.variable_scope('softmax'):
            probabilities = tf.nn.softmax(logits)

        with tf.variable_scope('accuracy'):
            predicted = tf.argmax(logits, axis=1)
            expected = tf.argmax(targets, axis=1)
            accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, expected), tf.float32))

        with tf.variable_scope('loss'):
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets))

        # Ops collected under UPDATE_OPS have to be run together with the
        # train step; this is required even when using slim.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

        tf.summary.scalar("loss", loss)
        tf.summary.scalar("accuracy", accuracy)

        # Per-variable histograms are left out on purpose: heavy operation.
        summary_op = tf.summary.merge_all()

        return accuracy, loss, train_op, summary_op

In [35]:
# Discard any previously built graph before building the training graph.
tf.reset_default_graph()

# Is it odd to decide batch_size here...?
batch_size = 128
accuracy, loss, train_op, summary_op = build_nets(tfrecords_train_fn, training=True, batch_size=batch_size)

In [36]:
# Train for a fixed number of epochs, writing summaries and one checkpoint per epoch.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Start the queue-runner threads that feed the input pipeline.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    saver = tf.train.Saver(max_to_keep=100)  # do not pass None here (author's note)
    summary_writer = tf.summary.FileWriter(logdir='./summary/train', graph=sess.graph, flush_secs=10)

    # Make sure the checkpoint directory exists before the first save.
    tf.gfile.MakeDirs('checkpoints')

    n_epoch = 60
    # At least one iteration per epoch, so the averages below never divide by zero.
    iter_per_epoch = max(1, num_train // batch_size)

    try:
        for i in range(n_epoch):
            avg_acc = 0.
            avg_loss = 0.
            st = time.time()
            for _ in range(iter_per_epoch):
                _, cur_summary, cur_acc, cur_loss = sess.run([train_op, summary_op, accuracy, loss])
                avg_acc += cur_acc
                avg_loss += cur_loss
                # Summaries are tagged with the epoch index, the same number
                # the checkpoint of this epoch is saved under.
                summary_writer.add_summary(cur_summary, global_step=i)

            avg_acc /= iter_per_epoch
            avg_loss /= iter_per_epoch
            print("epoch: {}, acc: {:.2%}, loss: {:.4f} - {:.0f}s".format(i, avg_acc, avg_loss, time.time()-st))
            saver.save(sess, 'checkpoints/flower', global_step=i)
    finally:
        # Always stop the input threads and flush pending summaries, even
        # when training is interrupted or fails.
        coord.request_stop()
        coord.join(threads)
        summary_writer.close()

epoch: 0, acc: 33.12%, loss: 6.0187 - 3s
epoch: 1, acc: 28.22%, loss: 3.6577 - 3s
epoch: 2, acc: 29.47%, loss: 2.7814 - 3s
epoch: 3, acc: 30.56%, loss: 2.3881 - 3s
epoch: 4, acc: 29.62%, loss: 2.2485 - 3s
epoch: 5, acc: 30.00%, loss: 2.2522 - 3s
epoch: 6, acc: 28.50%, loss: 2.2389 - 3s
epoch: 7, acc: 32.62%, loss: 1.9379 - 3s
epoch: 8, acc: 38.94%, loss: 1.7381 - 3s
epoch: 9, acc: 42.47%, loss: 1.6688 - 3s
epoch: 10, acc: 43.47%, loss: 1.8308 - 3s
epoch: 11, acc: 44.09%, loss: 1.6163 - 3s
epoch: 12, acc: 46.53%, loss: 1.5229 - 3s
epoch: 13, acc: 48.12%, loss: 1.5584 - 3s
epoch: 14, acc: 43.72%, loss: 1.5569 - 3s
epoch: 15, acc: 42.53%, loss: 1.5322 - 3s
epoch: 16, acc: 45.12%, loss: 1.3845 - 3s
epoch: 17, acc: 46.56%, loss: 1.3609 - 3s
epoch: 18, acc: 50.00%, loss: 1.2463 - 3s
epoch: 19, acc: 50.06%, loss: 1.2234 - 3s
epoch: 20, acc: 51.53%, loss: 1.2144 - 3s
epoch: 21, acc: 52.72%, loss: 1.2221 - 3s
epoch: 22, acc: 50.44%, loss: 1.2387 - 3s
epoch: 23, acc: 52.28%, loss: 1.1629 - 3s
ep

## Evaluation

In [37]:
# Build the evaluation graph: discard the current graph and build the network
# again on the test TFRecords with training=False, feeding the whole test set
# (batch_size=num_test) as one unshuffled batch.
tf.reset_default_graph()
accuracy, loss, train_op, summary_op = build_nets(tfrecords_test_fn, training=False, 
                                                  batch_size=num_test, batch_shuffle=False)

In [38]:
# Evaluate every saved checkpoint on the test batch and log the result per checkpoint.
with tf.Session() as sess:
    # No variable initializer is run: all values come from the restored checkpoints.
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state("checkpoints/")
    print("=== checkpoints ===")
    print(ckpt)

    # Fail with a clear message before any threads are started.
    if ckpt is None:
        raise IOError("no checkpoint state found under 'checkpoints/'; run the training cell first")

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    summary_writer = tf.summary.FileWriter(logdir='./summary/test', graph=sess.graph, flush_secs=10)

    try:
        for v in ckpt.all_model_checkpoint_paths:
            saver.restore(sess, v)
            # The step is the numeric suffix of the checkpoint path,
            # e.g. "checkpoints/flower-12" -> 12.
            global_step = int(v.split('/')[-1].split('-')[-1])

            # One run evaluates one batch, which was built with batch_size=num_test.
            cur_summary, cur_acc, cur_loss = sess.run([summary_op, accuracy, loss])
            summary_writer.add_summary(cur_summary, global_step=global_step)

            print("{} {} {}".format(global_step, cur_acc, cur_loss))
    finally:
        # Always stop the input threads and flush pending summaries.
        coord.request_stop()
        coord.join(threads)
        summary_writer.close()

=== checkpoints ===
model_checkpoint_path: "checkpoints/flower-59"
all_model_checkpoint_paths: "checkpoints/flower-0"
all_model_checkpoint_paths: "checkpoints/flower-1"
all_model_checkpoint_paths: "checkpoints/flower-2"
all_model_checkpoint_paths: "checkpoints/flower-3"
all_model_checkpoint_paths: "checkpoints/flower-4"
all_model_checkpoint_paths: "checkpoints/flower-5"
all_model_checkpoint_paths: "checkpoints/flower-6"
all_model_checkpoint_paths: "checkpoints/flower-7"
all_model_checkpoint_paths: "checkpoints/flower-8"
all_model_checkpoint_paths: "checkpoints/flower-9"
all_model_checkpoint_paths: "checkpoints/flower-10"
all_model_checkpoint_paths: "checkpoints/flower-11"
all_model_checkpoint_paths: "checkpoints/flower-12"
all_model_checkpoint_paths: "checkpoints/flower-13"
all_model_checkpoint_paths: "checkpoints/flower-14"
all_model_checkpoint_paths: "checkpoints/flower-15"
all_model_checkpoint_paths: "checkpoints/flower-16"
all_model_checkpoint_paths: "checkpoints/flower-17"
all_mod

In [17]:
# [n.name for n in tf.get_default_graph().as_graph_def().node]