# Problems

* 실행이 멈추는(hang) 경우가 있음
    * tfrecords 를 새로 만들 필요가 없을 때에도 `tf.python_io.TFRecordWriter` 로 파일을 열고 그냥 넘어갔음
    * 이렇게 되면 기존 tfrecords 파일의 데이터가 다 지워짐
    * 이 상태에서 실행하면 행(hang)이 걸림!

## Prepare data

In [1]:
%matplotlib inline
import tensorflow as tf
import scipy
import matplotlib.pyplot as plt
slim = tf.contrib.slim

In [2]:
import os
import sys
import glob
import urllib
import tarfile
import numpy as np
from scipy.io import loadmat

In [3]:
def download_file(url, dest=None):
    """Download ``url`` to a local file.

    Args:
        url: Address of the resource to fetch.
        dest: Target file path. When empty or ``None`` the file is stored
            under ``data/`` using the last path component of ``url``.
    """
    # ``urlretrieve`` lives in ``urllib`` on Python 2 and in
    # ``urllib.request`` on Python 3; resolve it here so the helper works
    # under either interpreter.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    if not dest:
        dest = 'data/' + url.split('/')[-1]

    # urlretrieve() cannot write into a directory that does not exist yet
    # (e.g. ``data/`` on a fresh checkout), so create it first.
    dest_dir = os.path.dirname(dest)
    if dest_dir and not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)

    urlretrieve(url, dest)

### TF Flower example

In [4]:
# Class names of the flower dataset. Each name is also the sub-directory
# holding that class's photos, and its position in the list is the integer
# label written to the TFRecord files.
LABELS = [
    "daisy",
    "dandelion",
    "roses",
    "sunflowers",
    "tulips",
]
# Location of the gzipped tar archive with the flower photos.
url = "http://download.tensorflow.org/example_images/flower_photos.tgz"

In [5]:
# Fetch and unpack the flower photos only when they are not on disk yet.
if not os.path.exists("data/flower_photos"):
    print("Download flower dataset..")
    download_file(url)
    print("Extracting dataset..")
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; that is acceptable only because the archive comes from a
    # known source -- do not reuse this for untrusted archives.
    # The context manager closes the archive even if extraction fails.
    with tarfile.open("data/flower_photos.tgz", "r:gz") as archive:
        archive.extractall(path="data/")
    # The downloaded archive is deliberately kept; deleting it is optional:
    #     os.remove("data/flower_photos.tgz")

In [6]:
def _bytes_features(value):
    """Wrap ``value`` in a ``tf.train.Feature`` backed by a ``BytesList``."""
    bytes_list = tf.train.BytesList(value=value)
    return tf.train.Feature(bytes_list=bytes_list)


def _int64_features(value):
    """Wrap ``value`` in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int64_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int64_list)

In [8]:
print("Convert dataset to TFRecord format..")

# No separate validation split, to keep things simple.
train_ratio = 0.9

tfrecords_train_fn = "data/flower_photos_train.tfrecords"
tfrecords_test_fn = "data/flower_photos_test.tfrecords"

num_train = 0
num_test = 0

# Set to True to rebuild the TFRecord files even when both already exist.
remake_tfrecords = False

tfrecords_ready = (tf.gfile.Exists(tfrecords_train_fn)
                   and tf.gfile.Exists(tfrecords_test_fn))

if remake_tfrecords or not tfrecords_ready:
    # `import scipy` alone is not guaranteed to load the `misc` submodule.
    import scipy.misc

    # The writers are created only on this branch, i.e. only when the files
    # really have to be (re)written; existing files are otherwise left alone.
    train_writer = tf.python_io.TFRecordWriter(tfrecords_train_fn)
    test_writer = tf.python_io.TFRecordWriter(tfrecords_test_fn)

    try:
        for i, label in enumerate(LABELS):
            dir_name = os.path.join("data/flower_photos", label)
            # glob() returns paths in arbitrary order; sorting makes the
            # train/test split identical from run to run.
            paths = sorted(glob.glob(dir_name + "/*.jpg"))
            num_examples = len(paths)
            for j, path in enumerate(paths):
                im = scipy.misc.imread(path)
                im = scipy.misc.imresize(im, [64, 64])

                # Raw pixel bytes; the reader decodes them back as uint8.
                im_raw = im.tobytes()
                features = {
                    "shape": _int64_features(im.shape),
                    "image": _bytes_features([im_raw]),
                    "label": _int64_features([i])
                }

                example = tf.train.Example(features=tf.train.Features(feature=features))

                # Within each class, the first `train_ratio` share of the
                # files goes to the training set, the rest to the test set.
                is_train = j < (num_examples * train_ratio)

                if is_train:
                    num_train += 1
                    train_writer.write(example.SerializeToString())
                else:
                    num_test += 1
                    test_writer.write(example.SerializeToString())
    finally:
        # Close the writers even when an image fails to load.
        train_writer.close()
        test_writer.close()
else:
    # Count the stored records instead of hard-coding the dataset sizes, so
    # the numbers always match the files that are actually on disk.
    num_train = sum(1 for _ in tf.python_io.tf_record_iterator(tfrecords_train_fn))
    num_test = sum(1 for _ in tf.python_io.tf_record_iterator(tfrecords_test_fn))

# Formatted so the printed text is the same under Python 2 and Python 3.
print("%d %d" % (num_train, num_test))

Convert dataset to TFRecord format..
3306 364


## Read data from TFRecords

In [9]:
def get_batch(tfrecords_fn, batch_size=100, shuffle=False,
              image_shape=(64, 64, 3), num_classes=5):
    """Build a queue-based input pipeline that yields mini-batches.

    Each record is parsed with the features "shape" (three int64 values),
    "image" (raw uint8 bytes) and "label" (a single int64).

    Args:
        tfrecords_fn: Path of the TFRecord file to read.
        batch_size: Number of examples per batch.
        shuffle: When true, batches come from ``tf.train.shuffle_batch``;
            otherwise from ``tf.train.batch``.
        image_shape: Static ``(height, width, channels)`` every decoded image
            is reshaped to. Defaults to the 64x64x3 images used in this file.
        num_classes: Depth of the one-hot label encoding.

    Returns:
        Tuple ``(images, labels)``: float32 images scaled to ``[-1, 1]`` and
        the matching one-hot labels.
    """
    with tf.variable_scope("get_batch"):
        # Input pipeline: filename queue -> reader -> decoder -> preprocessing.
        filename_queue = tf.train.string_input_producer([tfrecords_fn])
        reader = tf.TFRecordReader()
        _, serialized = reader.read(filename_queue)

        features = tf.parse_single_example(
            serialized,
            features={
                # "shape" is still parsed but not used below: every image is
                # reshaped to the static `image_shape`.
                "shape": tf.FixedLenFeature([3], tf.int64),
                "image": tf.FixedLenFeature([], tf.string),
                "label": tf.FixedLenFeature([], tf.int64),
            }
        )

        image = tf.decode_raw(features["image"], tf.uint8)
        label = tf.cast(features["label"], tf.int32)

        # All examples must share one fixed size because a mini-batch is a
        # single tensor. No resize op is needed: the reshape already fixes
        # the final spatial size.
        image = tf.reshape(image, list(image_shape))
        image = tf.cast(image, tf.float32)
        # Map uint8 pixel values from [0, 255] to [-1, 1]. The normalization
        # lives here because preprocessing is the stage that follows decoding.
        image = image / 127.5 - 1.0

        one_hot_label = tf.one_hot(label, depth=num_classes)

        min_after_dequeue = batch_size * 10  # recommended by cs20si
        capacity = min_after_dequeue + batch_size * 3  # recommended by the official TF docs
        batch_kwargs = {
            'tensors': [image, one_hot_label],
            'batch_size': batch_size,
            'capacity': capacity,
            'num_threads': 1,
            'allow_smaller_final_batch': True
        }

        if shuffle:
            # Only shuffle_batch accepts `min_after_dequeue`.
            images, labels = tf.train.shuffle_batch(
                min_after_dequeue=min_after_dequeue, **batch_kwargs)
        else:
            images, labels = tf.train.batch(**batch_kwargs)

        return images, labels

In [10]:
# `training` is baked into the graph when it is built, so it can be a plain
# Python value and does not need to be a placeholder.
def build_nets(tfrecords_fn, training, batch_size=100, batch_shuffle=True):
    """Assemble the input pipeline, the CNN classifier and its training ops.

    Args:
        tfrecords_fn: TFRecord file handed to ``get_batch``.
        training: Passed to batch normalization as ``is_training``.
        batch_size: Mini-batch size forwarded to ``get_batch``.
        batch_shuffle: Forwarded to ``get_batch`` as ``shuffle``.

    Returns:
        Tuple ``(accuracy, loss, train_op, summary_op)``.
    """
    with tf.variable_scope("build_nets"):
        images, targets = get_batch(tfrecords_fn, batch_size=batch_size, shuffle=batch_shuffle)

        batch_norm_args = {'is_training': training, 'scale': True, 'decay': 0.99}

        # Four conv-conv-pool stages; the filter count doubles per stage.
        # With 64x64x3 inputs the feature maps shrink as:
        #   input:   [64, 64, 3]
        #   stage 1: [32, 32, 64]
        #   stage 2: [16, 16, 128]
        #   stage 3: [8, 8, 256]
        #   stage 4: [4, 4, 512]
        hidden = images
        with slim.arg_scope([slim.conv2d],
                            kernel_size=[3, 3],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_args):
            for width in (64, 128, 256, 512):
                hidden = slim.conv2d(hidden, width)
                hidden = slim.conv2d(hidden, width)
                hidden = slim.max_pool2d(hidden, kernel_size=[2, 2], padding='same')

        flattened = slim.flatten(hidden)
        dense = slim.fully_connected(
            flattened, 1024,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_args)
        logits = slim.fully_connected(dense, 5, activation_fn=None)

        # Class probabilities are added to the graph but not returned.
        with tf.variable_scope('softmax'):
            prob = tf.nn.softmax(logits)

        with tf.variable_scope('accuracy'):
            predicted = tf.argmax(logits, axis=1)
            expected = tf.argmax(targets, axis=1)
            hits = tf.cast(tf.equal(predicted, expected), tf.float32)
            accuracy = tf.reduce_mean(hits)

        with tf.variable_scope('loss'):
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets))

        # Needed even with slim: the ops in UPDATE_OPS have to run together
        # with every training step.
        pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(pending_updates):
            optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
            train_op = optimizer.minimize(loss)

        tf.summary.scalar("loss", loss)
        tf.summary.scalar("accuracy", accuracy)
        for variable in tf.trainable_variables():
            tf.summary.histogram(variable.name.replace(":", "_"), variable)
        summary_op = tf.summary.merge_all()

        return accuracy, loss, train_op, summary_op

In [11]:
# Start from an empty default graph before building the training network.
tf.reset_default_graph()

# The training batch size is fixed here, at graph-construction time
# (author's open question: is this an odd place to decide it?).
accuracy, loss, train_op, summary_op = build_nets(
    tfrecords_fn=tfrecords_train_fn,
    training=True,
    batch_size=128,
)

In [12]:
# Train the network, logging summaries every step and saving a checkpoint
# every 60 steps (plus the final step).
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    # Author's debugging note: a StackOverflow thread
    # (https://stackoverflow.com/questions/37632102/tensorflow-trouble-re-opening-queues-after-restoring-a-session)
    # suggested that with an input pipeline one must save an explicit
    # var_list instead of everything -- that turned out NOT to be the cause.
    saver = tf.train.Saver(max_to_keep=100)  # author's note: passing None here did not work
    summary_writer = tf.summary.FileWriter(logdir='./summary/train', graph=sess.graph, flush_secs=10)

    # Make sure the checkpoint directory exists before the first save, so a
    # fresh checkout does not fail at saver.save().
    tf.gfile.MakeDirs('checkpoints')

    n_iter = 1800

    try:
        for i in range(n_iter):
            _, cur_summary, cur_acc, cur_loss = sess.run([train_op, summary_op, accuracy, loss])
            summary_writer.add_summary(cur_summary, global_step=i)

            if i % 60 == 0 or i == n_iter-1:  # author's estimate: about 30 iterations per epoch
                # Formatted so the printed text is the same on Python 2 and 3.
                print("%s %s %s" % (i, cur_acc, cur_loss))
                saver.save(sess, 'checkpoints/flower', global_step=i)
    finally:
        # Flush pending summaries and stop the queue-runner threads even when
        # a training step raises.
        summary_writer.close()
        coord.request_stop()
        coord.join(threads)

0 0.617188 1.14451
60 0.585938 1.26716
120 0.5 1.5387
180 0.539062 1.20299
240 0.617188 1.00528
300 0.601562 0.898494
360 0.601562 1.04775
420 0.742188 0.836873
480 0.726562 0.710021
540 0.648438 0.83435
600 0.789062 0.55791
660 0.726562 0.754714
720 0.828125 0.516672
780 0.789062 0.503422
840 0.84375 0.394094
900 0.875 0.319184
960 0.960938 0.186435
1020 0.953125 0.175627
1080 0.929688 0.186613
1140 0.953125 0.190176
1200 0.9375 0.136219
1260 0.992188 0.0327191
1320 0.96875 0.0799625
1380 0.992188 0.0310558
1440 0.992188 0.0161073
1500 0.992188 0.0147804
1560 0.992188 0.021153
1620 1.0 0.00017167
1680 1.0 0.000622992
1740 1.0 0.000266943
1799 1.0 0.000608409


## Evaluation

In [13]:
# Rebuild the network on a fresh graph for evaluation: batch norm runs with
# is_training=False, batches are not shuffled, and the batch size equals the
# test-set size so a single batch holds `num_test` examples.
tf.reset_default_graph()
accuracy, loss, train_op, summary_op = build_nets(
    tfrecords_fn=tfrecords_test_fn,
    training=False,
    batch_size=num_test,
    batch_shuffle=False,
)

In [14]:
# Evaluate every saved checkpoint on the test set and log one summary per
# checkpoint, keyed by the training step the checkpoint was saved at.
with tf.Session() as sess:
    # No variable initializer is run here: the variables are restored from
    # a checkpoint before each evaluation.
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state("checkpoints/")
    print("=== checkpoints ===")
    print(ckpt)

    # Guard against a missing checkpoint state (ckpt is None), which would
    # otherwise fail with an AttributeError below.
    checkpoint_paths = ckpt.all_model_checkpoint_paths if ckpt is not None else []

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    summary_writer = tf.summary.FileWriter(logdir='./summary/test', graph=sess.graph, flush_secs=10)

    try:
        for v in checkpoint_paths:
            saver.restore(sess, v)
            # Checkpoint paths look like "checkpoints/flower-<step>"; recover
            # the step as an integer for the summary writer.
            global_step = int(v.split('/')[-1].split('-')[-1])

            cur_summary, cur_acc, cur_loss = sess.run([summary_op, accuracy, loss])
            summary_writer.add_summary(cur_summary, global_step=global_step)

            # Formatted so the printed text is the same on Python 2 and 3.
            print("%s %s %s" % (global_step, cur_acc, cur_loss))
    finally:
        # Flush pending summaries and stop the queue-runner threads even when
        # restoring or evaluating raises.
        summary_writer.close()
        coord.request_stop()
        coord.join(threads)

=== checkpoints ===
model_checkpoint_path: "checkpoints/flower-1799"
all_model_checkpoint_paths: "checkpoints/flower-0"
all_model_checkpoint_paths: "checkpoints/flower-60"
all_model_checkpoint_paths: "checkpoints/flower-120"
all_model_checkpoint_paths: "checkpoints/flower-180"
all_model_checkpoint_paths: "checkpoints/flower-240"
all_model_checkpoint_paths: "checkpoints/flower-300"
all_model_checkpoint_paths: "checkpoints/flower-360"
all_model_checkpoint_paths: "checkpoints/flower-420"
all_model_checkpoint_paths: "checkpoints/flower-480"
all_model_checkpoint_paths: "checkpoints/flower-540"
all_model_checkpoint_paths: "checkpoints/flower-600"
all_model_checkpoint_paths: "checkpoints/flower-660"
all_model_checkpoint_paths: "checkpoints/flower-720"
all_model_checkpoint_paths: "checkpoints/flower-780"
all_model_checkpoint_paths: "checkpoints/flower-840"
all_model_checkpoint_paths: "checkpoints/flower-900"
all_model_checkpoint_paths: "checkpoints/flower-960"
all_model_checkpoint_paths: "chec

In [None]:
# List the name of every node in the current default graph. This stays a bare
# expression so the notebook displays the resulting list.
[n.name for n in tf.get_default_graph().as_graph_def().node]