## Load flower dataset

In [1]:
import os
import glob

# Directory holding the flower photos; the loop below expects one sub-directory per label.
DATA_DIR = "/tmp/flower_photos"
# Class names, which double as the sub-directory names under DATA_DIR.
LABELS = ["daisy", "dandelion", "roses", "sunflowers", "tulips"]

# Report how many JPEG files each class directory contains.
for label in LABELS:
    jpeg_pattern = os.path.join(DATA_DIR, label) + "/*.jpg"
    print(label, len(glob.glob(jpeg_pattern)))

daisy 633
dandelion 898
roses 641
sunflowers 699
tulips 799


Although the flower dataset is somewhat unbalanced across labels, this is unlikely to be a major problem.<br>
So for simplicity, we're just going to shuffle the dataset and split it into train/test sets.

In [2]:
# One record per image file: {"label": <class name>, "path": <jpeg path>}.
flower_list = []
for label in LABELS:
    jpeg_paths = glob.glob(os.path.join(DATA_DIR, label)+"/*.jpg")
    flower_list.extend({"label": label, "path": jpeg_path} for jpeg_path in jpeg_paths)

# Show the first few records as a sanity check.
print(flower_list[:5])

[{'label': 'daisy', 'path': '/tmp/flower_photos/daisy/1879567877_8ed2a5faa7_n.jpg'}, {'label': 'daisy', 'path': '/tmp/flower_photos/daisy/3445110406_0c1616d2e3_n.jpg'}, {'label': 'daisy', 'path': '/tmp/flower_photos/daisy/517054463_036db655a1_m.jpg'}, {'label': 'daisy', 'path': '/tmp/flower_photos/daisy/3706420943_66f3214862_n.jpg'}, {'label': 'daisy', 'path': '/tmp/flower_photos/daisy/12193032636_b50ae7db35_n.jpg'}]


### Load all images
Loading images is a really slow job. So instead of loading images during the training stage, we first load all images here and then just use the already-loaded images during training.

In [3]:
import util

# Load every image once, up front, and cache it on its record under "img"
# so later cells can reuse it without reading from disk again.
# NOTE(review): later cells copy "img" into (224, 224, 3) slots, so the helper
# presumably returns an array of that shape — confirm in util.
for flower in flower_list:
    flower["img"] = util.load_and_preprocess_image(flower["path"], [224, 224])

In [4]:
import random

# split train/test : 0.7/0.3
# Fixed seed so that Restart & Run All always produces the same split.
SPLIT_SEED = 0

data = dict()

# glob returns files in an arbitrary, filesystem-dependent order, so first put
# the records into a canonical order. Together with the private seeded RNG this
# makes the shuffle deterministic and makes re-running this cell idempotent.
flower_list.sort(key=lambda flower: flower["path"])
random.Random(SPLIT_SEED).shuffle(flower_list)

num_train = int(len(flower_list) * 0.7)
data["train"] = flower_list[:num_train]
data["test"]  = flower_list[num_train:]

print(len(data["train"]), len(data["test"]))

2569 1101


## Build CNN using tensorflow.contrib.slim
What is slim? Just see [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim)

In [5]:
import numpy as np
import tensorflow as tf
slim = tf.contrib.slim

def my_arg_scope():
    """Return the slim arg_scope holding this notebook's layer defaults.

    Defaults for ``slim.conv2d`` and ``slim.fully_connected``: ReLU activation,
    variance-scaling weight initialisation and zero-initialised biases.
    ``slim.conv2d`` additionally defaults to ``padding="SAME"``.

    Returns:
        The scope object yielded by the innermost ``slim.arg_scope``, meant to
        be re-entered later via ``with slim.arg_scope(my_arg_scope()):``.
    """
    shared_defaults = dict(
        activation_fn=tf.nn.relu,
        weights_initializer=slim.variance_scaling_initializer(),
        biases_initializer=tf.zeros_initializer,
    )
    with slim.arg_scope([slim.conv2d, slim.fully_connected], **shared_defaults):
        # The inner scope is created while the outer one is active, and it is
        # this inner scope object that gets handed back to the caller.
        with slim.arg_scope([slim.conv2d], padding="SAME") as combined_scope:
            return combined_scope

        
def build_model( inputs, is_training ):
    """Build the CNN: three conv/max-pool stages, then two fully connected layers.

    Args:
        inputs: image batch tensor fed to the first convolution.
        is_training: passed to ``slim.dropout`` as its ``is_training`` argument.

    Returns:
        Output of the final 5-unit fully connected layer ("fc2"), which has no
        activation function applied.
    """
    # (output channels, conv scope name, pool scope name) for each stage.
    conv_stages = [
        (64, "conv1", "pool1"),
        (128, "conv2", "pool2"),
        (256, "conv3", "pool3"),
    ]

    with slim.arg_scope(my_arg_scope()):
        features = inputs
        for depth, conv_name, pool_name in conv_stages:
            features = slim.conv2d(features, depth, [3, 3], scope=conv_name)
            features = slim.max_pool2d(features, [2, 2], scope=pool_name)

        flat = slim.flatten(features, scope="flatten")
        hidden = slim.fully_connected(flat, 1024, scope="fc1")
        hidden = slim.dropout(hidden, 0.5, is_training=is_training, scope="dropout1")
        # activation_fn=None overrides the ReLU default from my_arg_scope().
        logits = slim.fully_connected(hidden, 5, activation_fn=None, scope="fc2")

    return logits

In [6]:
def create_model( lr ):
    """Build the network, loss and optimizer in a fresh graph and open a session.

    Args:
        lr: learning rate handed to ``tf.train.AdamOptimizer``.

    Returns:
        dict with keys "inputs", "labels", "is_training" (placeholders),
        "pred" (output of ``build_model``), "loss" (total slim loss),
        "opt" (the Adam minimize op) and "sess" (a session in which the
        global variables initializer has already been run). The session is
        not closed here; that is left to the caller.
    """
    # A private graph per model, so repeated calls do not share one graph.
    graph = tf.Graph()
    with graph.as_default():
        inputs = tf.placeholder(tf.float32, [None, 224, 224, 3], name="inputs")
        labels = tf.placeholder(tf.int32, [None], name="labels")
        is_training = tf.placeholder(tf.bool, name="is_training")

        pred = build_model(inputs, is_training)

        # The loss is registered with slim.losses and read back via get_total_loss().
        slim.losses.softmax_cross_entropy(pred, slim.one_hot_encoding(labels, 5))
        total_loss = slim.losses.get_total_loss()
        opt = tf.train.AdamOptimizer(lr).minimize(total_loss)

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True))
        sess = tf.Session(config=session_config)
        sess.run(tf.global_variables_initializer())

    return {
        "inputs": inputs,
        "labels": labels,
        "is_training": is_training,
        "pred": pred,
        "loss": total_loss,
        "opt": opt,
        "sess": sess,
    }

In [7]:
NUM_STEP = 50
BATCH_SIZE = 16

def fit( model, data, verbose=True, num_step=None, batch_size=None, log_every=None ):
    """Run training steps on random mini-batches drawn from ``data``.

    Args:
        model: dict as returned by ``create_model``; the keys "sess", "opt",
            "loss", "inputs", "labels" and "is_training" are read.
        data: sequence of records with an "img" entry (copied into a
            224x224x3 slot) and a "label" entry (a name found in ``LABELS``).
        verbose: when true, print ``step loss`` every ``log_every`` steps.
        num_step: number of training steps; defaults to ``NUM_STEP``.
        batch_size: examples per step, sampled with replacement; defaults to
            ``BATCH_SIZE``.
        log_every: logging interval in steps. Defaults to a fifth of
            ``num_step`` (at least 1), so that verbose mode produces output
            even for short runs. Must be >= 1.

    Raises:
        ValueError: from ``np.random.randint`` when ``data`` is empty, or from
            ``LABELS.index`` when a record's label is not in ``LABELS``.
    """
    # Resolve defaults at call time so edits to the module constants take effect.
    if num_step is None:
        num_step = NUM_STEP
    if batch_size is None:
        batch_size = BATCH_SIZE
    if log_every is None:
        log_every = max(1, num_step // 5)

    sess, opt, total_loss = model["sess"],  model["opt"], model["loss"]
    inputs, labels, is_training = model["inputs"], model["labels"], model["is_training"]

    def next_batch( indices ):
        """Assemble the image and integer-label arrays for the given indices."""
        batch_img = np.zeros((len(indices), 224, 224, 3))
        # Integer dtype: these values are class ids fed to the labels input.
        batch_label = np.zeros(len(indices), dtype=np.int32)

        for i, idx in enumerate(indices):
            batch_img[i] = data[idx]["img"]
            batch_label[i] = LABELS.index(data[idx]["label"])

        return batch_img, batch_label

    for step in range(num_step):
        # Uniform sampling with replacement; there is no notion of an epoch here.
        indices = np.random.randint(len(data), size=batch_size)
        batch_img, batch_label = next_batch(indices)

        _, loss = sess.run([opt, total_loss], feed_dict={
                inputs: batch_img, labels: batch_label, is_training:True})

        if verbose and (step+1) % log_every == 0:
            print(step+1, loss)
    

In [8]:
def predict( data, model=None, batch_size=None ):
    """Run the network in inference mode over ``data`` and return its raw outputs.

    Args:
        data: sequence of records with an "img" entry (copied into a
            224x224x3 slot).
        model: dict as returned by ``create_model``; the keys "sess", "pred",
            "inputs" and "is_training" are read. When omitted, the
            module-level ``model`` is used, which keeps existing
            ``predict(data)`` calls working. Pass it explicitly to avoid
            depending on hidden notebook state.
        batch_size: examples per forward pass; defaults to ``BATCH_SIZE``.

    Returns:
        ``np.ndarray`` of shape ``(len(data), 5)``; row ``k`` holds the network
        output for ``data[k]``.

    Raises:
        NameError: if ``model`` is omitted and no module-level ``model`` exists.
    """
    if model is None:
        # Backward-compatible fallback to the notebook-global model.
        try:
            model = globals()["model"]
        except KeyError:
            raise NameError("name 'model' is not defined; pass model=... to predict()")
    if batch_size is None:
        batch_size = BATCH_SIZE

    sess, pred = model["sess"], model["pred"]
    inputs, is_training = model["inputs"], model["is_training"]

    num_classes = 5  # each network output is reshaped to rows of this width
    predicts = np.zeros((len(data), num_classes))

    # Walk the data in order, in contiguous chunks; the last chunk may be short.
    for start in range(0, len(data), batch_size):
        end = min(len(data), start + batch_size)

        batch_img = np.zeros((end - start, 224, 224, 3))
        for row, idx in enumerate(range(start, end)):
            batch_img[row] = data[idx]["img"]

        preds = sess.run(pred, feed_dict={
                inputs: batch_img, is_training:False})

        predicts[start:end] = np.reshape(preds, (-1, num_classes))

    return predicts

In [9]:
def eval( pred, label ):
    """Return the fraction of rows whose highest-scoring column equals the label.

    Args:
        pred: 2-D array of per-class scores, one row per example.
        label: sequence of integer class ids, one per row of ``pred``.

    Returns:
        Number of matches divided by ``len(label)``.
    """
    predicted_class = np.argmax(pred, axis=1)
    is_hit = (predicted_class == label)
    return is_hit.sum() / len(label)

In [10]:
# Integer class id of every test example, in the same order as data["test"].
label_test = [ LABELS.index(example["label"]) for example in data["test"] ]

# Train a fresh model for each candidate learning rate and report test accuracy.
lr_list = np.arange(0.0001, 0.01, 0.001)
print(lr_list.shape)
for lr in lr_list:
    # Bound at module level on purpose: predict(data) reads the global `model`.
    model = create_model(lr=lr)
    try:
        fit(model, data["train"], verbose=False)
        pred_test = predict(data["test"])

        print(lr, eval(pred_test, label_test))
    finally:
        # Every model owns its own tf.Graph and tf.Session, and
        # tf.reset_default_graph() does not touch them. Close the session
        # explicitly so its resources are released before the next model is
        # built.
        model["sess"].close()

    tf.reset_default_graph()

(10,)
0.0001 0.473206176203
0.0011 0.242506811989
0.0021 0.242506811989
0.0031 0.244323342416
0.0041 0.243415077203
0.0051 0.246139872843
0.0061 0.245231607629
0.0071 0.244323342416
0.0081 0.211625794732
0.0091 0.245231607629
