In [1]:
import tensorflow as tf
from tensorflow.contrib import layers
import numpy as np
import math
import os
from nbutil import imshow_multi
%matplotlib inline
from simple import flatten, avg
import random
from show_graph import show_graph
from skimage import io
from skimage.transform import resize
import charttt

In [2]:
# Side length, in pixels, of the square RGB images fed to the network and
# produced by the input queue.
IMAGE_SIZE = 64
# Number of output units of the classifier.
N_CLASSES = 102 # 101 foods + null class
# Default batch size of the input queue; also used to convert the global step
# into an epoch count in the training cell.
BATCH_SIZE = 32

In [3]:
# bias_init = tf.truncated_normal_initializer(mean=0.1, stddev=0.02)

class Net(object):
    """Inception-style convolutional classifier, built as a TensorFlow graph.

    Constructing an instance creates, under the ``net`` variable scope, a 1x1
    stem convolution followed by ``log2(img_size)`` stages.  Each stage runs
    two inception modules, batch normalisation and a stride-2 average pool, so
    the spatial size halves (and the channel count doubles) per stage until
    the feature map is 1x1.  A final 1x1 convolution yields the class logits.

    Attributes (graph tensors / ops):
        inputs: float32 placeholder, shape ``[None, img_size, img_size, 3]``.
        labels: int64 placeholder, shape ``[None]``, class indices.
        dropout: float32 placeholder passed to ``tf.nn.dropout`` as its second
            argument (the *keep* probability in TF 1.x); feed 1 to disable.
        is_training: scalar bool placeholder that defaults to True.  Feed
            False to make batch norm use its moving averages instead of the
            statistics of the current batch.
        lr: float32 placeholder holding the Adam learning rate.
        logits: class scores reshaped to ``[-1, n_classes]``.
        loss: softmax cross-entropy *summed* (not averaged) over the batch.
        predictions: arg-max class index per example.
        accuracy: fraction of ``predictions`` equal to ``labels``.
        global_step: step counter incremented by ``train_op``.
        train_op: one Adam update of ``loss``.
    """

    def __init__(self, img_size, n_classes):
        """Build the graph.

        Args:
            img_size: side length of the square input images; must be a
                positive power of two.
            n_classes: number of output classes.

        Raises:
            ValueError: if ``img_size`` is not a positive power of two.
        """
        img_size = int(img_size)
        # Each stage halves the feature map and the head reshapes the result
        # to [-1, n_classes], which is only row-per-example when the map has
        # shrunk to exactly 1x1.  Reject sizes that cannot reach 1x1 instead
        # of building a graph whose logits no longer line up with the labels.
        if img_size < 1 or img_size & (img_size - 1):
            raise ValueError(
                'img_size must be a positive power of two, got {}'.format(img_size))

        with tf.variable_scope('net'):
            # Exact integer log2; avoids rounding error of a float log ratio.
            n_layers = img_size.bit_length() - 1

            inputs = tf.placeholder(tf.float32, [None, img_size, img_size, 3], name='inputs')
            labels = tf.placeholder(tf.int64, [None], name='labels')
            dropout = tf.placeholder(tf.float32, name='dropout')
            # Defaults to True so existing training code needs no extra feed.
            is_training = tf.placeholder_with_default(True, [], name='is_training')

            initial_chans = 12
            x = self.conv(inputs, initial_chans, 1)
            print(x.get_shape())

            for i in range(n_layers):
                chans = initial_chans * 2 ** (i+1)
                x = self.inception_module(x, chans, 'inception-{}'.format(i))
                x = self.inception_module(x, chans, 'inception-{}-2'.format(i))
                # updates_collections=None makes the moving-average updates
                # run in place rather than via a separate update op.
                x = layers.batch_norm(x, is_training=is_training, updates_collections=None)
                x = self.pool(x, 2, 2, type='avg')
                if i == n_layers - 1:
                    # Dropout only on the final (1x1) feature map.
                    x = tf.nn.dropout(x, dropout)
                print(x.get_shape())

            # Linear 1x1 convolution acts as the classification layer.
            logits = self.conv(x, n_classes, 1, activation=tf.identity)
            logits = tf.reshape(logits, [-1, n_classes])

            # Note: summed over the batch, so the value scales with batch size.
            loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
            predictions = tf.argmax(logits, axis=-1)
            accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32))

            global_step = tf.contrib.framework.get_or_create_global_step()
            lr = tf.placeholder(tf.float32, name='lr')
            train_op = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)

            self.inputs = inputs
            self.labels = labels
            self.loss = loss
            self.logits = logits
            self.predictions = predictions
            self.accuracy = accuracy
            self.global_step = global_step
            self.train_op = train_op
            self.lr = lr
            self.dropout = dropout
            self.is_training = is_training

    def conv(self, x, channels, ksize, activation=tf.nn.relu):
        """Apply a ``layers.conv2d`` with `channels` outputs and a `ksize` kernel.

        Stride and padding are left at the ``layers.conv2d`` defaults.
        """
        return layers.conv2d(x,
                             channels,
                             kernel_size=ksize,
                             activation_fn=activation)

    def pool(self, x, ksize, stride, type='max'):
        """Pool `x` with SAME padding; `type` is ``'max'`` or ``'avg'``.

        Raises:
            KeyError: if `type` is neither ``'max'`` nor ``'avg'``.
        """
        fn = {'max': tf.nn.max_pool, 'avg': tf.nn.avg_pool}[type]
        return fn(x, [1, ksize, ksize, 1], [1, stride, stride, 1], 'SAME')

    def inception_module(self, x, out_channels, name):
        """Four-branch inception block producing `out_channels` channels.

        Design follows https://culurciello.github.io/tech/2016/06/04/nets.html
        Each branch emits ``out_channels // 4`` channels, so `out_channels`
        must be a multiple of 4 for the concatenation to have that many.
        """
        with tf.variable_scope(name):
            # Floor division keeps the channel count an int under both
            # Python 2 and true-division semantics.
            bsize = out_channels // 4

            y1 = self.conv(x, bsize, 1)                                   # 1x1
            y2 = self.conv(self.pool(x, 3, 1, 'avg'), bsize, 1)           # avg-pool -> 1x1
            y3 = self.conv(self.conv(x, bsize, 1), bsize, 3)              # 1x1 -> 3x3
            y4 = self.conv(self.conv(self.conv(x, bsize, 1), bsize, 3), bsize, 3)  # 1x1 -> 3x3 -> 3x3

            return tf.concat([y1, y2, y3, y4], axis=-1)

# Build the graph once; later cells reach the model through `n`.
n = Net(img_size=IMAGE_SIZE, n_classes=N_CLASSES)
print('ok')

(?, 64, 64, 12)
(?, 32, 32, 24)
(?, 16, 16, 48)
(?, 8, 8, 96)
(?, 4, 4, 192)
(?, 2, 2, 384)
(?, 1, 1, 768)
ok


## Inception module v3 

<img src='https://culurciello.github.io/assets/nets/inceptionv3.jpg' width=300 />

In [4]:
root = '../data/food-101/images'

# One class per sub-directory of `root`.  Plain files and hidden entries are
# skipped: they are not classes, would shift every class index that sorts
# after them, and would make the os.listdir() call below fail.
labels = sorted(
    entry for entry in os.listdir(root)
    if not entry.startswith('.') and os.path.isdir(os.path.join(root, entry))
)

# Matched case-insensitively so files such as `x.JPG` or `x.JPEG` are indexed.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif')

# Maps each image path to the name of the class directory that contains it.
paths_to_labels = {}

for label in labels:
    label_dir = os.path.join(root, label)
    for name in os.listdir(label_dir):
        if name.lower().endswith(IMAGE_EXTENSIONS):
            paths_to_labels[os.path.join(label_dir, name)] = label

print(labels)

['apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio', 'beef_tartare', 'beet_salad', 'beignets', 'bibimbap', 'bread_pudding', 'breakfast_burrito', 'bruschetta', 'caesar_salad', 'cannoli', 'caprese_salad', 'carrot_cake', 'ceviche', 'cheese_plate', 'cheesecake', 'chicken_curry', 'chicken_quesadilla', 'chicken_wings', 'chocolate_cake', 'chocolate_mousse', 'churros', 'clam_chowder', 'club_sandwich', 'crab_cakes', 'creme_brulee', 'croque_madame', 'cup_cakes', 'deviled_eggs', 'donuts', 'dumplings', 'edamame', 'eggs_benedict', 'escargots', 'falafel', 'filet_mignon', 'fish_and_chips', 'foie_gras', 'french_fries', 'french_onion_soup', 'french_toast', 'fried_calamari', 'fried_rice', 'frozen_yogurt', 'garlic_bread', 'gnocchi', 'greek_salad', 'grilled_cheese_sandwich', 'grilled_salmon', 'guacamole', 'gyoza', 'hamburger', 'hot_and_sour_soup', 'hot_dog', 'huevos_rancheros', 'hummus', 'ice_cream', 'lasagna', 'lobster_bisque', 'lobster_roll_sandwich', 'macaroni_and_cheese', 'macarons', 'miso_sou

In [5]:
def create_image_queue(filenames, batch_size=BATCH_SIZE, grayscale=False, size=128):
    """Build a queue-based input pipeline yielding augmented image batches.

    Each file is read, decoded as JPEG, scaled by 1/255, resized to a random
    square between `size` and 1.3 * `size`, randomly cropped to
    `size` x `size`, then randomly flipped and brightness/contrast jittered.
    The jitter is not clipped, so values may leave the [0, 1] range.

    Args:
        filenames: iterable of image file paths; reshuffled and repeated
            indefinitely.
        batch_size: number of examples per returned batch.
        grayscale: if True, decode each file as a single-channel image and
            replicate that channel to RGB, so the output is still 3-channel.
        size: side length of the square output images.

    Returns:
        ``(images_batch, filenames_batch)``: a float32 tensor of shape
        ``[batch_size, size, size, 3]`` and the string tensor of the paths
        the images were read from.

    NOTE(review): every file goes through ``decode_jpeg`` regardless of its
    extension -- confirm that any PNG/GIF inputs decode with the TF version
    in use.
    """
    # list() so that any iterable (e.g. a dict key view) can be converted.
    filename_tensor = tf.convert_to_tensor(list(filenames), dtype=tf.string)
    filename_q = tf.train.slice_input_producer([filename_tensor], num_epochs=None, shuffle=True)[0]

    # grayscale_to_rgb needs a single-channel input, so the decode must match
    # the flag; decoding with 3 channels and then converting cannot work.
    decode_channels = 1 if grayscale else 3
    image_255 = tf.image.decode_jpeg(tf.read_file(filename_q), channels=decode_channels)
    image = tf.cast(image_255, tf.float32) / 255.0
    if grayscale:
        image = tf.image.grayscale_to_rgb(image)

    def resize_image(image):
        """Resize to a random square in [size, 1.3 * size), then crop to `size`."""
        pre_crop_size = tf.cast(size + tf.random_uniform([]) * size * 0.3, tf.int32)
        # resize_bilinear works on batches, hence the temporary batch axis.
        image_as_batch_of_1 = tf.expand_dims(image, 0)
        image = tf.image.resize_bilinear(image_as_batch_of_1, [pre_crop_size, pre_crop_size])[0]
        image = tf.random_crop(image, [size, size, 3])
        return image

    image = resize_image(image)

    def distort_image(image):
        """Random horizontal flip plus brightness and contrast jitter."""
        distorted_image = tf.image.random_flip_left_right(image)
        distorted_image = tf.image.random_brightness(distorted_image, max_delta=0.15)
        distorted_image = tf.image.random_contrast(distorted_image, lower=0.8, upper=1.2)
        return distorted_image

    image = distort_image(image)

    min_after_dequeue = 64
    # Leave room for at least two batches above the shuffle reserve, so a
    # large batch_size does not outgrow the queue.  Equals the previous fixed
    # capacity of 128 for batch sizes up to 32.
    capacity = max(128, min_after_dequeue + 2 * batch_size)
    images_batch, filenames_batch = tf.train.shuffle_batch(
        [image, filename_q],
        batch_size=batch_size,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue)
    return images_batch, filenames_batch

# Input pipeline over every indexed image path, at the network's input size.
images_batch, filenames_batch = create_image_queue(
    filenames=paths_to_labels.keys(),
    size=IMAGE_SIZE,
)


In [6]:
# sess = tf.Session()
# images_batch, filenames_batch = create_image_queue(paths_to_labels.keys(), size=IMAGE_SIZE)

# init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
# sess.run(init_op)
# tf.train.start_queue_runners(sess=sess)


In [7]:
# show_graph(sess.graph)

In [8]:
# images, filenames = sess.run([images_batch, filenames_batch])
# imshow_multi(list(images)[:5])


In [11]:
chart = charttt.Board('foodnetwork').chart('foodnetwork 10 (64x64 input)')

# Built once so that each sample's class index is a dict lookup instead of a
# linear scan of `labels`.
label_to_index = {label: index for index, label in enumerate(labels)}


def learning_rate_for_epoch(epoch):
    """Return the step-wise decayed learning rate for a (fractional) epoch."""
    # Checked from the latest threshold down: the last threshold passed wins.
    for threshold, rate in ((3.2, 0.00005), (1, 0.0001), (0.1, 0.0005)):
        if epoch > threshold:
            return rate
    return 0.001


# The Supervisor restores from / checkpoints to `logdir` every five minutes.
sv = tf.train.Supervisor(logdir='models/foodnetwork-10', save_model_secs=60 * 5)
with sv.managed_session() as sess:

    tf.train.start_queue_runners(sess=sess)

    # Metrics accumulated since the last report.
    losses = []
    accs = []

    while not sv.should_stop():
        step = sess.run(n.global_step)
        # Images are fetched to Python first because the labels are derived
        # from the file paths on the Python side.
        images, filenames = sess.run([images_batch, filenames_batch])
        label_indices = [label_to_index[paths_to_labels[path]] for path in filenames]

        # Fractional number of passes over the dataset, derived from the step.
        epoch = step * BATCH_SIZE * 1.0 / len(paths_to_labels)
        lr = learning_rate_for_epoch(epoch)

        _, loss, acc = sess.run([n.train_op, n.loss, n.accuracy], feed_dict={
            n.lr: lr,
            n.inputs: images,
            n.labels: label_indices,
            n.dropout: 0.5
        })
        losses.append(loss)
        accs.append(acc)

        # Report the running averages every 16 steps, then reset them.
        if step % 16 == 0:
            chart.write(epoch, loss=avg(losses), accuracy=avg(accs))
            print("{}: loss={}, acc={}, epoch={}".format(step, avg(losses), avg(accs), epoch))
            losses = []
            accs = []
    

9520: loss=118.897351583, acc=0.145833333333, epoch=3.01623762376
9536: loss=104.361433983, acc=0.228515625, epoch=3.02130693069
9552: loss=107.740716457, acc=0.234375, epoch=3.02637623762
9568: loss=108.427424908, acc=0.201171875, epoch=3.03144554455
9584: loss=110.983247757, acc=0.212890625, epoch=3.03651485149
9600: loss=106.301193714, acc=0.208984375, epoch=3.04158415842
9616: loss=108.37438345, acc=0.1953125, epoch=3.04665346535
9632: loss=109.72287941, acc=0.19140625, epoch=3.05172277228
9648: loss=109.233715534, acc=0.1953125, epoch=3.05679207921
9664: loss=109.048866272, acc=0.18359375, epoch=3.06186138614
9680: loss=106.629406929, acc=0.185546875, epoch=3.06693069307
9696: loss=107.713367939, acc=0.224609375, epoch=3.072
9712: loss=110.637801647, acc=0.181640625, epoch=3.07706930693
9728: loss=106.35400486, acc=0.2109375, epoch=3.08213861386
9744: loss=111.653268814, acc=0.19140625, epoch=3.08720792079
9760: loss=108.195783138, acc=0.201171875, epoch=3.09227722772
9776: loss=1

KeyboardInterrupt: 

In [None]:

# create a directory of 1000 random images:

def create_null_dir():
    """(Re)create ``<root>/null`` and fill it with 1000 randomly chosen images.

    Images are drawn, with replacement, from ``../data/tiny-imagenet-200/train``
    and ``../data/floors``: for each copy one of the two collections is picked
    at random, then one image inside it.  Any existing ``null`` directory is
    deleted first, including its contents.

    Raises:
        IOError: if either source directory contains no images.
    """
    import shutil

    def iterate_images_recursively(path, extensions=('.jpeg', '.jpg', '.png', '.gif')):
        """Yield each file under `path` whose name ends with one of `extensions`.

        The comparison is case-insensitive.
        """
        suffixes = tuple(extensions)
        for (dirpath, _dirnames, filenames) in os.walk(path):
            for filename in filenames:
                subpath = os.path.join(dirpath, filename)
                if subpath.lower().endswith(suffixes):
                    yield subpath

    image_pools = []
    for source_dir in ('../data/tiny-imagenet-200/train', '../data/floors'):
        pool = list(iterate_images_recursively(source_dir))
        # os.walk yields nothing for a missing directory; fail with a clear
        # message instead of an IndexError from random.choice later on.
        if not pool:
            raise IOError('no images found under {}'.format(source_dir))
        image_pools.append(pool)

    nulldir = os.path.join(root, 'null')
    print(nulldir)

    # os.rmdir only removes empty directories, so a populated directory from
    # an earlier run would survive and make os.mkdir fail.
    if os.path.isdir(nulldir):
        shutil.rmtree(nulldir)
    os.mkdir(nulldir)

    for i in range(1000):
        path = random.choice(random.choice(image_pools))
        # Lower-case the extension and spell JPEG as `.jpg`, so the copies
        # have predictable names even for case-sensitive suffix checks.
        ext = os.path.splitext(path)[1].lower()
        if ext == '.jpeg':
            ext = '.jpg'
        shutil.copyfile(path, os.path.join(nulldir, '{}{}'.format(i, ext)))

# create_null_dir()


In [None]:
# List the contents of the null-class image directory.
! ls ../data/food-101/images/null

In [None]:
def softmax(x):
    """Return the softmax of `x` along axis 0.

    The maximum along axis 0 is subtracted before exponentiating.  This
    leaves the result unchanged mathematically but keeps ``np.exp`` from
    overflowing to inf (and the quotient from becoming nan) for large inputs.

    Args:
        x: array-like of scores; for a 2-D input each column is normalised
            independently.

    Returns:
        Array of the same shape as `x` whose entries along axis 0 sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(x)
    exps = np.exp(x - np.max(x, axis=0, keepdims=True))
    return exps / np.sum(exps, axis=0, keepdims=True)

# Classify a single image fetched from the web and print every class with its
# probability, most likely first.
with sv.managed_session() as sess:

    img = io.imread('http://3.bp.blogspot.com/-agOrGBzjpKw/UHMB77A5NnI/AAAAAAAAD0Q/wOL7KXEh1-Y/s1600/DSC_0422.JPG')
    # The network input has exactly three channels: replicate a single
    # channel and drop anything past the third (e.g. alpha).
    if img.ndim == 2:
        img = img[:, :, np.newaxis]
    if img.shape[2] == 1:
        img = np.repeat(img, 3, axis=2)
    img = img[:, :, :3]
    img = resize(img, [IMAGE_SIZE, IMAGE_SIZE])
    img = np.expand_dims(img, 0)  # batch of one
    print(img.shape)

    feed = {
        n.dropout: 1,  # keep everything: no dropout at inference
        n.inputs: img
    }
    # Switch batch norm to inference mode when the model offers that input.
    if hasattr(n, 'is_training'):
        feed[n.is_training] = False

    predictions = softmax(sess.run(n.logits, feed_dict=feed)[0])
    # Rank over however many outputs the network actually has.
    indices = sorted(range(len(predictions)), key=lambda i: predictions[i], reverse=True)
    for i in indices:
        # `labels` comes from the directory listing and may have fewer
        # entries than the network has outputs.
        name = labels[i] if i < len(labels) else '<class {}>'.format(i)
        print('{} : {}'.format(name, predictions[i]))
    