# tf.data.Dataset

In [None]:
## Assorted examples, code doesn't actually run
import tensorflow as tf

### Example using TFRecords
# Input pipeline: read serialized records -> parse -> shuffle -> batch -> repeat.
filenames = [
    "/var/data/file1.tfrecord",
    "/var/data/file2.tfrecord",
]
dataset = (
    tf.data.TFRecordDataset(filenames)  # create dataset from the tfrecord files
    .map(parser_fn)                     # apply parser_fn to every record
    .shuffle(buffer_size=10000)         # shuffle through a 10000-element buffer
    .batch(32)                          # group elements into batches of 32
    .repeat()                           # no count given: repeat indefinitely
)

### Example using python generator
filenames = [..., ...]  # placeholder entries; stands in for the real list of files


def gen():
    """Yield one preprocessed (inputs, outputs) pair per entry of ``filenames``."""
    for path in filenames:
        # load numpy arrays from file
        raw_inputs, raw_outputs = read_file(path)
        # preprocess your inputs and outputs
        ready_inputs, ready_outputs = preprocess(raw_inputs, raw_outputs)
        yield ready_inputs, ready_outputs
        
# Build a dataset from the Python generator and chain common transformations.
# Parentheses are used instead of backslash continuations so that a stray
# trailing space or an inline comment cannot break the statement.
dataset = (
    tf.data.Dataset.from_generator(
        gen,
        output_types=(tf.float32, tf.float32),
        output_shapes=(tf.TensorShape([None]), tf.TensorShape([None])),
    )
    .shard(num_workers, worker_index)  # shard the elements across num_workers workers
    .skip(5)                           # drop the first 5 elements
    .take(100)                         # keep at most the next 100 elements
    # Each element is an (inputs, outputs) pair, which tf.data passes to the
    # predicate as two separate positional arguments -- a one-argument lambda
    # fails here with an arity error.
    .filter(lambda inputs, outputs: True)
    .shuffle(num_examples_per_epoch * num_epochs)
    .batch(32)
    .map(map_func)  # very useful if you need to run per batch preprocessing
)


### Using datasets
# NOTE(review): `sess` is not created in this cell -- presumably an open
# tf.Session; confirm before running.
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next() # tensor corresponding to output of dataset
sess.run(next_element) # get one element from dataset

training_op = model(next_element) # define some tensorflow graph on the inputs
sess.run(training_op) # will pull the next_element from the dataset and then run it through the defined tf graph


### an alternative kind of iterator
# An initializable iterator must be initialized explicitly before use; this lets
# the same graph be re-run with different placeholder values.
max_value = tf.placeholder(tf.int64, shape=[])
dataset = tf.data.Dataset.range(max_value) # create a dataset of 0, 1, 2, ..., max_value - 1
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

sess.run(iterator.initializer, feed_dict={max_value: 5})
for _ in range(5):
    sess.run(next_element) # gets 0, 1, 2, 3, 4 in order

sess.run(iterator.initializer, feed_dict={max_value: 200})
for _ in range(200):
    sess.run(next_element) # gets 0, 1, 2, ..., 199 in order

### ^ this kind of iterator is very useful for doing train/val splits and other such things

# Image Classification

## Download the images from http://vision.stanford.edu/aditya86/ImageNetDogs/ and extract them so that an `Images/` directory sits next to this notebook

In [1]:
from keras.layers import Dense, Flatten
from keras.applications import VGG16
from keras.objectives import categorical_crossentropy
import tensorflow as tf

def model(inputs, targets, num_classes):
    """Exercise skeleton: build the training graph for the image classifier.

    The code to be filled in must define ``loss``; it is referenced below but
    not assigned anywhere in this skeleton.

    Args:
        inputs: passed directly to the VGG16 base model (presumably a batch of
            image tensors -- confirm against the input pipeline).
        targets: used to compute the loss (presumably one-hot labels -- confirm).
        num_classes: number of neurons requested for the final softmax layer.

    Returns:
        Tuple ``(training_op, loss)``, where ``training_op`` is the op returned
        by ``AdamOptimizer().minimize(loss)``.
    """
    ### write a model which has the following 'layers'
    # VGG16
    # FC 256 neurons
    # softmax with num_classes neurons
    
    # include_top=False requests VGG16 without its top layers
    vgg_out = VGG16(weights='imagenet', include_top=False)(inputs)
    ###YOUR CODE HERE
    
    # calculate scalar loss based on targets and output of net
    ### YOUR CODE HERE
    
    training_op = tf.train.AdamOptimizer().minimize(loss)
    return training_op, loss

Using TensorFlow backend.


In [2]:
import os
import re
import random
import numpy as np

# Collect (image_path, label) pairs from a tree laid out as
# Images/<prefix>-<label>/<image files>.
filenames_with_labels = []

for path in os.listdir('Images'):
    class_dir = os.path.join('Images', path)
    # Split on the FIRST hyphen only. A greedy ".*-" prefix would consume
    # everything up to the LAST hyphen, truncating hyphenated labels and
    # merging distinct classes (e.g. "flat-coated_retriever" and
    # "curly-coated_retriever" would both become "coated_retriever").
    m = re.match(r"[^-]*-(.*)", path)
    # Skip entries that do not follow the naming scheme, and stray files that
    # happen to contain a hyphen (os.listdir on a file would raise).
    if m is None or not os.path.isdir(class_dir):
        continue
    label = m.group(1)
    for fname in os.listdir(class_dir):
        filenames_with_labels.append((os.path.join(class_dir, fname), label))

# np.unique returns the labels sorted, so the label -> index mapping does not
# depend on the order in which os.listdir happened to return the directories.
unique_labels = np.unique([x[1] for x in filenames_with_labels])
label_to_ind = {label: i for i, label in enumerate(unique_labels)}

In [3]:
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
def generator():
    """Exercise skeleton: yield one example per entry of ``filenames_with_labels``.

    The code to be filled in must define ``img`` and ``one_hot``; both are
    yielded below but not assigned anywhere in this skeleton.

    Yields:
        dict with the keys 'image' and 'target'.
    """
    for image_fname, label in filenames_with_labels:
        ### load image from file into shape (224, 224, 3)
        
        ### rescale image to -1 to 1
        ### hint look at the imports above for useful tools
        # NOTE(review): the imported vgg16 preprocess_input is believed to apply
        # per-channel mean subtraction rather than scaling to [-1, 1] -- confirm
        # which preprocessing is actually intended here.
        
        # create one hot vector from label
        # (label_to_ind maps each label to its integer index)
        
        yield {'image': img, 'target': one_hot}

In [4]:
# Exercise template: every `???` and every bare `=` followed only by a comment
# is a placeholder to fill in; the cell is not valid Python until that is done.
num_epochs = 1
batch_size = 2
output_types = ??? # hint: should be a dictionary with the same keys the generator yields ('image', 'target')
output_shapes = ??? # hint: should be a dictionary with the same keys as output_types
dataset = ## generate dataset from generator, then shuffle and batch

iterator = # make an iterator from the dataset
next_element = # get the next dictionary of tensors from the iterator
# model() returns the training op and the loss tensor for one batch
training_op, loss = model(next_element['image'], next_element['target'], len(unique_labels))

In [None]:
import sys
# Training loop: run one optimisation step per batch until the dataset is
# exhausted (signalled by tf.errors.OutOfRangeError).
with tf.Session() as sess:
    step = 0
    # `step` counts batches, not examples, so one epoch is
    # ceil(num_examples / batch_size) steps (assumes the dataset was batched
    # with `batch_size`). -(-a // b) is ceiling division without importing
    # math; max(1, ...) guards the modulo below when no files were found.
    steps_per_epoch = max(1, -(-len(filenames_with_labels) // batch_size))
    ### initialize tf global variables here
    while True:
        try:
            _, loss_value = sess.run([training_op, loss])
            print("Loss for step {}: {}".format(step, loss_value))
            if step % steps_per_epoch == 0:
                # Floor division keeps the epoch number an integer (0, 1, ...)
                # instead of a float such as 1.0.
                print("Starting epoch: {}".format(step // steps_per_epoch))
            step += 1
            sys.stdout.flush()
        except tf.errors.OutOfRangeError:
            print("Finished training")
            break

# Tensorboard

In [5]:
from keras.layers import Dense, Flatten
from keras.applications import VGG16
from keras.objectives import categorical_crossentropy
import tensorflow as tf

def model(inputs, targets, num_classes):
    """Exercise skeleton: the earlier classifier, extended with summaries.

    Redefines (and therefore shadows) the ``model`` function from the image
    classification section. The code to be filled in must define ``loss``; it
    is referenced below but not assigned anywhere in this skeleton.

    Args:
        inputs: passed directly to the VGG16 base model.
        targets: used to compute the loss.
        num_classes: number of neurons requested for the final softmax layer.

    Returns:
        Tuple ``(training_op, loss)``.
    """
    ### Copy code from above model and then add:
    # - a histogram for fc_1
    # - a histogram for softmax called activations
    # - a scalar summary for the loss
    # - an image summary for inputs
    vgg_out = VGG16(weights='imagenet', include_top=False)(inputs)

    
    training_op = tf.train.AdamOptimizer().minimize(loss)
    return training_op, loss

In [None]:
## HINT: this stuff is all the same as before
# Exercise template: every `???` and every bare `=` followed only by a comment
# is a placeholder to fill in; the cell is not valid Python until that is done.
num_epochs = 1
batch_size = 2
output_types = ??? # hint: should be a dictionary with the same keys the generator yields ('image', 'target')
output_shapes = ??? # hint: should be a dictionary with the same keys as output_types
dataset = ## generate dataset from generator, then shuffle and batch

iterator = # make an iterator from the dataset
next_element = # get the next dictionary of tensors from the iterator
# model() returns the training op and the loss tensor for one batch
training_op, loss = model(next_element['image'], next_element['target'], len(unique_labels))

In [None]:
import sys
with tf.Session() as sess:
    # create a file writer to write summaries
    step = 0
    # initialize tf global variables as before

    while True:
        try:
            merged = # create an op to merge all summaries into one op
            summary, _, l = sess.run([merged, training_op, loss])
            
            file_writer.??? # add summary result to file writer
            print("Loss for step {}: {}".format(step, l))
            if step % len(filenames_with_labels) == 0:
                print("Starting epoch: {}".format(step / len(filenames_with_labels)))
            step += 1
        except tf.errors.OutOfRangeError:
            print("Finished training")
            break