In [1]:
from general_tools.notebook.gpu_utils import setup_one_gpu
# Index of the GPU handed to setup_one_gpu (the recorded cell output reads "Picking GPU 0").
# NOTE(review): presumably this restricts the process to that single device — confirm in general_tools.
GPU = 0
setup_one_gpu(GPU)

Picking GPU 0


In [2]:
import argparse
import os
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets
import numpy as np
from general_tools.notebook.tf import reset_tf_graph

In [3]:
# Per-colour-channel means subtracted from every image by training_preprocess / val_preprocess
# (reshaped there to [1, 1, 3] so that they broadcast over height and width).
VGG_MEAN = [123.68, 116.78, 103.94]

In [72]:
def list_images(directory, in_view='image_p020_t337_r005', max_samples=1000):
    """
    Collect image paths and integer labels from a directory/label/model_name/view.png tree.

    Args:
        directory: root folder; each sub-directory name is used as a class label.
        in_view: file name (without the '.png' extension) picked inside every model folder.
        max_samples: upper bound on the number of (file, label) pairs returned. When set,
            ``min(max_samples, n_found)`` pairs are drawn at random without replacement.
            Pass ``None`` to return every pair in listing order.

    Returns:
        (filenames, labels): two lists of equal length. ``labels`` holds Python ints that
        index the alphabetically sorted label names, so the mapping is the same on every run.
        Both lists are empty when no model folder is found.
    """
    files_and_labels = []
    # Sorted listings make the result independent of the file-system enumeration order.
    for label in sorted(os.listdir(directory)):
        label_dir = os.path.join(directory, label)
        if not os.path.isdir(label_dir):
            continue  # stray file next to the label folders
        for model_name in sorted(os.listdir(label_dir)):
            model_dir = os.path.join(label_dir, model_name)
            if not os.path.isdir(model_dir):
                continue  # stray file next to the model folders
            files_and_labels.append((os.path.join(model_dir, in_view + '.png'), label))

    if not files_and_labels:
        return [], []

    filenames = [path for path, _ in files_and_labels]
    label_names = [name for _, name in files_and_labels]

    # Sort the unique names: iterating a set of strings gives a run-dependent order.
    label_to_int = {name: i for i, name in enumerate(sorted(set(label_names)))}
    labels = [label_to_int[name] for name in label_names]

    if max_samples is None:
        return filenames, labels

    # Never request more samples than exist (np.random.choice would raise otherwise).
    n_keep = min(max_samples, len(labels))
    rids = np.random.choice(len(labels), n_keep, replace=False)
    return [filenames[i] for i in rids], [labels[i] for i in rids]

def check_accuracy(sess, correct_prediction, is_training, dataset_init_op):
    """
    Compute the fraction of correctly classified samples over one full pass of a dataset.

    The dataset that gets evaluated is whichever one `dataset_init_op` points the
    iterator at. The model is run with `is_training` fed as False.
    """
    # Point the iterator at the requested dataset.
    sess.run(dataset_init_op)

    feed = {is_training: False}
    hits = 0
    total = 0
    exhausted = False
    while not exhausted:
        try:
            batch_hits = sess.run(correct_prediction, feed)
        except tf.errors.OutOfRangeError:
            # The iterator has been consumed: one pass is complete.
            exhausted = True
        else:
            hits += batch_hits.sum()
            total += batch_hits.shape[0]

    # Fraction of datapoints that were correctly classified.
    return float(hits) / total

def training_preprocess(image, label):
    ''' Training-time augmentation applied after `_parse_function`.
        # (3) Take a random 224x224 crop of the rescaled image
        # (4) Flip the crop horizontally at random
        # (5) Subtract the per-colour mean `VGG_MEAN`
        # Note: no further normalisation is applied, as VGG was trained without it
        The label is passed through unchanged.
    '''
    cropped = tf.random_crop(image, [224, 224, 3])
    flipped = tf.image.random_flip_left_right(cropped)
    channel_means = tf.reshape(tf.constant(VGG_MEAN), [1, 1, 3])
    return flipped - channel_means, label


def val_preprocess(image, label):
    ''' Deterministic preprocessing used for evaluation.
    (3) Crop (or pad) the rescaled image centrally to 224x224
    (4) Subtract the per-colour mean `VGG_MEAN`
    # Note: no further normalisation is applied, as VGG was trained without it
    The label is passed through unchanged.
    '''
    center_crop = tf.image.resize_image_with_crop_or_pad(image, 224, 224)
    channel_means = tf.reshape(tf.constant(VGG_MEAN), [1, 1, 3])
    return center_crop - channel_means, label

def _parse_function(filename, label):
    ''' Load one PNG file and rescale it; shared by the training and validation pipelines.
        # Follows the standard VGG/ImageNet preprocessing from:
        # https://github.com/tensorflow/models/blob/master/research/slim/preprocessing/vgg_preprocessing.py
        # Also see the VGG paper for more details: https://arxiv.org/pdf/1409.1556.pdf
        # (1) Decode the image from PNG format (3 channels) and cast it to float32
        # (2) Resize the image so its smaller side is 256 pixels long
        The label is passed through unchanged.
        '''
    raw_bytes = tf.read_file(filename)
    image = tf.cast(tf.image.decode_png(raw_bytes, channels=3), tf.float32)

    target_short_side = 256.0
    height = tf.to_float(tf.shape(image)[0])
    width = tf.to_float(tf.shape(image)[1])

    def _scale_from_width():
        # Taller than wide: the width is the smaller side.
        return target_short_side / width

    def _scale_from_height():
        return target_short_side / height

    scale = tf.cond(tf.greater(height, width), _scale_from_width, _scale_from_height)
    target_size = [tf.to_int32(height * scale), tf.to_int32(width * scale)]

    return tf.image.resize_images(image, target_size), label

def make_dataset(filenames, labels, preprocess_f, shuffle=True, batch_size=None, num_workers=None):
    """
    Build a batched dataset of preprocessed images and labels.

    Args:
        filenames: list of image paths.
        labels: list of integer labels, aligned with `filenames`.
        preprocess_f: function (image, label) -> (image, label) applied after
            `_parse_function` (e.g. `training_preprocess` or `val_preprocess`).
        shuffle: when True, the examples are shuffled with a buffer of 10000 elements.
        batch_size: batch size; defaults to the notebook-level `args.batch_size`.
        num_workers: number of threads used by each map; defaults to `args.num_workers`.

    Returns:
        The batched dataset.
    """
    # Fall back to the notebook-level configuration only when not given explicitly.
    if batch_size is None:
        batch_size = args.batch_size
    if num_workers is None:
        num_workers = args.num_workers

    filenames = tf.constant(filenames)
    labels = tf.constant(labels)
    dataset = tf.contrib.data.Dataset.from_tensor_slices((filenames, labels))
    if shuffle:
        # Shuffle the (filename, label) pairs *before* decoding, so the shuffle buffer
        # holds short strings and ints rather than decoded float images.
        dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.map(_parse_function, num_threads=num_workers, output_buffer_size=batch_size)
    dataset = dataset.map(preprocess_f, num_threads=num_workers, output_buffer_size=batch_size)
    return dataset.batch(batch_size)

In [17]:
class VGG_Finetuner(object):
    """Fine-tune a pretrained slim VGG-16 on the images listed under ``args.train_dir``.

    Construction builds the input pipeline, the network, the loss and both optimizers,
    opens a session, restores every pretrained layer except fc8 from ``args.model_path``
    and initialises the new fc8 layer. Training then runs in two stages:
    `train_last_layer` (fc8 only) followed by `train_all_layers` (every variable).

    Fields read from ``args``: train_dir, model_path, weight_decay, dropout_keep_prob,
    learning_rate1, learning_rate2.
    """

    def __init__(self, args):
        """Build the graph and the session, then load the pretrained weights.

        Args:
            args: configuration object carrying the fields listed in the class docstring.
        """
        self.args = args

        train_filenames, train_labels = list_images(args.train_dir)

        # Size the classifier by the largest label id rather than by the number of distinct
        # labels: the ids are integer class indices and the returned list may be a subsample
        # that misses some classes, in which case counting the distinct values would leave
        # the larger ids out of range for the softmax.
        n_classes = max(train_labels) + 1

        batched_train_dataset = make_dataset(train_filenames, train_labels, training_preprocess)

        # Define a reinitializable iterator described only by its structure (types and
        # shapes). Any dataset with a compatible structure can be attached to it later
        # through `self.iterator.make_initializer(dataset)`; running the resulting op
        # starts one epoch over that dataset.
        # Once this is done, no value has to be fed for images and labels: they are
        # pulled from the iterator automatically.
        self.iterator = tf.contrib.data.Iterator.from_structure(batched_train_dataset.output_types,
                                                                batched_train_dataset.output_shapes)
        self.images, self.labels = self.iterator.get_next()

        # sess.run(self.train_init_op) starts one epoch over the training set.
        self.train_init_op = self.iterator.make_initializer(batched_train_dataset)

        self.define_model(self.images, n_classes)
        self.create_optimizer()

        # Evaluation metrics
        prediction = tf.to_int32(tf.argmax(self.logits, 1))
        self.correct_prediction = tf.equal(prediction, self.labels)
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))

        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)

        # Launch the session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        self.init_fn(self.sess)  # load the pretrained weights
        self.sess.run(self.fc8_init)  # initialize the new fc8 layer

    def define_model(self, images, n_classes):
        '''Create the VGG-16 graph, the weight-restoring function and the loss.

        # ---------------------------------------------------------------------
        # We use VGG-16 pretrained on ImageNet. The last fully connected layer (fc8)
        # is replaced with a new one of output size n_classes.
        # We will first train the last layer for a few epochs.
        # Then we will train the entire model on our dataset for a few epochs.
        # Passing num_classes to the model creates the replacement layer, which is
        # called "vgg_16/fc8". Each model has a different architecture, so this name
        # will change in another model.
        # Here, logits gives us directly the predicted scores we wanted from the images.

        Args:
            images: batch of preprocessed input images fed to the network.
            n_classes: output size of the new fc8 layer.

        Sets: is_training, logits, end_points, init_fn, fc8_variables, fc8_init, loss.
        Reads `self.labels`, which must already be defined.
        '''
        self.is_training = tf.placeholder(tf.bool)
        vgg = tf.contrib.slim.nets.vgg
        with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=self.args.weight_decay)):
            self.logits, self.end_points = vgg.vgg_16(images, num_classes=n_classes,
                                                      is_training=self.is_training,
                                                      dropout_keep_prob=self.args.dropout_keep_prob)

        # Specify where the model checkpoint is (pretrained weights).
        model_path = self.args.model_path
        assert os.path.isfile(model_path), 'Pretrained checkpoint not found: %s' % model_path

        # Restore only the layers up to fc7 (included)
        # Calling function `init_fn(sess)` will load all the pretrained weights.
        variables_to_restore = tf.contrib.framework.get_variables_to_restore(exclude=['vgg_16/fc8'])
        self.init_fn = tf.contrib.framework.assign_from_checkpoint_fn(model_path, variables_to_restore)

        # Initialization operation from scratch for the new "fc8" layers
        # `get_variables` will only return the variables whose name starts with the given pattern
        self.fc8_variables = tf.contrib.framework.get_variables('vgg_16/fc8')
        self.fc8_init = tf.variables_initializer(self.fc8_variables)

        # ---------------------------------------------------------------------
        # Using tf.losses, any loss is added to the tf.GraphKeys.LOSSES collection
        # We can then call the total loss easily
        tf.losses.sparse_softmax_cross_entropy(labels=self.labels, logits=self.logits)
        self.loss = tf.losses.get_total_loss()

    def create_optimizer(self):
        """Create the two training ops: fc8-only and whole-network.

        Reads `self.loss`, `self.fc8_variables` and the learning rates from `self.args`.
        """
        # First we want to train only the reinitialized last layer fc8 for a few epochs.
        # We minimize the loss only with respect to the fc8 variables (weight and bias).
        self.fc8_optimizer = tf.train.GradientDescentOptimizer(self.args.learning_rate1)
        self.fc8_train_op = self.fc8_optimizer.minimize(self.loss, var_list=self.fc8_variables)

        # Then we want to finetune the entire model for a few epochs.
        # We minimize the loss with respect to all the variables.
        self.full_optimizer = tf.train.GradientDescentOptimizer(self.args.learning_rate2)
        self.full_train_op = self.full_optimizer.minimize(self.loss)

    def _run_epochs(self, train_op, n_epochs):
        """Run `train_op` over the training set for `n_epochs` epochs, printing train accuracy after each."""
        for epoch in range(n_epochs):
            print('Starting epoch %d / %d' % (epoch + 1, n_epochs))
            # (Re)initialize the iterator with the training set, then consume it until
            # it is empty: that is exactly one epoch.
            self.sess.run(self.train_init_op)
            while True:
                try:
                    self.sess.run(train_op, {self.is_training: True})
                except tf.errors.OutOfRangeError:
                    break
            # Check accuracy on the train set every epoch.
            train_acc = check_accuracy(self.sess, self.correct_prediction, self.is_training, self.train_init_op)
            print('Train accuracy: %f' % train_acc)

    def train_last_layer(self, n_epochs):
        """Train only the new fc8 layer for `n_epochs` epochs."""
        self._run_epochs(self.fc8_train_op, n_epochs)

    def train_all_layers(self, n_epochs):
        """Fine-tune every variable of the network for `n_epochs` epochs."""
        self._run_epochs(self.full_train_op, n_epochs)

In [7]:
class Args():
    """Plain attribute container; configuration fields are attached to an instance after it is created."""

    def __init__(self):
        """Create an empty container with no predefined fields."""

In [21]:
# Run configuration, read by make_dataset and VGG_Finetuner.
args = Args()
# Root of the image tree scanned by list_images (directory/label/model_name/view.png).
args.train_dir = '/scr/optas/DATA/Meshes/Shape_Net_Core/2015_Summer_OUT/Images/'
# Checkpoint holding the pretrained VGG-16 weights restored in define_model.
args.model_path = '/orions4-zfs/projects/optas/DATA/NN/vgg16_pretrained/vgg_16.ckpt'
# Batch size; also used as the output buffer size of the dataset map calls.
args.batch_size = 64
# Number of threads used by each dataset map call.
args.num_workers = 10
# Learning rate for the fc8-only stage (train_last_layer).
args.learning_rate1 = 1e-3
# Learning rate for the whole-network stage (train_all_layers).
args.learning_rate2 = 1e-5
# Weight decay passed to vgg_arg_scope.
args.weight_decay = 5e-4
# Dropout keep probability passed to vgg_16.
args.dropout_keep_prob = 0.5

In [22]:
# NOTE(review): reset_tf_graph presumably clears the default TensorFlow graph so that this cell
# can be re-run without piling up ops — confirm in general_tools.
reset_tf_graph()
# Builds the graph, opens a session and restores the pretrained weights (see VGG_Finetuner.__init__).
vgg_ft = VGG_Finetuner(args)

INFO:tensorflow:Restoring parameters from /orions4-zfs/projects/optas/DATA/NN/vgg16_pretrained/vgg_16.ckpt


In [24]:
# Stage 1: train only the new fc8 layer for 10 epochs.
vgg_ft.train_last_layer(10)
# Stage 2: fine-tune every layer for 2 epochs.
vgg_ft.train_all_layers(2)
# NOTE(review): the output recorded for this cell reports "epoch 1 / 1", so it appears to come
# from an earlier run with different epoch counts — re-run to refresh it.

Starting epoch 1 / 1
Train accuracy: 0.855000


In [104]:
def predict_on_test(self, test_dir):
    """
    Extract the 'vgg_16/fc7' activations for the images found under `test_dir`.

    Args:
        self: a VGG_Finetuner instance (this is a free function, called as
            ``predict_on_test(model, test_dir)``).
        test_dir: root of an image tree laid out as expected by `list_images`.

    Returns:
        (filenames, features): the image paths and a 2-D array of shape
        (n_images, n_features) whose rows are aligned with `filenames`.

    Side effect: stores the iterator initializer for this dataset in `self.test_init_op`.
    """
    filenames, labels = list_images(test_dir)

    # No shuffling: the rows of the returned features must stay aligned with `filenames`.
    batched_test_dataset = make_dataset(filenames, labels, val_preprocess, shuffle=False)
    self.test_init_op = self.iterator.make_initializer(batched_test_dataset)
    self.sess.run(self.test_init_op)

    fetches = [self.end_points['vgg_16/fc7'], self.labels]
    all_f = []
    all_l = []
    while True:
        try:
            batch_f, batch_l = self.sess.run(fetches, {self.is_training: False})
        except tf.errors.OutOfRangeError:
            break
        all_f.append(batch_f)
        all_l.append(batch_l)

    # Sanity check: the labels must come back in the order they were listed.
    assert np.all(np.hstack(all_l) == labels), 'labels came back in an unexpected order'

    # Flatten every sample to one row. An explicit reshape (instead of np.squeeze) keeps
    # the batch axis even when a single image was processed.
    all_f = np.vstack(all_f)
    all_f = all_f.reshape(all_f.shape[0], -1)
    return filenames, all_f

In [105]:
# Extract fc7 features with the fine-tuned model; note that this is run on the training directory.
fnames, features = predict_on_test(vgg_ft, args.train_dir)