In [1]:
import tensorflow as tf
import Deep3D_branched as deep3d
import utils
import numpy as np
import os
import pickle
import matplotlib.pyplot as plt
from PIL import Image


In [2]:
# Randomly select a fixed-size subset of stereo frame pairs for this search.
left_dir = "/a/data/deep3d_data/frames2/left/"
right_dir = "/a/data/deep3d_data/frames2/right/"

# os.listdir returns entries in arbitrary order, so sort both listings to make
# index i refer to the same frame on each side.
# NOTE(review): assumes matching left/right frames share a filename -- confirm.
left_names = sorted(os.listdir(left_dir))
right_names = sorted(os.listdir(right_dir))

# Draw ONE set of indices and apply it to both sides so the pairs stay aligned
# even if the two directories differ in size. replace=False avoids duplicate
# frames in the subset (np.random.choice samples with replacement by default).
subset_size = 2500
num_pairs = min(len(left_names), len(right_names))
np.random.seed(500)
chosen = np.random.choice(num_pairs, size=min(subset_size, num_pairs), replace=False)

left_files = [left_dir + left_names[i] for i in chosen]
right_files = [right_dir + right_names[i] for i in chosen]

## Define Search Params

In [3]:
# Search Scope
batchsize = 50
num_epochs = 1
num_batches = (len(left_files)/batchsize)*num_epochs 

learning_rates = [0.005,0.0014, 0.0007, 0.0003]
beta1 = np.linspace(0.85,0.95,3)
beta2 = np.linspace(0.94,0.999,3)

search_count = len(learning_rates) * len(beta1) * len(beta2)

print "Iterations needed: ", search_count*num_batches

Iterations needed:  1800


In [4]:
# Results file for the search: a pickled dict that the training cell reloads
# and extends after every trial. Start from an empty dict, and use a context
# manager so the handle is flushed and closed before anything reads it back.
pickle_name = "hypercost.p"
with open(pickle_name, "wb") as results_file:
    pickle.dump({}, results_file)

## GPU Search Training

In [5]:
# Define config for GPU memory debugging 
config = tf.ConfigProto()
config.gpu_options.allow_growth=True  # Switch to True for dynamic memory allocation instead of TF hogging BS
config.gpu_options.per_process_gpu_memory_fraction= 1  # Cap TF mem usage
config.allow_soft_placement=True

count = 0
for lr in learning_rates:
    for b1 in beta1:
        for b2 in beta2:
            print 'learning_rate: ' + str(lr) + ' | beta1: ' + str(b1) + ' | beta2: ' + str(b2)
            # Queue Stuff remains invariant
            with tf.device('/cpu:0'):
                left_image_queue = tf.train.string_input_producer(
                  left_dir + tf.convert_to_tensor(os.listdir(left_dir)),
                  shuffle=False)
                right_image_queue = tf.train.string_input_producer(
                  right_dir + tf.convert_to_tensor(os.listdir(right_dir)),
                  shuffle=False)

                # use reader to read file
                image_reader = tf.WholeFileReader()

                _, left_image_raw = image_reader.read(left_image_queue)
                left_image = tf.image.decode_jpeg(left_image_raw)
                left_image = tf.cast(left_image, tf.float32)/255.0

                _, right_image_raw = image_reader.read(right_image_queue)
                right_image = tf.image.decode_jpeg(right_image_raw)
                right_image = tf.cast(right_image, tf.float32)/255.0

                left_image.set_shape([160,288,3])
                right_image.set_shape([160,288,3])

                # preprocess image
                batch = tf.train.shuffle_batch([left_image, right_image], 
                                               batch_size = batchsize,
                                               capacity = 12*batchsize,
                                               num_threads = 1,
                                               min_after_dequeue = 4*batchsize)

            
            
            # Session
            sess = tf.Session(config=config)
            
            #initialize list to store outputs of run
            cost_list = []

            # Placeholders
            images = tf.placeholder(tf.float32, [None, 160, 288, 3], name='input_batch')
            true_out = tf.placeholder(tf.float32, [None, 160, 288, 3] , name='ground_truth')
            train_mode = tf.placeholder(tf.bool, name='train_mode')

            # Building Net based on VGG weights 
            net = deep3d.Deep3Dnet('./vgg19.npy', dropout = 0.5)
            net.build(images, train_mode)

            # Define Training Objectives
            with tf.variable_scope("Loss"):
                cost = tf.reduce_sum(tf.abs(net.prob - true_out))/batchsize
                tf.summary.scalar('cost', cost)
                
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):    
                train = tf.train.AdamOptimizer(learning_rate=lr,beta1=b1, beta2=b2).minimize(cost) 
            
            # Run initializer 
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            coord = tf.train.Coordinator()
            queue_threads = tf.train.start_queue_runners(coord=coord, sess=sess)
            
            # Tensorboard operations to compile summary and then write into logs
            merged = tf.summary.merge_all()
            writer = tf.summary.FileWriter('./tensorboard_logs/'+ 
                                           'lr' + str(lr) + 
                                           'b1' + str(b1) +
                                           'b2' + str(b2) +
                                           '/', graph = sess.graph)
            
            
            # Begin Training
            next_batch = sess.run(batch)
            for i in xrange(num_batches):
                _, cost_val,next_batch, summary = sess.run([train, cost, batch, merged],
                                                 feed_dict={images: next_batch[0],
                                                            true_out: next_batch[1],
                                                            train_mode: True})

                writer.add_summary(summary, i)
                cost_list.append(cost_val)
            
            count += 1
            print "Finished hyperparam: " + str(count) + ' of ' + str(search_count)
            
            
            cost_key = (lr, b1, b2)
            cost_file = pickle.load(open(pickle_name, "rb"))
            cost_file[cost_key] = cost_list
            pickle.dump(cost_file, open(pickle_name, "wb"))
            
            
            sess.close()
            tf.reset_default_graph()
            coord.request_stop()
            coord.join(queue_threads)

learning_rate: 0.005 | beta1: 0.85 | beta2: 0.94
Finished hyperparam: 1 of 36
learning_rate: 0.005 | beta1: 0.85 | beta2: 0.9695
Finished hyperparam: 2 of 36
learning_rate: 0.005 | beta1: 0.85 | beta2: 0.999
Finished hyperparam: 3 of 36
learning_rate: 0.005 | beta1: 0.9 | beta2: 0.94
Finished hyperparam: 4 of 36
learning_rate: 0.005 | beta1: 0.9 | beta2: 0.9695
Finished hyperparam: 5 of 36
learning_rate: 0.005 | beta1: 0.9 | beta2: 0.999
Finished hyperparam: 6 of 36
learning_rate: 0.005 | beta1: 0.95 | beta2: 0.94
Finished hyperparam: 7 of 36
learning_rate: 0.005 | beta1: 0.95 | beta2: 0.9695
Finished hyperparam: 8 of 36
learning_rate: 0.005 | beta1: 0.95 | beta2: 0.999
Finished hyperparam: 9 of 36
learning_rate: 0.0014 | beta1: 0.85 | beta2: 0.94
Finished hyperparam: 10 of 36
learning_rate: 0.0014 | beta1: 0.85 | beta2: 0.9695
Finished hyperparam: 11 of 36
learning_rate: 0.0014 | beta1: 0.85 | beta2: 0.999
Finished hyperparam: 12 of 36
learning_rate: 0.0014 | beta1: 0.9 | beta2: 0.94
