2 changes: 1 addition & 1 deletion CODEOWNERS
@@ -45,7 +45,7 @@
/research/transformer/ @daviddao
/research/video_prediction/ @cbfinn
/research/fivo/ @dieterichlawson
/samples/ @MarkDaoust
/samples/ @MarkDaoust @lamberta
/samples/languages/java/ @asimshankar
/tutorials/embedding/ @zffchen78 @a-dai
/tutorials/image/ @sherrym @shlens
1 change: 0 additions & 1 deletion official/resnet/cifar10_main.py
@@ -73,7 +73,6 @@ def parse_record(raw_record, is_training):
# The first byte represents the label, which we convert from uint8 to int32
# and then to one-hot.
label = tf.cast(record_vector[0], tf.int32)
label = tf.one_hot(label, _NUM_CLASSES)

# The remaining bytes after the label represent the image, which we reshape
# from [depth * height * width] to [depth, height, width].
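With the `tf.one_hot` call removed, `parse_record` now returns the label as a scalar `int32` class index; the loss and metrics further down the PR switch to their sparse variants to match. A condensed sketch of the resulting decode path, assuming the standard CIFAR-10 binary layout of one label byte followed by 3072 image bytes (an illustration, not the full function from the PR):

```python
import tensorflow as tf

_HEIGHT, _WIDTH, _NUM_CHANNELS = 32, 32, 3
_RECORD_BYTES = _HEIGHT * _WIDTH * _NUM_CHANNELS + 1  # 1 label byte + 3072 image bytes

def parse_record_sketch(raw_record):
  record = tf.decode_raw(raw_record, tf.uint8)
  # The label stays a scalar int32 class index in [0, 9]; no one-hot expansion.
  label = tf.cast(record[0], tf.int32)
  # Image bytes are stored depth-major; reshape and move channels last.
  depth_major = tf.reshape(record[1:_RECORD_BYTES],
                           [_NUM_CHANNELS, _HEIGHT, _WIDTH])
  image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)
  return image, label  # image: [32, 32, 3] float32, label: [] int32
```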
4 changes: 2 additions & 2 deletions official/resnet/cifar10_test.py
@@ -64,13 +64,13 @@ def test_dataset_input_fn(self):
lambda val: cifar10_main.parse_record(val, False))
image, label = fake_dataset.make_one_shot_iterator().get_next()

self.assertAllEqual(label.shape, (10,))
self.assertAllEqual(label.shape, ())
self.assertAllEqual(image.shape, (_HEIGHT, _WIDTH, _NUM_CHANNELS))

with self.test_session() as sess:
image, label = sess.run([image, label])

self.assertAllEqual(label, np.array([int(i == 7) for i in range(10)]))
self.assertEqual(label, 7)

for row in image:
for pixel in row:
8 changes: 5 additions & 3 deletions official/resnet/imagenet_main.py
@@ -39,7 +39,7 @@
}

_NUM_TRAIN_FILES = 1024
_SHUFFLE_BUFFER = 1500
_SHUFFLE_BUFFER = 10000

DATASET_NAME = 'ImageNet'

@@ -152,8 +152,6 @@ def parse_record(raw_record, is_training):
num_channels=_NUM_CHANNELS,
is_training=is_training)

label = tf.one_hot(tf.reshape(label, shape=[]), _NUM_CLASSES)

return image, label


@@ -177,6 +175,10 @@ def input_fn(is_training, data_dir, batch_size, num_epochs=1):
dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)

# Convert to individual records
# TODO(guptapriya): Should we make this cycle_length a flag similar to
Contributor

Did you get a chance to check how sensitive performance is to this on both something big (DGX, V100 GCE) and something small (1x K80/P100)? I prefer not to add a performance flag unless it makes a big difference, and if it stays a constant, it would be nice to have a brief comment so it isn't just a magic number.

Contributor Author

No, I haven't had the chance to run on anything other than DGX-1V. I don't think the performance difference will show up on K80s, because the input pipeline won't be the bottleneck there, but I haven't tested it. I am talking to the input team to figure out whether a constant here makes sense or whether this should be tuned (in which case we may need to just remove it).

# num_parallel_calls?
dataset = dataset.apply(tf.contrib.data.parallel_interleave(
tf.data.TFRecordDataset, cycle_length=10))
dataset = dataset.flat_map(tf.data.TFRecordDataset)

return resnet_run_loop.process_record_dataset(
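One way to answer the reviewer's question above without committing to a flag is a small standalone throughput check of just the file-reading stage, run on each hardware configuration of interest. A rough TF 1.x sketch (the helper, its defaults, and the usage are hypothetical, not part of the PR):

```python
import time
import tensorflow as tf

def records_per_second(filenames, cycle_length, num_batches=100, batch_size=256):
  """Rough throughput of TFRecord reading for a given parallel_interleave cycle_length."""
  with tf.Graph().as_default():
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset.apply(tf.contrib.data.parallel_interleave(
        tf.data.TFRecordDataset, cycle_length=cycle_length))
    dataset = dataset.batch(batch_size)
    next_batch = dataset.make_one_shot_iterator().get_next()
    with tf.Session() as sess:
      start = time.time()
      for _ in range(num_batches):
        sess.run(next_batch)
      return num_batches * batch_size / (time.time() - start)

# Hypothetical usage: compare a few settings on both a DGX-class and a K80-class machine.
# for cl in (1, 4, 10, 16):
#   print(cl, records_per_second(train_filenames, cycle_length=cl))
```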
12 changes: 6 additions & 6 deletions official/resnet/resnet_run_loop.py
@@ -79,7 +79,8 @@ def process_record_dataset(dataset, is_training, batch_size, shuffle_buffer,
tf.contrib.data.map_and_batch(
lambda value: parse_record_fn(value, is_training),
batch_size=batch_size,
num_parallel_batches=1))
num_parallel_batches=1,
drop_remainder=True))
Contributor

This could matter for CIFAR, which has only 60k images.

Contributor

It should not matter much as long as the batch size is reasonable, since this only drops the part of the dataset that doesn't fit into a full batch.

Contributor Author

Right, it only drops the last partial batch, so for 60k images even at a batch size of 2048 it should drop only 608 images or so.
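For reference, the number of examples discarded per epoch by `drop_remainder=True` is just `dataset_size % batch_size`; using the 60k figure from the comment:

```python
# Examples dropped per epoch = dataset_size % batch_size.
for batch_size in (128, 1024, 2048):
  print(batch_size, 60000 % batch_size)
# 128  -> 96
# 1024 -> 608
# 2048 -> 608
```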


# Operations between the final prefetch and the get_next call to the iterator
# will happen synchronously during run time. We prefetch here again to
@@ -111,7 +112,7 @@ def get_synth_input_fn(height, width, num_channels, num_classes):
"""
def input_fn(is_training, data_dir, batch_size, *args, **kwargs): # pylint: disable=unused-argument
images = tf.zeros((batch_size, height, width, num_channels), tf.float32)
labels = tf.zeros((batch_size, num_classes), tf.int32)
labels = tf.zeros((batch_size), tf.int32)
return tf.data.Dataset.from_tensors((images, labels)).repeat()

return input_fn
@@ -227,8 +228,8 @@ def resnet_model_fn(features, labels, mode, model_class,
})

# Calculate loss, which includes softmax cross entropy and L2 regularization.
cross_entropy = tf.losses.softmax_cross_entropy(
logits=logits, onehot_labels=labels)
cross_entropy = tf.losses.sparse_softmax_cross_entropy(
logits=logits, labels=labels)

# Create a tensor named cross_entropy for logging purposes.
tf.identity(cross_entropy, name='cross_entropy')
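Because the one-hot expansion was dropped from the input pipelines, the loss switches to `sparse_softmax_cross_entropy`, which takes integer class indices directly. The two formulations produce the same value for hard labels; a minimal TF 1.x sketch of the equivalence (standalone, not part of the PR):

```python
import tensorflow as tf

logits = tf.constant([[2.0, 0.5, -1.0],
                      [0.1, 0.2, 3.0]])
sparse_labels = tf.constant([0, 2], dtype=tf.int32)
onehot_labels = tf.one_hot(sparse_labels, depth=3)

dense_loss = tf.losses.softmax_cross_entropy(
    onehot_labels=onehot_labels, logits=logits)
sparse_loss = tf.losses.sparse_softmax_cross_entropy(
    labels=sparse_labels, logits=logits)

with tf.Session() as sess:
  print(sess.run([dense_loss, sparse_loss]))  # the two losses match
```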
@@ -282,8 +283,7 @@ def exclude_batch_norm(name):
train_op = None

if not tf.contrib.distribute.has_distribution_strategy():
accuracy = tf.metrics.accuracy(
tf.argmax(labels, axis=1), predictions['classes'])
accuracy = tf.metrics.accuracy(labels, predictions['classes'])
else:
# Metrics are currently not compatible with distribution strategies during
# training. This does not affect the overall performance of the model.
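Similarly, with integer labels the eval metric no longer needs to recover class indices via `tf.argmax`; `tf.metrics.accuracy` compares the labels to `predictions['classes']` directly. A small standalone sketch showing the two forms agree (not part of the PR):

```python
import tensorflow as tf

labels = tf.constant([3, 1, 7], dtype=tf.int64)
predicted_classes = tf.constant([3, 0, 7], dtype=tf.int64)
onehot_labels = tf.one_hot(labels, depth=10)

acc_sparse = tf.metrics.accuracy(labels, predicted_classes)
acc_onehot = tf.metrics.accuracy(tf.argmax(onehot_labels, axis=1), predicted_classes)

with tf.Session() as sess:
  # Metric counters live in local variables and need explicit initialization.
  sess.run(tf.local_variables_initializer())
  sess.run([acc_sparse[1], acc_onehot[1]])         # run the update ops once
  print(sess.run([acc_sparse[0], acc_onehot[0]]))  # both report 2/3
```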
2 changes: 1 addition & 1 deletion research/object_detection/utils/dataset_util.py
@@ -134,7 +134,7 @@ def read_dataset(file_read_func, decode_func, input_files, config):
file_read_func, cycle_length=config.num_readers,
block_length=config.read_block_length, sloppy=config.shuffle))
if config.shuffle:
records_dataset.shuffle(config.shuffle_buffer_size)
records_dataset = records_dataset.shuffle(config.shuffle_buffer_size)
tensor_dataset = records_dataset.map(
decode_func, num_parallel_calls=config.num_parallel_map_calls)
return tensor_dataset.prefetch(config.prefetch_size)
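The fix here is worth spelling out: `tf.data` transformations are functional, so `shuffle()` returns a new dataset rather than modifying `records_dataset` in place, and the original call's result was silently discarded. A tiny sketch of the pattern (standalone, not from the repo):

```python
import tensorflow as tf

dataset = tf.data.Dataset.range(5)

# No effect: the shuffled dataset is returned but never captured.
dataset.shuffle(buffer_size=5)

# Correct: reassign so downstream ops see the shuffled dataset.
dataset = dataset.shuffle(buffer_size=5)
```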