update docs

tensorpack · May 30, 2018 · 099975c · 099975c
1 parent 0ee4d8b
commit 099975c
Show file tree

Hide file tree

Showing 7 changed files with 16 additions and 10 deletions.
diff --git a/examples/FasterRCNN/NOTES.md b/examples/FasterRCNN/NOTES.md
@@ -35,8 +35,8 @@ Model:
 3. We only support single image per GPU.
 
 4. Because of (3), BatchNorm statistics are not supposed to be updated during fine-tuning.
-	 This specific kind of BatchNorm will need [my kernel](https://github.com/tensorflow/tensorflow/pull/12580)
-	 which is included since TF 1.4. If using an earlier version of TF, it will be either slow or wrong.
+   This specific kind of BatchNorm will need [my kernel](https://github.com/tensorflow/tensorflow/pull/12580)
+   which has been included since TF 1.4. If using an earlier version of TF, it will be either slow or wrong.
 
 Speed:
 

diff --git a/examples/FasterRCNN/README.md b/examples/FasterRCNN/README.md
@@ -1,6 +1,5 @@
 # Faster-RCNN / Mask-RCNN on COCO
-This example provides a minimal (only 1.6k lines) but faithful implementation of
-the following papers:
+This example provides a minimal (only 1.6k lines) but faithful implementation of the following papers:
 
 + [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497)
 + [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144)
@@ -70,7 +69,7 @@ MaskRCNN results contain both bbox and segm mAP.
 The two R50-C4 360k models have the same configuration __and mAP__
 as the `R50-C4-2x` entries in
 [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).
-So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron.
+<!-- So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron. -->
 The other models listed here do not correspond to any configurations in Detectron.
 
 ## Notes

diff --git a/examples/FasterRCNN/data.py b/examples/FasterRCNN/data.py
@@ -10,6 +10,7 @@
 from tensorpack.dataflow import (
     imgaug, TestDataSpeed, PrefetchDataZMQ, MultiProcessMapDataZMQ,
     MapDataComponent, DataFromList)
+from tensorpack.utils import logger
 # import tensorpack.utils.viz as tpviz
 
 from coco import COCODetection
@@ -277,7 +278,10 @@ def get_train_dataflow():
 
     # Valid training images should have at least one fg box.
     # But this filter shall not be applied for testing.
+    num = len(imgs)
     imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs))    # log invalid training
+    logger.info("Filtered {} images which contain no groudtruth boxes. Total #images for training: {}".format(
+        num - len(imgs), len(imgs)))
 
     ds = DataFromList(imgs, shuffle=True)
 

diff --git a/examples/FasterRCNN/model.py b/examples/FasterRCNN/model.py
@@ -99,7 +99,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
         add_moving_summary(*summaries)
 
     # Per-level loss summaries in FPN may appear lower due to the use of a small placeholder.
-    # But the total loss is still the same.
+    # But the total loss is still the same.  TODO make the summary op smarter
     placeholder = 0.
     label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
         labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
@@ -217,7 +217,8 @@ def generate_rpn_proposals(boxes, scores, img_shape,
         (-1, 4), name='nms_input_boxes')
     nms_indices = tf.image.non_max_suppression(
         topk_valid_boxes_y1x1y2x2,
-        topk_valid_scores,
+        # TODO use exp to work around a bug in TF1.9: https://github.com/tensorflow/tensorflow/issues/19578
+        tf.exp(topk_valid_scores),
         max_output_size=post_nms_topk,
         iou_threshold=config.RPN_PROPOSAL_NMS_THRESH)
 
@@ -608,7 +609,6 @@ def upsample2x(name, x):
 
         # tf.image.resize is, again, not aligned.
         # with tf.name_scope(name):
-        #     logger.info("Nearest neighbor")
         #     shape2d = tf.shape(x)[2:]
         #     x = tf.transpose(x, [0, 2, 3, 1])
         #     x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)

diff --git a/examples/FasterRCNN/train.py b/examples/FasterRCNN/train.py
@@ -623,5 +623,6 @@ def _trigger_epoch(self):
             max_epoch=config.LR_SCHEDULE[-1] * factor // stepnum,
             session_init=get_model_loader(args.load) if args.load else None,
         )
+        # nccl mode gives the best speed
         trainer = SyncMultiGPUTrainerReplicated(get_nr_gpu(), mode='nccl')
         launch_train_with_config(cfg, trainer)
diff --git a/examples/ImageNetModels/README.md b/examples/ImageNetModels/README.md
@@ -31,8 +31,8 @@ Evaluate the [pretrained model](http://models.tensorpack.com/ShuffleNet/):
 This AlexNet script is quite close to the setting in its [original
 paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
 Trained with 64x2 batch size, the script reaches 58% single-crop validation
-accuracy after 100 epochs. It also generates first-layer filter visualizations
-similar to the paper in tensorboard.
+accuracy after 100 epochs (21 hours on 2 V100s).
+It also writes first-layer filter visualizations, similar to those in the paper, to TensorBoard.
 
 ### Inception-BN, VGG16
 

diff --git a/examples/ResNet/resnet_model.py b/examples/ResNet/resnet_model.py
@@ -114,6 +114,8 @@ def resnet_group(name, l, block_func, features, count, stride):
 def resnet_backbone(image, num_blocks, group_func, block_func):
     with argscope(Conv2D, use_bias=False,
                   kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
+        # Note that this pads the image by [2, 3] instead of [3, 2].
+        # Similar things happen in later stride=2 layers as well.
         l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
         l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
         l = group_func('group0', l, block_func, 64, num_blocks[0], 1)