From cc95d6dd49ca523aaf864dc9e14ce0366e905e37 Mon Sep 17 00:00:00 2001 From: abhijit_linux Date: Thu, 28 Apr 2022 00:39:48 +0530 Subject: [PATCH 1/4] init --- docs/source/models/faster_rcnn.rst | 27 +++++++ docs/source/models_new.rst | 1 + torchvision/models/detection/faster_rcnn.py | 87 +++++++++++++++------ 3 files changed, 92 insertions(+), 23 deletions(-) create mode 100644 docs/source/models/faster_rcnn.rst diff --git a/docs/source/models/faster_rcnn.rst b/docs/source/models/faster_rcnn.rst new file mode 100644 index 00000000000..2b05d23b160 --- /dev/null +++ b/docs/source/models/faster_rcnn.rst @@ -0,0 +1,27 @@ +Faster R-CNN +============ + +.. currentmodule:: torchvision.models.detection + +The Mask R-CNN model is based on the `Mask R-CNN `__ +paper. + + +Model builders +-------------- + +The following model builders can be used to instantiate a Mask R-CNN model, with or +without pre-trained weights. All the model builders internally rely on the +``torchvision.models.detection.faster_rcnn.FasterRCNN`` base class. Please refer to the `source +code +`_ for +more details about this class. + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + fasterrcnn_resnet50_fpn + fasterrcnn_mobilenet_v3_large_fpn + fasterrcnn_mobilenet_v3_large_320_fpn + diff --git a/docs/source/models_new.rst b/docs/source/models_new.rst index c1b7e04339c..2d5fe86e082 100644 --- a/docs/source/models_new.rst +++ b/docs/source/models_new.rst @@ -41,6 +41,7 @@ weights: models/densenet models/efficientnet models/efficientnetv2 + models/faster_rcnn models/googlenet models/mobilenetv2 models/regnet diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index f1da6f77835..8a225fa8910 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -453,10 +453,9 @@ def fasterrcnn_resnet50_fpn( **kwargs: Any, ) -> FasterRCNN: """ - Constructs a Faster R-CNN model with a ResNet-50-FPN backbone. - - Reference: `"Faster R-CNN: Towards Real-Time Object Detection with - Region Proposal Networks" `_. + Faster R-CNN model with a ResNet-50-FPN backbone from the `Faster R-CNN: Towards Real-Time Object + Detection with Region Proposal Networks `__ + paper. The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each image, and should be in ``0-1`` range. Different images can have different sizes. @@ -510,13 +509,26 @@ def fasterrcnn_resnet50_fpn( >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11) Args: - weights (FasterRCNN_ResNet50_FPN_Weights, optional): The pretrained weights for the model - progress (bool): If True, displays a progress bar of the download to stderr + weights (:class:`~torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. 
+ progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. num_classes (int, optional): number of output classes of the model (including the background) - weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone - trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block. - Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is - passed (the default) this value is set to 3. + weights_backbone (:class:`~torchvision.models.ResNet50_Weights`, optional): The + pretrained weights for the backbone. + trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from + final block. Valid values are between 0 and 5, with 5 meaning all backbone layers are + trainable. If ``None`` is passed (the default) this value is set to 3. + **kwargs: parameters passed to the ``torchvision.models.detection.faster_rcnn.FasterRCNN`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights + :members: """ weights = FasterRCNN_ResNet50_FPN_Weights.verify(weights) weights_backbone = ResNet50_Weights.verify(weights_backbone) @@ -658,7 +670,10 @@ def fasterrcnn_mobilenet_v3_large_320_fpn( **kwargs: Any, ) -> FasterRCNN: """ - Constructs a low resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone tunned for mobile use-cases. + Faster R-CNN model with a MobileNetV3-Large backbone from the `Faster R-CNN: Towards Real-Time Object + Detection with Region Proposal Networks `__ + paper. + It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more details. 
@@ -671,13 +686,26 @@ def fasterrcnn_mobilenet_v3_large_320_fpn( >>> predictions = model(x) Args: - weights (FasterRCNN_MobileNet_V3_Large_320_FPN_Weights, optional): The pretrained weights for the model - progress (bool): If True, displays a progress bar of the download to stderr + weights (:class:`~torchvision.models.detection.FasterRCNN_MobileNet_V3_Large_320_FPN_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.detection.FasterRCNN_MobileNet_V3_Large_320_FPN_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. num_classes (int, optional): number of output classes of the model (including the background) - weights_backbone (MobileNet_V3_Large_Weights, optional): The pretrained weights for the backbone - trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block. - Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable. If ``None`` is - passed (the default) this value is set to 3. + weights_backbone (:class:`~torchvision.models.MobileNet_V3_Large_Weights`, optional): The + pretrained weights for the backbone. + trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from + final block. Valid values are between 0 and 5, with 5 meaning all backbone layers are + trainable. If ``None`` is passed (the default) this value is set to 3. + **kwargs: parameters passed to the ``torchvision.models.detection.faster_rcnn.FasterRCNN`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. 
autoclass:: torchvision.models.detection.FasterRCNN_MobileNet_V3_Large_320_FPN_Weights + :members: """ weights = FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.verify(weights) weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone) @@ -728,13 +756,26 @@ def fasterrcnn_mobilenet_v3_large_fpn( >>> predictions = model(x) Args: - weights (FasterRCNN_MobileNet_V3_Large_FPN_Weights, optional): The pretrained weights for the model - progress (bool): If True, displays a progress bar of the download to stderr + weights (:class:`~torchvision.models.detection.FasterRCNN_MobileNet_V3_Large_FPN_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.detection.FasterRCNN_MobileNet_V3_Large_FPN_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. num_classes (int, optional): number of output classes of the model (including the background) - weights_backbone (MobileNet_V3_Large_Weights, optional): The pretrained weights for the backbone - trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block. - Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable. If ``None`` is - passed (the default) this value is set to 3. + weights_backbone (:class:`~torchvision.models.MobileNet_V3_Large_Weights`, optional): The + pretrained weights for the backbone. + trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from + final block. Valid values are between 0 and 5, with 5 meaning all backbone layers are + trainable. If ``None`` is passed (the default) this value is set to 3. + **kwargs: parameters passed to the ``torchvision.models.detection.faster_rcnn.FasterRCNN`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. 
autoclass:: torchvision.models.detection.FasterRCNN_MobileNet_V3_Large_FPN_Weights + :members: """ weights = FasterRCNN_MobileNet_V3_Large_FPN_Weights.verify(weights) weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone) From 5db022af9bbe62e1a6311ec1360ac5684f511e7d Mon Sep 17 00:00:00 2001 From: abhijit_linux Date: Thu, 28 Apr 2022 23:17:34 +0530 Subject: [PATCH 2/4] init --- docs/source/models/faster_rcnn.rst | 6 ++-- docs/source/models_new.rst | 2 +- torchvision/models/detection/faster_rcnn.py | 39 +++++++++++++-------- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/docs/source/models/faster_rcnn.rst b/docs/source/models/faster_rcnn.rst index 2b05d23b160..43d8c8b6f68 100644 --- a/docs/source/models/faster_rcnn.rst +++ b/docs/source/models/faster_rcnn.rst @@ -3,14 +3,15 @@ Faster R-CNN .. currentmodule:: torchvision.models.detection -The Mask R-CNN model is based on the `Mask R-CNN `__ +The Faster R-CNN model is based on the `Faster R-CNN: Towards Real-Time Object Detection +with Region Proposal Networks `__ paper. Model builders -------------- -The following model builders can be used to instantiate a Mask R-CNN model, with or +The following model builders can be used to instantiate a Faster R-CNN model, with or without pre-trained weights. All the model builders internally rely on the ``torchvision.models.detection.faster_rcnn.FasterRCNN`` base class. Please refer to the `source code @@ -22,6 +23,7 @@ more details about this class. 
:template: function.rst fasterrcnn_resnet50_fpn + fasterrcnn_resnet50_fpn_v2 fasterrcnn_mobilenet_v3_large_fpn fasterrcnn_mobilenet_v3_large_320_fpn diff --git a/docs/source/models_new.rst b/docs/source/models_new.rst index 2d5fe86e082..067fbae68f0 100644 --- a/docs/source/models_new.rst +++ b/docs/source/models_new.rst @@ -41,7 +41,6 @@ weights: models/densenet models/efficientnet models/efficientnetv2 - models/faster_rcnn models/googlenet models/mobilenetv2 models/regnet @@ -94,6 +93,7 @@ weights: .. toctree:: :maxdepth: 1 + models/faster_rcnn models/fcos models/mask_rcnn models/retinanet diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index 8a225fa8910..abaea194732 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -565,21 +565,34 @@ def fasterrcnn_resnet50_fpn_v2( **kwargs: Any, ) -> FasterRCNN: """ - Constructs an improved Faster R-CNN model with a ResNet-50-FPN backbone. + Constructs an improved Faster R-CNN model with a ResNet-50-FPN backbone from the `Benchmarking Detection + Transfer Learning with Vision Transformers `__ paper. - Reference: `"Benchmarking Detection Transfer Learning with Vision Transformers" - `_. - - :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more details. + It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See + :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more + details. Args: - weights (FasterRCNN_ResNet50_FPN_V2_Weights, optional): The pretrained weights for the model - progress (bool): If True, displays a progress bar of the download to stderr + weights (:class:`~torchvision.models.detection.FasterRCNN_ResNet50_FPN_V2_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.detection.FasterRCNN_ResNet50_FPN_V2_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. 
+ progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. num_classes (int, optional): number of output classes of the model (including the background) - weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone - trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block. - Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is - passed (the default) this value is set to 3. + weights_backbone (:class:`~torchvision.models.ResNet50_Weights`, optional): The + pretrained weights for the backbone. + trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from + final block. Valid values are between 0 and 5, with 5 meaning all backbone layers are + trainable. If ``None`` is passed (the default) this value is set to 3. + **kwargs: parameters passed to the ``torchvision.models.detection.faster_rcnn.FasterRCNN`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.detection.FasterRCNN_ResNet50_FPN_V2_Weights + :members: """ weights = FasterRCNN_ResNet50_FPN_V2_Weights.verify(weights) weights_backbone = ResNet50_Weights.verify(weights_backbone) @@ -670,9 +683,7 @@ def fasterrcnn_mobilenet_v3_large_320_fpn( **kwargs: Any, ) -> FasterRCNN: """ - Faster R-CNN model with a MobileNetV3-Large backbone from the `Faster R-CNN: Towards Real-Time Object - Detection with Region Proposal Networks `__ - paper. + Low resolution Faster R-CNN model with a MobileNetV3-Large backbone tuned for mobile use cases. It works similarly to Faster R-CNN with ResNet-50 FPN backbone. 
See :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more From 94a9d6172817a46741d6ba67a44beb7fb5cf8bef Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Sat, 30 Apr 2022 19:23:36 +0530 Subject: [PATCH 3/4] Update torchvision/models/detection/faster_rcnn.py Co-authored-by: Vasilis Vryniotis --- torchvision/models/detection/faster_rcnn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index abaea194732..13a1bb60026 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -708,7 +708,7 @@ def fasterrcnn_mobilenet_v3_large_320_fpn( weights_backbone (:class:`~torchvision.models.MobileNet_V3_Large_Weights`, optional): The pretrained weights for the backbone. trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from - final block. Valid values are between 0 and 5, with 5 meaning all backbone layers are + final block. Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable. If ``None`` is passed (the default) this value is set to 3. **kwargs: parameters passed to the ``torchvision.models.detection.faster_rcnn.FasterRCNN`` base class. 
Please refer to the `source code From e6c6b29093f7130285f21d11c74f9e85fcc0162e Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Sat, 30 Apr 2022 19:23:46 +0530 Subject: [PATCH 4/4] Update torchvision/models/detection/faster_rcnn.py Co-authored-by: Vasilis Vryniotis --- torchvision/models/detection/faster_rcnn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index 13a1bb60026..9a0ce20410e 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -778,7 +778,7 @@ def fasterrcnn_mobilenet_v3_large_fpn( weights_backbone (:class:`~torchvision.models.MobileNet_V3_Large_Weights`, optional): The pretrained weights for the backbone. trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from - final block. Valid values are between 0 and 5, with 5 meaning all backbone layers are + final block. Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable. If ``None`` is passed (the default) this value is set to 3. **kwargs: parameters passed to the ``torchvision.models.detection.faster_rcnn.FasterRCNN`` base class. Please refer to the `source code