From 5e5a68ccea52daee89bc2c564d3dc24a87ab35b9 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 18 Oct 2021 16:25:35 +0100 Subject: [PATCH 01/13] Move FCN methods to its own package. --- torchvision/models/segmentation/_utils.py | 9 ++ torchvision/models/segmentation/fcn.py | 83 ++++++++++++++++++- .../models/segmentation/segmentation.py | 43 +--------- 3 files changed, 91 insertions(+), 44 deletions(-) diff --git a/torchvision/models/segmentation/_utils.py b/torchvision/models/segmentation/_utils.py index 0e9a9477838..1286482dde5 100644 --- a/torchvision/models/segmentation/_utils.py +++ b/torchvision/models/segmentation/_utils.py @@ -4,6 +4,8 @@ from torch import nn, Tensor from torch.nn import functional as F +from ..._internally_replaced_utils import load_state_dict_from_url + class _SimpleSegmentationModel(nn.Module): __constants__ = ["aux_classifier"] @@ -32,3 +34,10 @@ def forward(self, x: Tensor) -> Dict[str, Tensor]: result["aux"] = x return result + + +def _load_weights(arch: str, model: nn.Module, model_url: Optional[str], progress: bool) -> None: + if model_url is None: + raise ValueError("No checkpoint is available for {}".format(arch)) + state_dict = load_state_dict_from_url(model_url, progress=progress) + model.load_state_dict(state_dict) diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index 6a935e9ac48..2e9851b6714 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -1,9 +1,19 @@ +from typing import Any, Optional + from torch import nn -from ._utils import _SimpleSegmentationModel +from .. 
import resnet +from ..feature_extraction import create_feature_extractor +from ._utils import _SimpleSegmentationModel, _load_weights + + +__all__ = ["FCN", "fcn_resnet50", "fcn_resnet101"] -__all__ = ["FCN"] +model_urls = { + "fcn_resnet50_coco": "https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth", + "fcn_resnet101_coco": "https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth", +} class FCN(_SimpleSegmentationModel): @@ -35,3 +45,72 @@ def __init__(self, in_channels: int, channels: int) -> None: ] super(FCNHead, self).__init__(*layers) + + +def _fcn_resnet( + backbone_name: str, + pretrained: bool, + progress: bool, + num_classes: int, + aux: Optional[bool], + pretrained_backbone: bool = True +) -> nn.Module: + if pretrained: + aux = True + pretrained_backbone = False + + backbone = resnet.__dict__[backbone_name]( + pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True] + ) + + return_layers = {"layer4": "out"} + if aux: + return_layers["layer3"] = "aux" + backbone = create_feature_extractor(backbone, return_layers) + + aux_classifier = FCNHead(1024, num_classes) if aux else None + classifier = FCNHead(2048, num_classes) + model = FCN(backbone, classifier, aux_classifier) + + if pretrained: + arch = "fcn_" + backbone_name + "_coco" + _load_weights(arch, model, model_urls.get(arch, None), progress) + return model + + +def fcn_resnet50( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any, +) -> nn.Module: + """Constructs a Fully-Convolutional Network model with a ResNet-50 backbone. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 which + contains the same classes as Pascal VOC + progress (bool): If True, displays a progress bar of the download to stderr + num_classes (int): number of output classes of the model (including the background) + aux_loss (bool): If True, it uses an auxiliary loss + """ + return _fcn_resnet("resnet50", pretrained, progress, num_classes, aux_loss, **kwargs) + + +def fcn_resnet101( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any, +) -> nn.Module: + """Constructs a Fully-Convolutional Network model with a ResNet-101 backbone. + + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 which + contains the same classes as Pascal VOC + progress (bool): If True, displays a progress bar of the download to stderr + num_classes (int): number of output classes of the model (including the background) + aux_loss (bool): If True, it uses an auxiliary loss + """ + return _fcn_resnet("resnet101", pretrained, progress, num_classes, aux_loss, **kwargs) diff --git a/torchvision/models/segmentation/segmentation.py b/torchvision/models/segmentation/segmentation.py index c19e36e4705..375558fe420 100644 --- a/torchvision/models/segmentation/segmentation.py +++ b/torchvision/models/segmentation/segmentation.py @@ -7,7 +7,7 @@ from .. 
import resnet from ..feature_extraction import create_feature_extractor from .deeplabv3 import DeepLabHead, DeepLabV3 -from .fcn import FCN, FCNHead +from .fcn import FCNHead, fcn_resnet50, fcn_resnet101 from .lraspp import LRASPP @@ -22,8 +22,6 @@ model_urls = { - "fcn_resnet50_coco": "https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth", - "fcn_resnet101_coco": "https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth", "deeplabv3_resnet50_coco": "https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth", "deeplabv3_resnet101_coco": "https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth", "deeplabv3_mobilenet_v3_large_coco": "https://download.pytorch.org/models/deeplabv3_mobilenet_v3_large-fc3c493d.pth", @@ -68,7 +66,6 @@ def _segm_model( model_map = { "deeplabv3": (DeepLabHead, DeepLabV3), - "fcn": (FCNHead, FCN), } classifier = model_map[name][0](out_inplanes, num_classes) base_model = model_map[name][1] @@ -122,44 +119,6 @@ def _segm_lraspp_mobilenetv3(backbone_name: str, num_classes: int, pretrained_ba return model -def fcn_resnet50( - pretrained: bool = False, - progress: bool = True, - num_classes: int = 21, - aux_loss: Optional[bool] = None, - **kwargs: Any, -) -> nn.Module: - """Constructs a Fully-Convolutional Network model with a ResNet-50 backbone. 
- - Args: - pretrained (bool): If True, returns a model pre-trained on COCO train2017 which - contains the same classes as Pascal VOC - progress (bool): If True, displays a progress bar of the download to stderr - num_classes (int): number of output classes of the model (including the background) - aux_loss (bool): If True, it uses an auxiliary loss - """ - return _load_model("fcn", "resnet50", pretrained, progress, num_classes, aux_loss, **kwargs) - - -def fcn_resnet101( - pretrained: bool = False, - progress: bool = True, - num_classes: int = 21, - aux_loss: Optional[bool] = None, - **kwargs: Any, -) -> nn.Module: - """Constructs a Fully-Convolutional Network model with a ResNet-101 backbone. - - Args: - pretrained (bool): If True, returns a model pre-trained on COCO train2017 which - contains the same classes as Pascal VOC - progress (bool): If True, displays a progress bar of the download to stderr - num_classes (int): number of output classes of the model (including the background) - aux_loss (bool): If True, it uses an auxiliary loss - """ - return _load_model("fcn", "resnet101", pretrained, progress, num_classes, aux_loss, **kwargs) - - def deeplabv3_resnet50( pretrained: bool = False, progress: bool = True, From 93cdcf67ca549ccf3438b9c4d93548cbd7d93d03 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 18 Oct 2021 16:30:25 +0100 Subject: [PATCH 02/13] Fix lint. 
--- torchvision/models/segmentation/fcn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index 2e9851b6714..5055622927b 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -53,7 +53,7 @@ def _fcn_resnet( progress: bool, num_classes: int, aux: Optional[bool], - pretrained_backbone: bool = True + pretrained_backbone: bool = True, ) -> nn.Module: if pretrained: aux = True From 7213a9d1fb980def65ccbca2f041a2791517f3e3 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 18 Oct 2021 17:52:25 +0100 Subject: [PATCH 03/13] Move LRASPP methods to their own package. --- torchvision/models/segmentation/lraspp.py | 56 ++++++++++++++++++- .../models/segmentation/segmentation.py | 45 +-------------- 2 files changed, 55 insertions(+), 46 deletions(-) diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py index 654e2811315..209ce075a99 100644 --- a/torchvision/models/segmentation/lraspp.py +++ b/torchvision/models/segmentation/lraspp.py @@ -1,11 +1,20 @@ from collections import OrderedDict -from typing import Dict +from typing import Any, Dict from torch import nn, Tensor from torch.nn import functional as F +from .. 
import mobilenetv3 +from ..feature_extraction import create_feature_extractor +from ._utils import _load_weights -__all__ = ["LRASPP"] + +__all__ = ["LRASPP", "lraspp_mobilenet_v3_large"] + + +model_urls = { + "lraspp_mobilenet_v3_large_coco": "https://download.pytorch.org/models/lraspp_mobilenet_v3_large-d234d4ea.pth", +} class LRASPP(nn.Module): @@ -68,3 +77,46 @@ def forward(self, input: Dict[str, Tensor]) -> Tensor: x = F.interpolate(x, size=low.shape[-2:], mode="bilinear", align_corners=False) return self.low_classifier(low) + self.high_classifier(x) + + +def _lraspp_mobilenetv3( + backbone_name: str, pretrained: bool, progress: bool, num_classes: int, pretrained_backbone: bool = True +) -> LRASPP: + if pretrained: + pretrained_backbone = False + + backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features + + # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. + # The first and last blocks are always included because they are the C0 (conv1) and Cn. + stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] + low_pos = stage_indices[-4] # use C2 here which has output_stride = 8 + high_pos = stage_indices[-1] # use C5 which has output_stride = 16 + low_channels = backbone[low_pos].out_channels + high_channels = backbone[high_pos].out_channels + + backbone = create_feature_extractor(backbone, {str(low_pos): "low", str(high_pos): "high"}) + + model = LRASPP(backbone, low_channels, high_channels, num_classes) + + if pretrained: + arch = "lraspp_" + backbone_name + "_coco" + _load_weights(arch, model, model_urls.get(arch, None), progress) + return model + + +def lraspp_mobilenet_v3_large( + pretrained: bool = False, progress: bool = True, num_classes: int = 21, **kwargs: Any +) -> nn.Module: + """Constructs a Lite R-ASPP Network model with a MobileNetV3-Large backbone. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 which + contains the same classes as Pascal VOC + progress (bool): If True, displays a progress bar of the download to stderr + num_classes (int): number of output classes of the model (including the background) + """ + if kwargs.pop("aux_loss", False): + raise NotImplementedError("This model does not use auxiliary loss") + + return _lraspp_mobilenetv3("mobilenet_v3_large", pretrained, progress, num_classes, **kwargs) diff --git a/torchvision/models/segmentation/segmentation.py b/torchvision/models/segmentation/segmentation.py index 375558fe420..6d5eeb75adb 100644 --- a/torchvision/models/segmentation/segmentation.py +++ b/torchvision/models/segmentation/segmentation.py @@ -8,7 +8,7 @@ from ..feature_extraction import create_feature_extractor from .deeplabv3 import DeepLabHead, DeepLabV3 from .fcn import FCNHead, fcn_resnet50, fcn_resnet101 -from .lraspp import LRASPP +from .lraspp import lraspp_mobilenet_v3_large __all__ = [ @@ -25,7 +25,6 @@ "deeplabv3_resnet50_coco": "https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth", "deeplabv3_resnet101_coco": "https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth", "deeplabv3_mobilenet_v3_large_coco": "https://download.pytorch.org/models/deeplabv3_mobilenet_v3_large-fc3c493d.pth", - "lraspp_mobilenet_v3_large_coco": "https://download.pytorch.org/models/lraspp_mobilenet_v3_large-d234d4ea.pth", } @@ -102,23 +101,6 @@ def _load_weights(model: nn.Module, arch_type: str, backbone: str, progress: boo model.load_state_dict(state_dict) -def _segm_lraspp_mobilenetv3(backbone_name: str, num_classes: int, pretrained_backbone: bool = True) -> LRASPP: - backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features - - # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. 
- # The first and last blocks are always included because they are the C0 (conv1) and Cn. - stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] - low_pos = stage_indices[-4] # use C2 here which has output_stride = 8 - high_pos = stage_indices[-1] # use C5 which has output_stride = 16 - low_channels = backbone[low_pos].out_channels - high_channels = backbone[high_pos].out_channels - - backbone = create_feature_extractor(backbone, {str(low_pos): "low", str(high_pos): "high"}) - - model = LRASPP(backbone, low_channels, high_channels, num_classes) - return model - - def deeplabv3_resnet50( pretrained: bool = False, progress: bool = True, @@ -174,28 +156,3 @@ def deeplabv3_mobilenet_v3_large( aux_loss (bool): If True, it uses an auxiliary loss """ return _load_model("deeplabv3", "mobilenet_v3_large", pretrained, progress, num_classes, aux_loss, **kwargs) - - -def lraspp_mobilenet_v3_large( - pretrained: bool = False, progress: bool = True, num_classes: int = 21, **kwargs: Any -) -> nn.Module: - """Constructs a Lite R-ASPP Network model with a MobileNetV3-Large backbone. 
- - Args: - pretrained (bool): If True, returns a model pre-trained on COCO train2017 which - contains the same classes as Pascal VOC - progress (bool): If True, displays a progress bar of the download to stderr - num_classes (int): number of output classes of the model (including the background) - """ - if kwargs.pop("aux_loss", False): - raise NotImplementedError("This model does not use auxiliary loss") - - backbone_name = "mobilenet_v3_large" - if pretrained: - kwargs["pretrained_backbone"] = False - model = _segm_lraspp_mobilenetv3(backbone_name, num_classes, **kwargs) - - if pretrained: - _load_weights(model, "lraspp", backbone_name, progress) - - return model From bee91a14e2a33599fc24f46aff8321b51b430354 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 18 Oct 2021 18:18:24 +0100 Subject: [PATCH 04/13] Move DeepLabV3 methods to their own package. --- torchvision/models/segmentation/__init__.py | 1 - torchvision/models/segmentation/deeplabv3.py | 127 +++++++++++++- torchvision/models/segmentation/fcn.py | 7 +- torchvision/models/segmentation/lraspp.py | 4 +- .../models/segmentation/segmentation.py | 161 +----------------- 5 files changed, 131 insertions(+), 169 deletions(-) diff --git a/torchvision/models/segmentation/__init__.py b/torchvision/models/segmentation/__init__.py index fb6633d7fb5..1765502d693 100644 --- a/torchvision/models/segmentation/__init__.py +++ b/torchvision/models/segmentation/__init__.py @@ -1,4 +1,3 @@ -from .segmentation import * from .fcn import * from .deeplabv3 import * from .lraspp import * diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index a8f06bd89bd..f57ea5db101 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -1,13 +1,29 @@ -from typing import List +from typing import Any, List, Optional import torch from torch import nn from torch.nn import functional as F -from ._utils import 
_SimpleSegmentationModel +from .. import mobilenetv3 +from .. import resnet +from ..feature_extraction import create_feature_extractor +from ._utils import _SimpleSegmentationModel, _load_weights +from .fcn import FCNHead -__all__ = ["DeepLabV3"] +__all__ = [ + "DeepLabV3", + "deeplabv3_resnet50", + "deeplabv3_resnet101", + "deeplabv3_mobilenet_v3_large", +] + + +model_urls = { + "deeplabv3_resnet50_coco": "https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth", + "deeplabv3_resnet101_coco": "https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth", + "deeplabv3_mobilenet_v3_large_coco": "https://download.pytorch.org/models/deeplabv3_mobilenet_v3_large-fc3c493d.pth", +} class DeepLabV3(_SimpleSegmentationModel): @@ -95,3 +111,108 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: _res.append(conv(x)) res = torch.cat(_res, dim=1) return self.project(res) + + +def _deeplabv3( + backbone_name: str, + pretrained: bool, + progress: bool, + num_classes: int, + aux: Optional[bool], + pretrained_backbone: bool = True, +) -> DeepLabV3: + if pretrained: + aux = True + pretrained_backbone = False + + if "resnet" in backbone_name: + backbone = resnet.__dict__[backbone_name]( + pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True] + ) + out_layer = "layer4" + out_inplanes = 2048 + aux_layer = "layer3" + aux_inplanes = 1024 + elif "mobilenet_v3" in backbone_name: + backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features + # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. + # The first and last blocks are always included because they are the C0 (conv1) and Cn. 
+ stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] + out_pos = stage_indices[-1] # use C5 which has output_stride = 16 + out_layer = str(out_pos) + out_inplanes = backbone[out_pos].out_channels + aux_pos = stage_indices[-4] # use C2 here which has output_stride = 8 + aux_layer = str(aux_pos) + aux_inplanes = backbone[aux_pos].out_channels + else: + raise NotImplementedError("backbone {} is not supported as of now".format(backbone_name)) + return_layers = {out_layer: "out"} + if aux: + return_layers[aux_layer] = "aux" + backbone = create_feature_extractor(backbone, return_layers) + + aux_classifier = FCNHead(aux_inplanes, num_classes) if aux else None + classifier = DeepLabHead(out_inplanes, num_classes) + model = DeepLabV3(backbone, classifier, aux_classifier) + + if pretrained: + arch = "deeplabv3_" + backbone_name + "_coco" + _load_weights(arch, model, model_urls.get(arch, None), progress) + return model + + +def deeplabv3_resnet50( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any, +) -> DeepLabV3: + """Constructs a DeepLabV3 model with a ResNet-50 backbone. + + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 which + contains the same classes as Pascal VOC + progress (bool): If True, displays a progress bar of the download to stderr + num_classes (int): number of output classes of the model (including the background) + aux_loss (bool): If True, it uses an auxiliary loss + """ + return _deeplabv3("resnet50", pretrained, progress, num_classes, aux_loss, **kwargs) + + +def deeplabv3_resnet101( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any, +) -> DeepLabV3: + """Constructs a DeepLabV3 model with a ResNet-101 backbone. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 which + contains the same classes as Pascal VOC + progress (bool): If True, displays a progress bar of the download to stderr + num_classes (int): The number of classes + aux_loss (bool): If True, include an auxiliary classifier + """ + return _deeplabv3("resnet101", pretrained, progress, num_classes, aux_loss, **kwargs) + + +def deeplabv3_mobilenet_v3_large( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any, +) -> DeepLabV3: + """Constructs a DeepLabV3 model with a MobileNetV3-Large backbone. + + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 which + contains the same classes as Pascal VOC + progress (bool): If True, displays a progress bar of the download to stderr + num_classes (int): number of output classes of the model (including the background) + aux_loss (bool): If True, it uses an auxiliary loss + """ + return _deeplabv3("mobilenet_v3_large", pretrained, progress, num_classes, aux_loss, **kwargs) diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index 5055622927b..50cf699d4e1 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -54,7 +54,7 @@ def _fcn_resnet( num_classes: int, aux: Optional[bool], pretrained_backbone: bool = True, -) -> nn.Module: +) -> FCN: if pretrained: aux = True pretrained_backbone = False @@ -62,7 +62,6 @@ def _fcn_resnet( backbone = resnet.__dict__[backbone_name]( pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True] ) - return_layers = {"layer4": "out"} if aux: return_layers["layer3"] = "aux" @@ -84,7 +83,7 @@ def fcn_resnet50( num_classes: int = 21, aux_loss: Optional[bool] = None, **kwargs: Any, -) -> nn.Module: +) -> FCN: """Constructs a Fully-Convolutional Network model with a ResNet-50 backbone. 
Args: @@ -103,7 +102,7 @@ def fcn_resnet101( num_classes: int = 21, aux_loss: Optional[bool] = None, **kwargs: Any, -) -> nn.Module: +) -> FCN: """Constructs a Fully-Convolutional Network model with a ResNet-101 backbone. Args: diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py index 209ce075a99..54659a4a2cf 100644 --- a/torchvision/models/segmentation/lraspp.py +++ b/torchvision/models/segmentation/lraspp.py @@ -86,7 +86,6 @@ def _lraspp_mobilenetv3( pretrained_backbone = False backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features - # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. # The first and last blocks are always included because they are the C0 (conv1) and Cn. stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] @@ -94,7 +93,6 @@ def _lraspp_mobilenetv3( high_pos = stage_indices[-1] # use C5 which has output_stride = 16 low_channels = backbone[low_pos].out_channels high_channels = backbone[high_pos].out_channels - backbone = create_feature_extractor(backbone, {str(low_pos): "low", str(high_pos): "high"}) model = LRASPP(backbone, low_channels, high_channels, num_classes) @@ -107,7 +105,7 @@ def _lraspp_mobilenetv3( def lraspp_mobilenet_v3_large( pretrained: bool = False, progress: bool = True, num_classes: int = 21, **kwargs: Any -) -> nn.Module: +) -> LRASPP: """Constructs a Lite R-ASPP Network model with a MobileNetV3-Large backbone. Args: diff --git a/torchvision/models/segmentation/segmentation.py b/torchvision/models/segmentation/segmentation.py index 6d5eeb75adb..1765502d693 100644 --- a/torchvision/models/segmentation/segmentation.py +++ b/torchvision/models/segmentation/segmentation.py @@ -1,158 +1,3 @@ -from typing import Any, Optional - -from torch import nn - -from ..._internally_replaced_utils import load_state_dict_from_url -from .. 
import mobilenetv3 -from .. import resnet -from ..feature_extraction import create_feature_extractor -from .deeplabv3 import DeepLabHead, DeepLabV3 -from .fcn import FCNHead, fcn_resnet50, fcn_resnet101 -from .lraspp import lraspp_mobilenet_v3_large - - -__all__ = [ - "fcn_resnet50", - "fcn_resnet101", - "deeplabv3_resnet50", - "deeplabv3_resnet101", - "deeplabv3_mobilenet_v3_large", - "lraspp_mobilenet_v3_large", -] - - -model_urls = { - "deeplabv3_resnet50_coco": "https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth", - "deeplabv3_resnet101_coco": "https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth", - "deeplabv3_mobilenet_v3_large_coco": "https://download.pytorch.org/models/deeplabv3_mobilenet_v3_large-fc3c493d.pth", -} - - -def _segm_model( - name: str, backbone_name: str, num_classes: int, aux: Optional[bool], pretrained_backbone: bool = True -) -> nn.Module: - if "resnet" in backbone_name: - backbone = resnet.__dict__[backbone_name]( - pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True] - ) - out_layer = "layer4" - out_inplanes = 2048 - aux_layer = "layer3" - aux_inplanes = 1024 - elif "mobilenet_v3" in backbone_name: - backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features - - # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. - # The first and last blocks are always included because they are the C0 (conv1) and Cn. 
- stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] - out_pos = stage_indices[-1] # use C5 which has output_stride = 16 - out_layer = str(out_pos) - out_inplanes = backbone[out_pos].out_channels - aux_pos = stage_indices[-4] # use C2 here which has output_stride = 8 - aux_layer = str(aux_pos) - aux_inplanes = backbone[aux_pos].out_channels - else: - raise NotImplementedError("backbone {} is not supported as of now".format(backbone_name)) - - return_layers = {out_layer: "out"} - if aux: - return_layers[aux_layer] = "aux" - backbone = create_feature_extractor(backbone, return_layers) - - aux_classifier = None - if aux: - aux_classifier = FCNHead(aux_inplanes, num_classes) - - model_map = { - "deeplabv3": (DeepLabHead, DeepLabV3), - } - classifier = model_map[name][0](out_inplanes, num_classes) - base_model = model_map[name][1] - - model = base_model(backbone, classifier, aux_classifier) - return model - - -def _load_model( - arch_type: str, - backbone: str, - pretrained: bool, - progress: bool, - num_classes: int, - aux_loss: Optional[bool], - **kwargs: Any, -) -> nn.Module: - if pretrained: - aux_loss = True - kwargs["pretrained_backbone"] = False - model = _segm_model(arch_type, backbone, num_classes, aux_loss, **kwargs) - if pretrained: - _load_weights(model, arch_type, backbone, progress) - return model - - -def _load_weights(model: nn.Module, arch_type: str, backbone: str, progress: bool) -> None: - arch = arch_type + "_" + backbone + "_coco" - model_url = model_urls.get(arch, None) - if model_url is None: - raise NotImplementedError("pretrained {} is not supported as of now".format(arch)) - else: - state_dict = load_state_dict_from_url(model_url, progress=progress) - model.load_state_dict(state_dict) - - -def deeplabv3_resnet50( - pretrained: bool = False, - progress: bool = True, - num_classes: int = 21, - aux_loss: Optional[bool] = None, - **kwargs: Any, -) -> nn.Module: - """Constructs a DeepLabV3 
model with a ResNet-50 backbone. - - Args: - pretrained (bool): If True, returns a model pre-trained on COCO train2017 which - contains the same classes as Pascal VOC - progress (bool): If True, displays a progress bar of the download to stderr - num_classes (int): number of output classes of the model (including the background) - aux_loss (bool): If True, it uses an auxiliary loss - """ - return _load_model("deeplabv3", "resnet50", pretrained, progress, num_classes, aux_loss, **kwargs) - - -def deeplabv3_resnet101( - pretrained: bool = False, - progress: bool = True, - num_classes: int = 21, - aux_loss: Optional[bool] = None, - **kwargs: Any, -) -> nn.Module: - """Constructs a DeepLabV3 model with a ResNet-101 backbone. - - Args: - pretrained (bool): If True, returns a model pre-trained on COCO train2017 which - contains the same classes as Pascal VOC - progress (bool): If True, displays a progress bar of the download to stderr - num_classes (int): The number of classes - aux_loss (bool): If True, include an auxiliary classifier - """ - return _load_model("deeplabv3", "resnet101", pretrained, progress, num_classes, aux_loss, **kwargs) - - -def deeplabv3_mobilenet_v3_large( - pretrained: bool = False, - progress: bool = True, - num_classes: int = 21, - aux_loss: Optional[bool] = None, - **kwargs: Any, -) -> nn.Module: - """Constructs a DeepLabV3 model with a MobileNetV3-Large backbone. 
- - Args: - pretrained (bool): If True, returns a model pre-trained on COCO train2017 which - contains the same classes as Pascal VOC - progress (bool): If True, displays a progress bar of the download to stderr - num_classes (int): number of output classes of the model (including the background) - aux_loss (bool): If True, it uses an auxiliary loss - """ - return _load_model("deeplabv3", "mobilenet_v3_large", pretrained, progress, num_classes, aux_loss, **kwargs) +from .fcn import * +from .deeplabv3 import * +from .lraspp import * From c4c5ac439a1dd4be460e28935f14f000a6792322 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 18 Oct 2021 18:49:59 +0100 Subject: [PATCH 05/13] Adding deprecation warning for torchvision.models.segmentation.segmentation. --- torchvision/models/segmentation/segmentation.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/torchvision/models/segmentation/segmentation.py b/torchvision/models/segmentation/segmentation.py index 1765502d693..2051ec24f64 100644 --- a/torchvision/models/segmentation/segmentation.py +++ b/torchvision/models/segmentation/segmentation.py @@ -1,3 +1,9 @@ -from .fcn import * -from .deeplabv3 import * -from .lraspp import * +import warnings + +from . import * # noqa: F401, F403 + + +warnings.warn( + "The 'torchvision.models.segmentation.segmentation' module is deprecated. Please use directly the parent module " + "instead." +) From c341ed3a88977d3f61cd7f26d83fc1a61fabdd84 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 18 Oct 2021 19:17:14 +0100 Subject: [PATCH 06/13] Refactoring deeplab. 
--- torchvision/models/segmentation/deeplabv3.py | 71 ++++++++++++-------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index f57ea5db101..ba00d850334 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -113,7 +113,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return self.project(res) -def _deeplabv3( +def _deeplabv3_resnet( backbone_name: str, pretrained: bool, progress: bool, @@ -125,30 +125,47 @@ def _deeplabv3( aux = True pretrained_backbone = False - if "resnet" in backbone_name: - backbone = resnet.__dict__[backbone_name]( - pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True] - ) - out_layer = "layer4" - out_inplanes = 2048 - aux_layer = "layer3" - aux_inplanes = 1024 - elif "mobilenet_v3" in backbone_name: - backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features - # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. - # The first and last blocks are always included because they are the C0 (conv1) and Cn. 
- stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] - out_pos = stage_indices[-1] # use C5 which has output_stride = 16 - out_layer = str(out_pos) - out_inplanes = backbone[out_pos].out_channels - aux_pos = stage_indices[-4] # use C2 here which has output_stride = 8 - aux_layer = str(aux_pos) - aux_inplanes = backbone[aux_pos].out_channels - else: - raise NotImplementedError("backbone {} is not supported as of now".format(backbone_name)) - return_layers = {out_layer: "out"} + backbone = resnet.__dict__[backbone_name]( + pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True] + ) + return_layers = {"layer4": "out"} + if aux: + return_layers["layer3"] = "aux" + backbone = create_feature_extractor(backbone, return_layers) + + aux_classifier = FCNHead(1024, num_classes) if aux else None + classifier = DeepLabHead(2048, num_classes) + model = DeepLabV3(backbone, classifier, aux_classifier) + + if pretrained: + arch = "deeplabv3_" + backbone_name + "_coco" + _load_weights(arch, model, model_urls.get(arch, None), progress) + return model + + +def _deeplabv3_mobilenetv3( + backbone_name: str, + pretrained: bool, + progress: bool, + num_classes: int, + aux: Optional[bool], + pretrained_backbone: bool = True, +) -> DeepLabV3: + if pretrained: + aux = True + pretrained_backbone = False + + backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features + # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. + # The first and last blocks are always included because they are the C0 (conv1) and Cn. 
+ stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] + out_pos = stage_indices[-1] # use C5 which has output_stride = 16 + out_inplanes = backbone[out_pos].out_channels + aux_pos = stage_indices[-4] # use C2 here which has output_stride = 8 + aux_inplanes = backbone[aux_pos].out_channels + return_layers = {str(out_pos): "out"} if aux: - return_layers[aux_layer] = "aux" + return_layers[str(aux_pos)] = "aux" backbone = create_feature_extractor(backbone, return_layers) aux_classifier = FCNHead(aux_inplanes, num_classes) if aux else None @@ -177,7 +194,7 @@ def deeplabv3_resnet50( num_classes (int): number of output classes of the model (including the background) aux_loss (bool): If True, it uses an auxiliary loss """ - return _deeplabv3("resnet50", pretrained, progress, num_classes, aux_loss, **kwargs) + return _deeplabv3_resnet("resnet50", pretrained, progress, num_classes, aux_loss, **kwargs) def deeplabv3_resnet101( @@ -196,7 +213,7 @@ def deeplabv3_resnet101( num_classes (int): The number of classes aux_loss (bool): If True, include an auxiliary classifier """ - return _deeplabv3("resnet101", pretrained, progress, num_classes, aux_loss, **kwargs) + return _deeplabv3_resnet("resnet101", pretrained, progress, num_classes, aux_loss, **kwargs) def deeplabv3_mobilenet_v3_large( @@ -215,4 +232,4 @@ def deeplabv3_mobilenet_v3_large( num_classes (int): number of output classes of the model (including the background) aux_loss (bool): If True, it uses an auxiliary loss """ - return _deeplabv3("mobilenet_v3_large", pretrained, progress, num_classes, aux_loss, **kwargs) + return _deeplabv3_mobilenetv3("mobilenet_v3_large", pretrained, progress, num_classes, aux_loss, **kwargs) From 43c677e5f959202ef7b5cd7777d1b65d84c6469a Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 18 Oct 2021 19:20:31 +0100 Subject: [PATCH 07/13] Setting aux default to false. 
--- torchvision/models/segmentation/deeplabv3.py | 10 +++++----- torchvision/models/segmentation/fcn.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index ba00d850334..60a27677bb0 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -118,7 +118,7 @@ def _deeplabv3_resnet( pretrained: bool, progress: bool, num_classes: int, - aux: Optional[bool], + aux: bool, pretrained_backbone: bool = True, ) -> DeepLabV3: if pretrained: @@ -148,7 +148,7 @@ def _deeplabv3_mobilenetv3( pretrained: bool, progress: bool, num_classes: int, - aux: Optional[bool], + aux: bool, pretrained_backbone: bool = True, ) -> DeepLabV3: if pretrained: @@ -182,7 +182,7 @@ def deeplabv3_resnet50( pretrained: bool = False, progress: bool = True, num_classes: int = 21, - aux_loss: Optional[bool] = None, + aux_loss: bool = False, **kwargs: Any, ) -> DeepLabV3: """Constructs a DeepLabV3 model with a ResNet-50 backbone. @@ -201,7 +201,7 @@ def deeplabv3_resnet101( pretrained: bool = False, progress: bool = True, num_classes: int = 21, - aux_loss: Optional[bool] = None, + aux_loss: bool = False, **kwargs: Any, ) -> DeepLabV3: """Constructs a DeepLabV3 model with a ResNet-101 backbone. @@ -220,7 +220,7 @@ def deeplabv3_mobilenet_v3_large( pretrained: bool = False, progress: bool = True, num_classes: int = 21, - aux_loss: Optional[bool] = None, + aux_loss: bool = False, **kwargs: Any, ) -> DeepLabV3: """Constructs a DeepLabV3 model with a MobileNetV3-Large backbone. 
diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index 50cf699d4e1..3a74922b1c7 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -52,7 +52,7 @@ def _fcn_resnet( pretrained: bool, progress: bool, num_classes: int, - aux: Optional[bool], + aux: bool, pretrained_backbone: bool = True, ) -> FCN: if pretrained: @@ -81,7 +81,7 @@ def fcn_resnet50( pretrained: bool = False, progress: bool = True, num_classes: int = 21, - aux_loss: Optional[bool] = None, + aux_loss: bool = False, **kwargs: Any, ) -> FCN: """Constructs a Fully-Convolutional Network model with a ResNet-50 backbone. @@ -100,7 +100,7 @@ def fcn_resnet101( pretrained: bool = False, progress: bool = True, num_classes: int = 21, - aux_loss: Optional[bool] = None, + aux_loss: bool = False, **kwargs: Any, ) -> FCN: """Constructs a Fully-Convolutional Network model with a ResNet-101 backbone. From f534046e445963075a1ba56e9778a73c482b205a Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 18 Oct 2021 19:32:02 +0100 Subject: [PATCH 08/13] Fixing imports. 
--- torchvision/models/segmentation/deeplabv3.py | 2 +- torchvision/models/segmentation/fcn.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index 60a27677bb0..ae0104a50c1 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -1,4 +1,4 @@ -from typing import Any, List, Optional +from typing import Any, List import torch from torch import nn diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index 3a74922b1c7..42e54ab9552 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -1,4 +1,4 @@ -from typing import Any, Optional +from typing import Any from torch import nn From de1d2ad3b3f033d84fc385a36456364fa2be7eca Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 18 Oct 2021 20:00:36 +0100 Subject: [PATCH 09/13] Passing backbones instead of backbone names to builders. 
--- torchvision/models/segmentation/deeplabv3.py | 83 +++++++++++--------- torchvision/models/segmentation/fcn.py | 51 ++++++------ torchvision/models/segmentation/lraspp.py | 32 ++++---- 3 files changed, 86 insertions(+), 80 deletions(-) diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index ae0104a50c1..d5887d0c149 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -1,4 +1,4 @@ -from typing import Any, List +from typing import List import torch from torch import nn @@ -114,20 +114,10 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: def _deeplabv3_resnet( - backbone_name: str, - pretrained: bool, - progress: bool, + backbone: resnet.ResNet, num_classes: int, aux: bool, - pretrained_backbone: bool = True, ) -> DeepLabV3: - if pretrained: - aux = True - pretrained_backbone = False - - backbone = resnet.__dict__[backbone_name]( - pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True] - ) return_layers = {"layer4": "out"} if aux: return_layers["layer3"] = "aux" @@ -135,27 +125,14 @@ def _deeplabv3_resnet( aux_classifier = FCNHead(1024, num_classes) if aux else None classifier = DeepLabHead(2048, num_classes) - model = DeepLabV3(backbone, classifier, aux_classifier) - - if pretrained: - arch = "deeplabv3_" + backbone_name + "_coco" - _load_weights(arch, model, model_urls.get(arch, None), progress) - return model + return DeepLabV3(backbone, classifier, aux_classifier) def _deeplabv3_mobilenetv3( - backbone_name: str, - pretrained: bool, - progress: bool, + backbone: mobilenetv3.MobileNetV3, num_classes: int, aux: bool, - pretrained_backbone: bool = True, ) -> DeepLabV3: - if pretrained: - aux = True - pretrained_backbone = False - - backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features # Gather the indices of blocks which are strided. 
These are the locations of C1, ..., Cn-1 blocks. # The first and last blocks are always included because they are the C0 (conv1) and Cn. stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] @@ -170,12 +147,7 @@ def _deeplabv3_mobilenetv3( aux_classifier = FCNHead(aux_inplanes, num_classes) if aux else None classifier = DeepLabHead(out_inplanes, num_classes) - model = DeepLabV3(backbone, classifier, aux_classifier) - - if pretrained: - arch = "deeplabv3_" + backbone_name + "_coco" - _load_weights(arch, model, model_urls.get(arch, None), progress) - return model + return DeepLabV3(backbone, classifier, aux_classifier) def deeplabv3_resnet50( @@ -183,7 +155,7 @@ def deeplabv3_resnet50( progress: bool = True, num_classes: int = 21, aux_loss: bool = False, - **kwargs: Any, + pretrained_backbone: bool = True, ) -> DeepLabV3: """Constructs a DeepLabV3 model with a ResNet-50 backbone. @@ -193,8 +165,19 @@ def deeplabv3_resnet50( progress (bool): If True, displays a progress bar of the download to stderr num_classes (int): number of output classes of the model (including the background) aux_loss (bool): If True, it uses an auxiliary loss + pretrained_backbone (bool): If True, the backbone will be pre-trained. 
""" - return _deeplabv3_resnet("resnet50", pretrained, progress, num_classes, aux_loss, **kwargs) + if pretrained: + aux_loss = True + pretrained_backbone = False + + backbone = resnet.resnet50(pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True]) + model = _deeplabv3_resnet(backbone, num_classes, aux_loss) + + if pretrained: + arch = "deeplabv3_resnet50_coco" + _load_weights(arch, model, model_urls.get(arch, None), progress) + return model def deeplabv3_resnet101( @@ -202,7 +185,7 @@ def deeplabv3_resnet101( progress: bool = True, num_classes: int = 21, aux_loss: bool = False, - **kwargs: Any, + pretrained_backbone: bool = True, ) -> DeepLabV3: """Constructs a DeepLabV3 model with a ResNet-101 backbone. @@ -212,8 +195,19 @@ def deeplabv3_resnet101( progress (bool): If True, displays a progress bar of the download to stderr num_classes (int): The number of classes aux_loss (bool): If True, include an auxiliary classifier + pretrained_backbone (bool): If True, the backbone will be pre-trained. """ - return _deeplabv3_resnet("resnet101", pretrained, progress, num_classes, aux_loss, **kwargs) + if pretrained: + aux_loss = True + pretrained_backbone = False + + backbone = resnet.resnet101(pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True]) + model = _deeplabv3_resnet(backbone, num_classes, aux_loss) + + if pretrained: + arch = "deeplabv3_resnet101_coco" + _load_weights(arch, model, model_urls.get(arch, None), progress) + return model def deeplabv3_mobilenet_v3_large( @@ -221,7 +215,7 @@ def deeplabv3_mobilenet_v3_large( progress: bool = True, num_classes: int = 21, aux_loss: bool = False, - **kwargs: Any, + pretrained_backbone: bool = True, ) -> DeepLabV3: """Constructs a DeepLabV3 model with a MobileNetV3-Large backbone. 
@@ -231,5 +225,16 @@ def deeplabv3_mobilenet_v3_large( progress (bool): If True, displays a progress bar of the download to stderr num_classes (int): number of output classes of the model (including the background) aux_loss (bool): If True, it uses an auxiliary loss + pretrained_backbone (bool): If True, the backbone will be pre-trained. """ - return _deeplabv3_mobilenetv3("mobilenet_v3_large", pretrained, progress, num_classes, aux_loss, **kwargs) + if pretrained: + aux_loss = True + pretrained_backbone = False + + backbone = mobilenetv3.mobilenet_v3_large(pretrained=pretrained_backbone, dilated=True).features + model = _deeplabv3_mobilenetv3(backbone, num_classes, aux_loss) + + if pretrained: + arch = "deeplabv3_mobilenet_v3_large_coco" + _load_weights(arch, model, model_urls.get(arch, None), progress) + return model diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index 42e54ab9552..b021707e494 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -1,5 +1,3 @@ -from typing import Any - from torch import nn from .. 
import resnet @@ -48,20 +46,10 @@ def __init__(self, in_channels: int, channels: int) -> None: def _fcn_resnet( - backbone_name: str, - pretrained: bool, - progress: bool, + backbone: resnet.ResNet, num_classes: int, aux: bool, - pretrained_backbone: bool = True, ) -> FCN: - if pretrained: - aux = True - pretrained_backbone = False - - backbone = resnet.__dict__[backbone_name]( - pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True] - ) return_layers = {"layer4": "out"} if aux: return_layers["layer3"] = "aux" @@ -69,12 +57,7 @@ def _fcn_resnet( aux_classifier = FCNHead(1024, num_classes) if aux else None classifier = FCNHead(2048, num_classes) - model = FCN(backbone, classifier, aux_classifier) - - if pretrained: - arch = "fcn_" + backbone_name + "_coco" - _load_weights(arch, model, model_urls.get(arch, None), progress) - return model + return FCN(backbone, classifier, aux_classifier) def fcn_resnet50( @@ -82,7 +65,7 @@ def fcn_resnet50( progress: bool = True, num_classes: int = 21, aux_loss: bool = False, - **kwargs: Any, + pretrained_backbone: bool = True, ) -> FCN: """Constructs a Fully-Convolutional Network model with a ResNet-50 backbone. @@ -92,8 +75,19 @@ def fcn_resnet50( progress (bool): If True, displays a progress bar of the download to stderr num_classes (int): number of output classes of the model (including the background) aux_loss (bool): If True, it uses an auxiliary loss + pretrained_backbone (bool): If True, the backbone will be pre-trained. 
""" - return _fcn_resnet("resnet50", pretrained, progress, num_classes, aux_loss, **kwargs) + if pretrained: + aux_loss = True + pretrained_backbone = False + + backbone = resnet.resnet50(pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True]) + model = _fcn_resnet(backbone, num_classes, aux_loss) + + if pretrained: + arch = "fcn_resnet50_coco" + _load_weights(arch, model, model_urls.get(arch, None), progress) + return model def fcn_resnet101( @@ -101,7 +95,7 @@ def fcn_resnet101( progress: bool = True, num_classes: int = 21, aux_loss: bool = False, - **kwargs: Any, + pretrained_backbone: bool = True, ) -> FCN: """Constructs a Fully-Convolutional Network model with a ResNet-101 backbone. @@ -111,5 +105,16 @@ def fcn_resnet101( progress (bool): If True, displays a progress bar of the download to stderr num_classes (int): number of output classes of the model (including the background) aux_loss (bool): If True, it uses an auxiliary loss + pretrained_backbone (bool): If True, the backbone will be pre-trained. 
""" - return _fcn_resnet("resnet101", pretrained, progress, num_classes, aux_loss, **kwargs) + if pretrained: + aux_loss = True + pretrained_backbone = False + + backbone = resnet.resnet101(pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True]) + model = _fcn_resnet(backbone, num_classes, aux_loss) + + if pretrained: + arch = "fcn_resnet101_coco" + _load_weights(arch, model, model_urls.get(arch, None), progress) + return model diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py index 54659a4a2cf..e640c9ba9b0 100644 --- a/torchvision/models/segmentation/lraspp.py +++ b/torchvision/models/segmentation/lraspp.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from typing import Any, Dict +from typing import Dict from torch import nn, Tensor from torch.nn import functional as F @@ -79,13 +79,7 @@ def forward(self, input: Dict[str, Tensor]) -> Tensor: return self.low_classifier(low) + self.high_classifier(x) -def _lraspp_mobilenetv3( - backbone_name: str, pretrained: bool, progress: bool, num_classes: int, pretrained_backbone: bool = True -) -> LRASPP: - if pretrained: - pretrained_backbone = False - - backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features +def _lraspp_mobilenetv3(backbone: mobilenetv3.MobileNetV3, num_classes: int) -> LRASPP: # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. # The first and last blocks are always included because they are the C0 (conv1) and Cn. 
stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] @@ -95,16 +89,11 @@ def _lraspp_mobilenetv3( high_channels = backbone[high_pos].out_channels backbone = create_feature_extractor(backbone, {str(low_pos): "low", str(high_pos): "high"}) - model = LRASPP(backbone, low_channels, high_channels, num_classes) - - if pretrained: - arch = "lraspp_" + backbone_name + "_coco" - _load_weights(arch, model, model_urls.get(arch, None), progress) - return model + return LRASPP(backbone, low_channels, high_channels, num_classes) def lraspp_mobilenet_v3_large( - pretrained: bool = False, progress: bool = True, num_classes: int = 21, **kwargs: Any + pretrained: bool = False, progress: bool = True, num_classes: int = 21, pretrained_backbone: bool = True ) -> LRASPP: """Constructs a Lite R-ASPP Network model with a MobileNetV3-Large backbone. @@ -113,8 +102,15 @@ def lraspp_mobilenet_v3_large( contains the same classes as Pascal VOC progress (bool): If True, displays a progress bar of the download to stderr num_classes (int): number of output classes of the model (including the background) + pretrained_backbone (bool): If True, the backbone will be pre-trained. 
""" - if kwargs.pop("aux_loss", False): - raise NotImplementedError("This model does not use auxiliary loss") + if pretrained: + pretrained_backbone = False + + backbone = mobilenetv3.mobilenet_v3_large(pretrained=pretrained_backbone, dilated=True).features + model = _lraspp_mobilenetv3(backbone, num_classes) - return _lraspp_mobilenetv3("mobilenet_v3_large", pretrained, progress, num_classes, **kwargs) + if pretrained: + arch = "lraspp_mobilenet_v3_large_coco" + _load_weights(arch, model, model_urls.get(arch, None), progress) + return model From ede7980c517c364d10768b0ee45e2c6ef8eb5f94 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 18 Oct 2021 20:37:34 +0100 Subject: [PATCH 10/13] Fixing mypy --- torchvision/models/segmentation/deeplabv3.py | 3 ++- torchvision/models/segmentation/lraspp.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index d5887d0c149..7565ca3c229 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -133,6 +133,7 @@ def _deeplabv3_mobilenetv3( num_classes: int, aux: bool, ) -> DeepLabV3: + backbone = backbone.features # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. # The first and last blocks are always included because they are the C0 (conv1) and Cn. 
stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] @@ -231,7 +232,7 @@ def deeplabv3_mobilenet_v3_large( aux_loss = True pretrained_backbone = False - backbone = mobilenetv3.mobilenet_v3_large(pretrained=pretrained_backbone, dilated=True).features + backbone = mobilenetv3.mobilenet_v3_large(pretrained=pretrained_backbone, dilated=True) model = _deeplabv3_mobilenetv3(backbone, num_classes, aux_loss) if pretrained: diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py index e640c9ba9b0..a58225ed1e5 100644 --- a/torchvision/models/segmentation/lraspp.py +++ b/torchvision/models/segmentation/lraspp.py @@ -80,6 +80,7 @@ def forward(self, input: Dict[str, Tensor]) -> Tensor: def _lraspp_mobilenetv3(backbone: mobilenetv3.MobileNetV3, num_classes: int) -> LRASPP: + backbone = backbone.features # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. # The first and last blocks are always included because they are the C0 (conv1) and Cn. stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] @@ -107,7 +108,7 @@ def lraspp_mobilenet_v3_large( if pretrained: pretrained_backbone = False - backbone = mobilenetv3.mobilenet_v3_large(pretrained=pretrained_backbone, dilated=True).features + backbone = mobilenetv3.mobilenet_v3_large(pretrained=pretrained_backbone, dilated=True) model = _lraspp_mobilenetv3(backbone, num_classes) if pretrained: From ad34b8957729297f0028541c498980e69ec94ad0 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 19 Oct 2021 10:06:35 +0100 Subject: [PATCH 11/13] Addressing review comments. 
--- torchvision/models/segmentation/deeplabv3.py | 16 ++++++++-------- torchvision/models/segmentation/fcn.py | 12 +++++++----- torchvision/models/segmentation/segmentation.py | 1 + 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index 7565ca3c229..5b14ba08adb 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional import torch from torch import nn @@ -131,7 +131,7 @@ def _deeplabv3_resnet( def _deeplabv3_mobilenetv3( backbone: mobilenetv3.MobileNetV3, num_classes: int, - aux: bool, + aux: Optional[bool], ) -> DeepLabV3: backbone = backbone.features # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. @@ -155,7 +155,7 @@ def deeplabv3_resnet50( pretrained: bool = False, progress: bool = True, num_classes: int = 21, - aux_loss: bool = False, + aux_loss: Optional[bool] = None, pretrained_backbone: bool = True, ) -> DeepLabV3: """Constructs a DeepLabV3 model with a ResNet-50 backbone. @@ -165,7 +165,7 @@ def deeplabv3_resnet50( contains the same classes as Pascal VOC progress (bool): If True, displays a progress bar of the download to stderr num_classes (int): number of output classes of the model (including the background) - aux_loss (bool): If True, it uses an auxiliary loss + aux_loss (bool, optional): If True, it uses an auxiliary loss pretrained_backbone (bool): If True, the backbone will be pre-trained. """ if pretrained: @@ -185,7 +185,7 @@ def deeplabv3_resnet101( pretrained: bool = False, progress: bool = True, num_classes: int = 21, - aux_loss: bool = False, + aux_loss: Optional[bool] = None, pretrained_backbone: bool = True, ) -> DeepLabV3: """Constructs a DeepLabV3 model with a ResNet-101 backbone. 
@@ -195,7 +195,7 @@ def deeplabv3_resnet101( contains the same classes as Pascal VOC progress (bool): If True, displays a progress bar of the download to stderr num_classes (int): The number of classes - aux_loss (bool): If True, include an auxiliary classifier + aux_loss (bool, optional): If True, include an auxiliary classifier pretrained_backbone (bool): If True, the backbone will be pre-trained. """ if pretrained: @@ -215,7 +215,7 @@ def deeplabv3_mobilenet_v3_large( pretrained: bool = False, progress: bool = True, num_classes: int = 21, - aux_loss: bool = False, + aux_loss: Optional[bool] = None, pretrained_backbone: bool = True, ) -> DeepLabV3: """Constructs a DeepLabV3 model with a MobileNetV3-Large backbone. @@ -225,7 +225,7 @@ def deeplabv3_mobilenet_v3_large( contains the same classes as Pascal VOC progress (bool): If True, displays a progress bar of the download to stderr num_classes (int): number of output classes of the model (including the background) - aux_loss (bool): If True, it uses an auxiliary loss + aux_loss (bool, optional): If True, it uses an auxiliary loss pretrained_backbone (bool): If True, the backbone will be pre-trained. """ if pretrained: diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index b021707e494..fe226be2ce1 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -1,3 +1,5 @@ +from typing import Optional + from torch import nn from .. 
import resnet @@ -48,7 +50,7 @@ def __init__(self, in_channels: int, channels: int) -> None: def _fcn_resnet( backbone: resnet.ResNet, num_classes: int, - aux: bool, + aux: Optional[bool], ) -> FCN: return_layers = {"layer4": "out"} if aux: @@ -64,7 +66,7 @@ def fcn_resnet50( pretrained: bool = False, progress: bool = True, num_classes: int = 21, - aux_loss: bool = False, + aux_loss: Optional[bool] = None, pretrained_backbone: bool = True, ) -> FCN: """Constructs a Fully-Convolutional Network model with a ResNet-50 backbone. @@ -74,7 +76,7 @@ def fcn_resnet50( contains the same classes as Pascal VOC progress (bool): If True, displays a progress bar of the download to stderr num_classes (int): number of output classes of the model (including the background) - aux_loss (bool): If True, it uses an auxiliary loss + aux_loss (bool, optional): If True, it uses an auxiliary loss pretrained_backbone (bool): If True, the backbone will be pre-trained. """ if pretrained: @@ -94,7 +96,7 @@ def fcn_resnet101( pretrained: bool = False, progress: bool = True, num_classes: int = 21, - aux_loss: bool = False, + aux_loss: Optional[bool] = None, pretrained_backbone: bool = True, ) -> FCN: """Constructs a Fully-Convolutional Network model with a ResNet-101 backbone. @@ -104,7 +106,7 @@ def fcn_resnet101( contains the same classes as Pascal VOC progress (bool): If True, displays a progress bar of the download to stderr num_classes (int): number of output classes of the model (including the background) - aux_loss (bool): If True, it uses an auxiliary loss + aux_loss (bool, optional): If True, it uses an auxiliary loss pretrained_backbone (bool): If True, the backbone will be pre-trained. 
""" if pretrained: diff --git a/torchvision/models/segmentation/segmentation.py b/torchvision/models/segmentation/segmentation.py index 2051ec24f64..1c1d56f487c 100644 --- a/torchvision/models/segmentation/segmentation.py +++ b/torchvision/models/segmentation/segmentation.py @@ -1,5 +1,6 @@ import warnings +# Import all methods/classes for BC: from . import * # noqa: F401, F403 From 7a96c4066c22cdf05e52f76d7ff73e42ea737747 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 19 Oct 2021 10:31:09 +0100 Subject: [PATCH 12/13] Correcting typing. --- torchvision/models/segmentation/deeplabv3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index 5b14ba08adb..3b2a1d12d9a 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -116,7 +116,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: def _deeplabv3_resnet( backbone: resnet.ResNet, num_classes: int, - aux: bool, + aux: Optional[bool], ) -> DeepLabV3: return_layers = {"layer4": "out"} if aux: From 7fef7733187f92387bc94a09383363c03c6fe615 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 19 Oct 2021 12:35:12 +0100 Subject: [PATCH 13/13] Restoring special handling for references. 
--- torchvision/models/segmentation/lraspp.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py index a58225ed1e5..df4b21e2ee9 100644 --- a/torchvision/models/segmentation/lraspp.py +++ b/torchvision/models/segmentation/lraspp.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from typing import Dict +from typing import Any, Dict from torch import nn, Tensor from torch.nn import functional as F @@ -94,7 +94,11 @@ def _lraspp_mobilenetv3(backbone: mobilenetv3.MobileNetV3, num_classes: int) -> def lraspp_mobilenet_v3_large( - pretrained: bool = False, progress: bool = True, num_classes: int = 21, pretrained_backbone: bool = True + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + pretrained_backbone: bool = True, + **kwargs: Any, ) -> LRASPP: """Constructs a Lite R-ASPP Network model with a MobileNetV3-Large backbone. @@ -105,6 +109,8 @@ def lraspp_mobilenet_v3_large( num_classes (int): number of output classes of the model (including the background) pretrained_backbone (bool): If True, the backbone will be pre-trained. """ + if kwargs.pop("aux_loss", False): + raise NotImplementedError("This model does not use auxiliary loss") if pretrained: pretrained_backbone = False