diff --git a/test/test_models.py b/test/test_models.py
index 7e1f0fb3b12..d97dd906232 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -325,7 +325,16 @@ def test_fasterrcnn_double(self):
         self.assertTrue("labels" in out[0])
 
     def test_googlenet_eval(self):
-        m = torch.jit.script(models.googlenet(pretrained=True).eval())
+        # replacement for models.googlenet(pretrained=True) that does not download weights
+        kwargs = {}
+        kwargs['transform_input'] = True
+        kwargs['aux_logits'] = True
+        kwargs['init_weights'] = False
+        model = models.GoogLeNet(**kwargs)
+        model.aux_logits = False
+        model.aux1 = None
+        model.aux2 = None
+        m = torch.jit.script(model.eval())
         self.checkModule(m, "googlenet", torch.rand(1, 3, 224, 224))
 
     @unittest.skipIf(not torch.cuda.is_available(), 'needs GPU')
diff --git a/test/test_models_detection_utils.py b/test/test_models_detection_utils.py
index 9abef94ce9f..bfb26f24eae 100644
--- a/test/test_models_detection_utils.py
+++ b/test/test_models_detection_utils.py
@@ -3,7 +3,7 @@
 from torchvision.models.detection import _utils
 from torchvision.models.detection.transform import GeneralizedRCNNTransform
 import unittest
-from torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn, keypointrcnn_resnet50_fpn
+from torchvision.models.detection import backbone_utils
 
 
 class Tester(unittest.TestCase):
@@ -20,50 +20,34 @@ def test_balanced_positive_negative_sampler(self):
         self.assertEqual(neg[0].sum(), 3)
         self.assertEqual(neg[0][0:6].sum(), 3)
 
-    def test_fasterrcnn_resnet50_fpn_frozen_layers(self):
+    def test_resnet_fpn_backbone_frozen_layers(self):
         # we know how many initial layers and parameters of the network should
-        # be frozen for each trainable_backbone_layers paramter value
+        # be frozen for each trainable_backbone_layers parameter value
         # i.e all 53 params are frozen if trainable_backbone_layers=0
         # ad first 24 params are frozen if trainable_backbone_layers=2
         expected_frozen_params = {0: 53, 1: 43, 2: 24, 3: 11, 4: 1, 5: 0}
         for train_layers, exp_froz_params in expected_frozen_params.items():
-            model = fasterrcnn_resnet50_fpn(pretrained=True, progress=False,
-                                            num_classes=91, pretrained_backbone=False,
-                                            trainable_backbone_layers=train_layers)
+            model = backbone_utils.resnet_fpn_backbone(
+                'resnet50', pretrained=False, trainable_layers=train_layers)
             # boolean list that is true if the param at that index is frozen
             is_frozen = [not parameter.requires_grad for _, parameter in model.named_parameters()]
             # check that expected initial number of layers are frozen
             self.assertTrue(all(is_frozen[:exp_froz_params]))
 
-    def test_maskrcnn_resnet50_fpn_frozen_layers(self):
-        # we know how many initial layers and parameters of the maskrcnn should
-        # be frozen for each trainable_backbone_layers paramter value
-        # i.e all 53 params are frozen if trainable_backbone_layers=0
-        # ad first 24 params are frozen if trainable_backbone_layers=2
-        expected_frozen_params = {0: 53, 1: 43, 2: 24, 3: 11, 4: 1, 5: 0}
-        for train_layers, exp_froz_params in expected_frozen_params.items():
-            model = maskrcnn_resnet50_fpn(pretrained=True, progress=False,
-                                          num_classes=91, pretrained_backbone=False,
-                                          trainable_backbone_layers=train_layers)
-            # boolean list that is true if the parameter at that index is frozen
-            is_frozen = [not parameter.requires_grad for _, parameter in model.named_parameters()]
-            # check that expected initial number of layers in maskrcnn are frozen
-            self.assertTrue(all(is_frozen[:exp_froz_params]))
-
-    def test_keypointrcnn_resnet50_fpn_frozen_layers(self):
-        # we know how many initial layers and parameters of the keypointrcnn should
-        # be frozen for each trainable_backbone_layers paramter value
-        # i.e all 53 params are frozen if trainable_backbone_layers=0
-        # ad first 24 params are frozen if trainable_backbone_layers=2
-        expected_frozen_params = {0: 53, 1: 43, 2: 24, 3: 11, 4: 1, 5: 0}
-        for train_layers, exp_froz_params in expected_frozen_params.items():
-            model = keypointrcnn_resnet50_fpn(pretrained=True, progress=False,
-                                              num_classes=2, pretrained_backbone=False,
-                                              trainable_backbone_layers=train_layers)
-            # boolean list that is true if the parameter at that index is frozen
-            is_frozen = [not parameter.requires_grad for _, parameter in model.named_parameters()]
-            # check that expected initial number of layers in keypointrcnn are frozen
-            self.assertTrue(all(is_frozen[:exp_froz_params]))
+    def test_validate_resnet_inputs_detection(self):
+        # default number of backbone layers to train
+        ret = backbone_utils._validate_resnet_trainable_layers(
+            pretrained=True, trainable_backbone_layers=None)
+        self.assertEqual(ret, 3)
+        # can't go beyond 5
+        with self.assertRaises(AssertionError):
+            ret = backbone_utils._validate_resnet_trainable_layers(
+                pretrained=True, trainable_backbone_layers=6)
+        # if not pretrained, should use all trainable layers and warn
+        with self.assertWarns(UserWarning):
+            ret = backbone_utils._validate_resnet_trainable_layers(
+                pretrained=False, trainable_backbone_layers=0)
+        self.assertEqual(ret, 5)
 
     def test_transform_copy_targets(self):
         transform = GeneralizedRCNNTransform(300, 500, torch.zeros(3), torch.ones(3))
diff --git a/torchvision/models/detection/backbone_utils.py b/torchvision/models/detection/backbone_utils.py
index c0527e544b3..9e01a765b49 100644
--- a/torchvision/models/detection/backbone_utils.py
+++ b/torchvision/models/detection/backbone_utils.py
@@ -1,3 +1,4 @@
+import warnings
 from collections import OrderedDict
 from torch import nn
 from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool
@@ -105,3 +106,32 @@ def resnet_fpn_backbone(
     in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]
     out_channels = 256
     return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)
+
+
+def _validate_resnet_trainable_layers(pretrained, trainable_backbone_layers):
+    """
+    Resolves the number of trainable ResNet layers, counted from the final block.
+
+    Arguments:
+        pretrained (bool): truthy if pretrained weights are used for the model or its backbone
+        trainable_backbone_layers (int or None): requested value between 0 and 5;
+            None selects the default
+
+    Returns:
+        int: 5 when ``pretrained`` is falsy (a UserWarning is emitted if a value was
+        requested), otherwise the requested value, or 3 when it is None
+    """
+    # don't freeze any layers if pretrained model or backbone is not used
+    if not pretrained:
+        if trainable_backbone_layers is not None:
+            warnings.warn(
+                "Changing trainable_backbone_layers has no effect if "
+                "neither pretrained nor pretrained_backbone have been set to True, "
+                "falling back to trainable_backbone_layers=5 so that all layers are trainable")
+        trainable_backbone_layers = 5
+    # by default, freeze first 2 blocks following Faster R-CNN
+    if trainable_backbone_layers is None:
+        trainable_backbone_layers = 3
+    # NOTE: assert is stripped under `python -O`; kept because the tests expect AssertionError
+    assert 0 <= trainable_backbone_layers <= 5
+    return trainable_backbone_layers
diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py
index 117d985eca6..39cbb8bf88c 100644
--- a/torchvision/models/detection/faster_rcnn.py
+++ b/torchvision/models/detection/faster_rcnn.py
@@ -14,7 +14,7 @@
 from .rpn import RPNHead, RegionProposalNetwork
 from .roi_heads import RoIHeads
 from .transform import GeneralizedRCNNTransform
-from .backbone_utils import resnet_fpn_backbone
+from .backbone_utils import resnet_fpn_backbone, _validate_resnet_trainable_layers
 
 
 __all__ = [
@@ -290,7 +290,7 @@ def forward(self, x):
 
 
 def fasterrcnn_resnet50_fpn(pretrained=False, progress=True,
-                            num_classes=91, pretrained_backbone=True, trainable_backbone_layers=3, **kwargs):
+                            num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs):
     """
     Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.
 
@@ -348,10 +348,10 @@ def fasterrcnn_resnet50_fpn(pretrained=False, progress=True,
         trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
             Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
     """
-    assert trainable_backbone_layers <= 5 and trainable_backbone_layers >= 0
-    # dont freeze any layers if pretrained model or backbone is not used
-    if not (pretrained or pretrained_backbone):
-        trainable_backbone_layers = 5
+    # check default parameters and by default set it to 3 if possible
+    trainable_backbone_layers = _validate_resnet_trainable_layers(
+        pretrained or pretrained_backbone, trainable_backbone_layers)
+
     if pretrained:
         # no need to download the backbone if pretrained is set
         pretrained_backbone = False
diff --git a/torchvision/models/detection/keypoint_rcnn.py b/torchvision/models/detection/keypoint_rcnn.py
index f1f4ad26680..a2a7f3227dd 100644
--- a/torchvision/models/detection/keypoint_rcnn.py
+++ b/torchvision/models/detection/keypoint_rcnn.py
@@ -6,7 +6,7 @@
 from ..utils import load_state_dict_from_url
 
 from .faster_rcnn import FasterRCNN
-from .backbone_utils import resnet_fpn_backbone
+from .backbone_utils import resnet_fpn_backbone, _validate_resnet_trainable_layers
 
 
 __all__ = [
@@ -267,7 +267,7 @@ def forward(self, x):
 
 def keypointrcnn_resnet50_fpn(pretrained=False, progress=True,
                               num_classes=2, num_keypoints=17,
-                              pretrained_backbone=True, trainable_backbone_layers=3, **kwargs):
+                              pretrained_backbone=True, trainable_backbone_layers=None, **kwargs):
     """
     Constructs a Keypoint R-CNN model with a ResNet-50-FPN backbone.
 
@@ -316,10 +316,10 @@ def keypointrcnn_resnet50_fpn(pretrained=False, progress=True,
         trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
             Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
     """
-    assert trainable_backbone_layers <= 5 and trainable_backbone_layers >= 0
-    # dont freeze any layers if pretrained model or backbone is not used
-    if not (pretrained or pretrained_backbone):
-        trainable_backbone_layers = 5
+    # check default parameters and by default set it to 3 if possible
+    trainable_backbone_layers = _validate_resnet_trainable_layers(
+        pretrained or pretrained_backbone, trainable_backbone_layers)
+
     if pretrained:
         # no need to download the backbone if pretrained is set
         pretrained_backbone = False
diff --git a/torchvision/models/detection/mask_rcnn.py b/torchvision/models/detection/mask_rcnn.py
index 668d8ab8122..9e2734841a6 100644
--- a/torchvision/models/detection/mask_rcnn.py
+++ b/torchvision/models/detection/mask_rcnn.py
@@ -10,7 +10,7 @@
 from ..utils import load_state_dict_from_url
 
 from .faster_rcnn import FasterRCNN
-from .backbone_utils import resnet_fpn_backbone
+from .backbone_utils import resnet_fpn_backbone, _validate_resnet_trainable_layers
 
 __all__ = [
     "MaskRCNN", "maskrcnn_resnet50_fpn",
@@ -265,7 +265,7 @@ def __init__(self, in_channels, dim_reduced, num_classes):
 
 
 def maskrcnn_resnet50_fpn(pretrained=False, progress=True,
-                          num_classes=91, pretrained_backbone=True, trainable_backbone_layers=3, **kwargs):
+                          num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs):
     """
     Constructs a Mask R-CNN model with a ResNet-50-FPN backbone.
 
@@ -315,10 +315,10 @@ def maskrcnn_resnet50_fpn(pretrained=False, progress=True,
         trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
             Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
     """
-    assert trainable_backbone_layers <= 5 and trainable_backbone_layers >= 0
-    # dont freeze any layers if pretrained model or backbone is not used
-    if not (pretrained or pretrained_backbone):
-        trainable_backbone_layers = 5
+    # check default parameters and by default set it to 3 if possible
+    trainable_backbone_layers = _validate_resnet_trainable_layers(
+        pretrained or pretrained_backbone, trainable_backbone_layers)
+
     if pretrained:
         # no need to download the backbone if pretrained is set
         pretrained_backbone = False