From fd0eccbf6880f0ce42103472c8298af0078e1ed4 Mon Sep 17 00:00:00 2001 From: puhuk Date: Mon, 9 May 2022 23:59:49 +0900 Subject: [PATCH 1/9] To resolve issue #5964 Add note for resnet architecture --- docs/source/models/resnet.rst | 6 ++++++ torchvision/models/resnet.py | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/docs/source/models/resnet.rst b/docs/source/models/resnet.rst index 7976eb437e5..dfa5508907b 100644 --- a/docs/source/models/resnet.rst +++ b/docs/source/models/resnet.rst @@ -6,6 +6,12 @@ ResNet The ResNet model is based on the `Deep Residual Learning for Image Recognition `_ paper. +.. note:: + Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + while original implementation places the stride at the first 1x1 convolution(self.conv1) + according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + This variant is also known as ResNet V1.5 and improves accuracy according to + https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. Model builders -------------- diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index bc5d952368e..8f0035f6da3 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -575,6 +575,13 @@ class Wide_ResNet101_2_Weights(WeightsEnum): def resnet18(*, weights: Optional[ResNet18_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet: """ResNet-18 from `Deep Residual Learning for Image Recognition `__. + .. note:: + Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + while original implementation places the stride at the first 1x1 convolution(self.conv1) + according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + This variant is also known as ResNet V1.5 and improves accuracy according to + https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 
+ Args: weights (:class:`~torchvision.models.ResNet18_Weights`, optional): The pretrained weights to use. See From 1a563c25d48993c08b90b1652946668197251f12 Mon Sep 17 00:00:00 2001 From: puhuk Date: Tue, 10 May 2022 22:38:57 +0900 Subject: [PATCH 2/9] Update resnet.py --- torchvision/models/resnet.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index 8f0035f6da3..e0f81a07f9b 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -575,13 +575,6 @@ class Wide_ResNet101_2_Weights(WeightsEnum): def resnet18(*, weights: Optional[ResNet18_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet: """ResNet-18 from `Deep Residual Learning for Image Recognition `__. - .. note:: - Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) - while original implementation places the stride at the first 1x1 convolution(self.conv1) - according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. - This variant is also known as ResNet V1.5 and improves accuracy according to - https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. - Args: weights (:class:`~torchvision.models.ResNet18_Weights`, optional): The pretrained weights to use. See @@ -631,6 +624,13 @@ def resnet34(*, weights: Optional[ResNet34_Weights] = None, progress: bool = Tru @handle_legacy_interface(weights=("pretrained", ResNet50_Weights.IMAGENET1K_V1)) def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet: """ResNet-50 from `Deep Residual Learning for Image Recognition `__. + + .. 
note:: + Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + while original implementation places the stride at the first 1x1 convolution(self.conv1) + according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + This variant is also known as ResNet V1.5 and improves accuracy according to + https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. Args: weights (:class:`~torchvision.models.ResNet50_Weights`, optional): The @@ -656,6 +656,13 @@ def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = Tru @handle_legacy_interface(weights=("pretrained", ResNet101_Weights.IMAGENET1K_V1)) def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet: """ResNet-101 from `Deep Residual Learning for Image Recognition `__. + + .. note:: + Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + while original implementation places the stride at the first 1x1 convolution(self.conv1) + according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + This variant is also known as ResNet V1.5 and improves accuracy according to + https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. Args: weights (:class:`~torchvision.models.ResNet101_Weights`, optional): The @@ -681,6 +688,13 @@ def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = T @handle_legacy_interface(weights=("pretrained", ResNet152_Weights.IMAGENET1K_V1)) def resnet152(*, weights: Optional[ResNet152_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet: """ResNet-152 from `Deep Residual Learning for Image Recognition `__. + + .. 
note:: + Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + while original implementation places the stride at the first 1x1 convolution(self.conv1) + according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + This variant is also known as ResNet V1.5 and improves accuracy according to + https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. Args: weights (:class:`~torchvision.models.ResNet152_Weights`, optional): The From a7f666ddace78779f588a63e8954cc1f480a93dd Mon Sep 17 00:00:00 2001 From: puhuk Date: Tue, 10 May 2022 23:05:41 +0900 Subject: [PATCH 3/9] Update resnet.py --- torchvision/models/resnet.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index e0f81a07f9b..da8d2e16fca 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -624,7 +624,7 @@ def resnet34(*, weights: Optional[ResNet34_Weights] = None, progress: bool = Tru @handle_legacy_interface(weights=("pretrained", ResNet50_Weights.IMAGENET1K_V1)) def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet: """ResNet-50 from `Deep Residual Learning for Image Recognition `__. - + .. note:: Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) while original implementation places the stride at the first 1x1 convolution(self.conv1) @@ -656,7 +656,7 @@ def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = Tru @handle_legacy_interface(weights=("pretrained", ResNet101_Weights.IMAGENET1K_V1)) def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet: """ResNet-101 from `Deep Residual Learning for Image Recognition `__. - + .. 
note:: Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) while original implementation places the stride at the first 1x1 convolution(self.conv1) @@ -688,7 +688,7 @@ def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = T @handle_legacy_interface(weights=("pretrained", ResNet152_Weights.IMAGENET1K_V1)) def resnet152(*, weights: Optional[ResNet152_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet: """ResNet-152 from `Deep Residual Learning for Image Recognition `__. - + .. note:: Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) while original implementation places the stride at the first 1x1 convolution(self.conv1) From 2e5fed8efd70bdbe99621b1c77c6d459bf95af87 Mon Sep 17 00:00:00 2001 From: puhuk Date: Tue, 10 May 2022 23:07:40 +0900 Subject: [PATCH 4/9] Update resnet.rst --- docs/source/models/resnet.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/models/resnet.rst b/docs/source/models/resnet.rst index dfa5508907b..40818aedba9 100644 --- a/docs/source/models/resnet.rst +++ b/docs/source/models/resnet.rst @@ -9,9 +9,9 @@ The ResNet model is based on the `Deep Residual Learning for Image Recognition .. note:: Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) while original implementation places the stride at the first 1x1 convolution(self.conv1) - according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. - This variant is also known as ResNet V1.5 and improves accuracy according to - https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. + according to the paper. + This variant improves the accuracy and it's known as `ResNet V1.5 + `_. 
Model builders -------------- From 9216f5d3b5bdf60a647b28a311ea967034bd0601 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 20 May 2022 10:50:59 +0100 Subject: [PATCH 5/9] Fix stylings --- docs/source/models/resnet.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/models/resnet.rst b/docs/source/models/resnet.rst index 40818aedba9..aa8e9b80456 100644 --- a/docs/source/models/resnet.rst +++ b/docs/source/models/resnet.rst @@ -7,8 +7,8 @@ The ResNet model is based on the `Deep Residual Learning for Image Recognition `_ paper. .. note:: - Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) - while original implementation places the stride at the first 1x1 convolution(self.conv1) + Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``) + while original implementation places the stride at the first 1x1 convolution (``conv1``) according to the paper. This variant improves the accuracy and it's known as `ResNet V1.5 `_. From b67a61376aa62bb75d6a283a6b228c3f0c80d729 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 20 May 2022 10:52:59 +0100 Subject: [PATCH 6/9] Add the same notes on model builders --- torchvision/models/resnet.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index c6b8b9894ab..9d4292903d7 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -700,11 +700,11 @@ def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = Tru """ResNet-50 from `Deep Residual Learning for Image Recognition `__. .. 
note:: - Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) - while original implementation places the stride at the first 1x1 convolution(self.conv1) - according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. - This variant is also known as ResNet V1.5 and improves accuracy according to - https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. + Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``) + while original implementation places the stride at the first 1x1 convolution (``conv1``) + according to the paper. + This variant improves the accuracy and it's known as `ResNet V1.5 + `_. Args: weights (:class:`~torchvision.models.ResNet50_Weights`, optional): The @@ -732,11 +732,11 @@ def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = T """ResNet-101 from `Deep Residual Learning for Image Recognition `__. .. note:: - Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) - while original implementation places the stride at the first 1x1 convolution(self.conv1) - according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. - This variant is also known as ResNet V1.5 and improves accuracy according to - https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. + Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``) + while original implementation places the stride at the first 1x1 convolution (``conv1``) + according to the paper. + This variant improves the accuracy and it's known as `ResNet V1.5 + `_. Args: weights (:class:`~torchvision.models.ResNet101_Weights`, optional): The @@ -764,11 +764,11 @@ def resnet152(*, weights: Optional[ResNet152_Weights] = None, progress: bool = T """ResNet-152 from `Deep Residual Learning for Image Recognition `__. .. 
note:: - Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) - while original implementation places the stride at the first 1x1 convolution(self.conv1) - according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. - This variant is also known as ResNet V1.5 and improves accuracy according to - https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. + Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``) + while original implementation places the stride at the first 1x1 convolution (``conv1``) + according to the paper. + This variant improves the accuracy and it's known as `ResNet V1.5 + `_. Args: weights (:class:`~torchvision.models.ResNet152_Weights`, optional): The From f440e2dd09d460ec34c16b0c1652c107e0c70f98 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 20 May 2022 10:58:06 +0100 Subject: [PATCH 7/9] Improve description --- docs/source/models/resnet.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/source/models/resnet.rst b/docs/source/models/resnet.rst index aa8e9b80456..e5b41d192fd 100644 --- a/docs/source/models/resnet.rst +++ b/docs/source/models/resnet.rst @@ -7,10 +7,9 @@ The ResNet model is based on the `Deep Residual Learning for Image Recognition `_ paper. .. note:: - Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``) - while original implementation places the stride at the first 1x1 convolution (``conv1``) - according to the paper. - This variant improves the accuracy and it's known as `ResNet V1.5 + The bottleneck of TorchVision places the stride for downsampling to the second 3x3 + convolution while the original paper places it to the first 1x1 convolution. + This variant improves the accuracy and is known as `ResNet V1.5 `_. 
Model builders From 15fe780573a26d389e3947409c715fada6834bb2 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 20 May 2022 10:59:44 +0100 Subject: [PATCH 8/9] Apply the change everywhere --- torchvision/models/resnet.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index 9d4292903d7..93f0d06f67b 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -700,10 +700,9 @@ def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = Tru """ResNet-50 from `Deep Residual Learning for Image Recognition `__. .. note:: - Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``) - while original implementation places the stride at the first 1x1 convolution (``conv1``) - according to the paper. - This variant improves the accuracy and it's known as `ResNet V1.5 + The bottleneck of TorchVision places the stride for downsampling to the second 3x3 + convolution while the original paper places it to the first 1x1 convolution. + This variant improves the accuracy and is known as `ResNet V1.5 <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_. Args: @@ -732,10 +731,9 @@ def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = T """ResNet-101 from `Deep Residual Learning for Image Recognition `__. .. note:: - Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``) - while original implementation places the stride at the first 1x1 convolution (``conv1``) - according to the paper. - This variant improves the accuracy and it's known as `ResNet V1.5 + The bottleneck of TorchVision places the stride for downsampling to the second 3x3 + convolution while the original paper places it to the first 1x1 convolution. + This variant improves the accuracy and is known as `ResNet V1.5 <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.
Args: @@ -764,10 +762,9 @@ def resnet152(*, weights: Optional[ResNet152_Weights] = None, progress: bool = T """ResNet-152 from `Deep Residual Learning for Image Recognition `__. .. note:: - Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``) - while original implementation places the stride at the first 1x1 convolution (``conv1``) - according to the paper. - This variant improves the accuracy and it's known as `ResNet V1.5 + The bottleneck of TorchVision places the stride for downsampling to the second 3x3 + convolution while the original paper places it to the first 1x1 convolution. + This variant improves the accuracy and is known as `ResNet V1.5 <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_. Args: From ea4481480228d7d8ce5e3b6f2873fb75370ec664 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 20 May 2022 10:59:59 +0100 Subject: [PATCH 9/9] Remove trailing space --- docs/source/models/resnet.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/models/resnet.rst b/docs/source/models/resnet.rst index e5b41d192fd..9d777f2f6b1 100644 --- a/docs/source/models/resnet.rst +++ b/docs/source/models/resnet.rst @@ -9,7 +9,7 @@ The ResNet model is based on the `Deep Residual Learning for Image Recognition .. note:: The bottleneck of TorchVision places the stride for downsampling to the second 3x3 convolution while the original paper places it to the first 1x1 convolution. - This variant improves the accuracy and is known as `ResNet V1.5 + This variant improves the accuracy and is known as `ResNet V1.5 <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_. Model builders