From fd0eccbf6880f0ce42103472c8298af0078e1ed4 Mon Sep 17 00:00:00 2001
From: puhuk <wetr235@gmail.com>
Date: Mon, 9 May 2022 23:59:49 +0900
Subject: [PATCH 1/9] To resolve issue #5964

Add note for resnet architecture
---
 docs/source/models/resnet.rst | 6 ++++++
 torchvision/models/resnet.py  | 7 +++++++
 2 files changed, 13 insertions(+)

diff --git a/docs/source/models/resnet.rst b/docs/source/models/resnet.rst
index 7976eb437e5..dfa5508907b 100644
--- a/docs/source/models/resnet.rst
+++ b/docs/source/models/resnet.rst
@@ -6,6 +6,12 @@ ResNet
 The ResNet model is based on the `Deep Residual Learning for Image Recognition
 <https://arxiv.org/abs/1512.03385>`_ paper.
 
+.. note::
+    Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
+    while original implementation places the stride at the first 1x1 convolution(self.conv1)
+    according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
+    This variant is also known as ResNet V1.5 and improves accuracy according to
+    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
 
 Model builders
 --------------
diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py
index bc5d952368e..8f0035f6da3 100644
--- a/torchvision/models/resnet.py
+++ b/torchvision/models/resnet.py
@@ -575,6 +575,13 @@ class Wide_ResNet101_2_Weights(WeightsEnum):
 def resnet18(*, weights: Optional[ResNet18_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
     """ResNet-18 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
 
+    .. note::
+       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
+       while original implementation places the stride at the first 1x1 convolution(self.conv1)
+       according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
+       This variant is also known as ResNet V1.5 and improves accuracy according to
+       https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
+
     Args:
         weights (:class:`~torchvision.models.ResNet18_Weights`, optional): The
             pretrained weights to use. See

From 1a563c25d48993c08b90b1652946668197251f12 Mon Sep 17 00:00:00 2001
From: puhuk <wetr235@gmail.com>
Date: Tue, 10 May 2022 22:38:57 +0900
Subject: [PATCH 2/9] Update resnet.py

---
 torchvision/models/resnet.py | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py
index 8f0035f6da3..e0f81a07f9b 100644
--- a/torchvision/models/resnet.py
+++ b/torchvision/models/resnet.py
@@ -575,13 +575,6 @@ class Wide_ResNet101_2_Weights(WeightsEnum):
 def resnet18(*, weights: Optional[ResNet18_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
     """ResNet-18 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
 
-    .. note::
-       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
-       while original implementation places the stride at the first 1x1 convolution(self.conv1)
-       according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
-       This variant is also known as ResNet V1.5 and improves accuracy according to
-       https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
-
     Args:
         weights (:class:`~torchvision.models.ResNet18_Weights`, optional): The
             pretrained weights to use. See
@@ -631,6 +624,13 @@ def resnet34(*, weights: Optional[ResNet34_Weights] = None, progress: bool = Tru
 @handle_legacy_interface(weights=("pretrained", ResNet50_Weights.IMAGENET1K_V1))
 def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
     """ResNet-50 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
+    
+    .. note::
+       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
+       while original implementation places the stride at the first 1x1 convolution(self.conv1)
+       according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
+       This variant is also known as ResNet V1.5 and improves accuracy according to
+       https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
 
     Args:
         weights (:class:`~torchvision.models.ResNet50_Weights`, optional): The
@@ -656,6 +656,13 @@ def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = Tru
 @handle_legacy_interface(weights=("pretrained", ResNet101_Weights.IMAGENET1K_V1))
 def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
     """ResNet-101 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
+    
+    .. note::
+       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
+       while original implementation places the stride at the first 1x1 convolution(self.conv1)
+       according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
+       This variant is also known as ResNet V1.5 and improves accuracy according to
+       https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
 
     Args:
         weights (:class:`~torchvision.models.ResNet101_Weights`, optional): The
@@ -681,6 +688,13 @@ def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = T
 @handle_legacy_interface(weights=("pretrained", ResNet152_Weights.IMAGENET1K_V1))
 def resnet152(*, weights: Optional[ResNet152_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
     """ResNet-152 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
+    
+    .. note::
+       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
+       while original implementation places the stride at the first 1x1 convolution(self.conv1)
+       according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
+       This variant is also known as ResNet V1.5 and improves accuracy according to
+       https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
 
     Args:
         weights (:class:`~torchvision.models.ResNet152_Weights`, optional): The

From a7f666ddace78779f588a63e8954cc1f480a93dd Mon Sep 17 00:00:00 2001
From: puhuk <wetr235@gmail.com>
Date: Tue, 10 May 2022 23:05:41 +0900
Subject: [PATCH 3/9] Update resnet.py

---
 torchvision/models/resnet.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py
index e0f81a07f9b..da8d2e16fca 100644
--- a/torchvision/models/resnet.py
+++ b/torchvision/models/resnet.py
@@ -624,7 +624,7 @@ def resnet34(*, weights: Optional[ResNet34_Weights] = None, progress: bool = Tru
 @handle_legacy_interface(weights=("pretrained", ResNet50_Weights.IMAGENET1K_V1))
 def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
     """ResNet-50 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
-    
+
     .. note::
        Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
        while original implementation places the stride at the first 1x1 convolution(self.conv1)
@@ -656,7 +656,7 @@ def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = Tru
 @handle_legacy_interface(weights=("pretrained", ResNet101_Weights.IMAGENET1K_V1))
 def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
     """ResNet-101 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
-    
+
     .. note::
        Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
        while original implementation places the stride at the first 1x1 convolution(self.conv1)
@@ -688,7 +688,7 @@ def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = T
 @handle_legacy_interface(weights=("pretrained", ResNet152_Weights.IMAGENET1K_V1))
 def resnet152(*, weights: Optional[ResNet152_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
     """ResNet-152 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
-    
+
     .. note::
        Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
        while original implementation places the stride at the first 1x1 convolution(self.conv1)

From 2e5fed8efd70bdbe99621b1c77c6d459bf95af87 Mon Sep 17 00:00:00 2001
From: puhuk <wetr235@gmail.com>
Date: Tue, 10 May 2022 23:07:40 +0900
Subject: [PATCH 4/9] Update resnet.rst

---
 docs/source/models/resnet.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/models/resnet.rst b/docs/source/models/resnet.rst
index dfa5508907b..40818aedba9 100644
--- a/docs/source/models/resnet.rst
+++ b/docs/source/models/resnet.rst
@@ -9,9 +9,9 @@ The ResNet model is based on the `Deep Residual Learning for Image Recognition
 .. note::
     Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
     while original implementation places the stride at the first 1x1 convolution(self.conv1)
-    according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
-    This variant is also known as ResNet V1.5 and improves accuracy according to
-    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
+    according to the paper.
+    This variant improves the accuracy and it's known as `ResNet V1.5 
+    <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.
 
 Model builders
 --------------

From 9216f5d3b5bdf60a647b28a311ea967034bd0601 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <datumbox@users.noreply.github.com>
Date: Fri, 20 May 2022 10:50:59 +0100
Subject: [PATCH 5/9] Fix stylings

---
 docs/source/models/resnet.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/models/resnet.rst b/docs/source/models/resnet.rst
index 40818aedba9..aa8e9b80456 100644
--- a/docs/source/models/resnet.rst
+++ b/docs/source/models/resnet.rst
@@ -7,8 +7,8 @@ The ResNet model is based on the `Deep Residual Learning for Image Recognition
 <https://arxiv.org/abs/1512.03385>`_ paper.
 
 .. note::
-    Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
-    while original implementation places the stride at the first 1x1 convolution(self.conv1)
+    Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``)
+    while original implementation places the stride at the first 1x1 convolution (``conv1``)
     according to the paper.
     This variant improves the accuracy and it's known as `ResNet V1.5 
     <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.

From b67a61376aa62bb75d6a283a6b228c3f0c80d729 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <datumbox@users.noreply.github.com>
Date: Fri, 20 May 2022 10:52:59 +0100
Subject: [PATCH 6/9] Add the same notes on model builders

---
 torchvision/models/resnet.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py
index c6b8b9894ab..9d4292903d7 100644
--- a/torchvision/models/resnet.py
+++ b/torchvision/models/resnet.py
@@ -700,11 +700,11 @@ def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = Tru
     """ResNet-50 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
 
     .. note::
-       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
-       while original implementation places the stride at the first 1x1 convolution(self.conv1)
-       according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
-       This variant is also known as ResNet V1.5 and improves accuracy according to
-       https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
+       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``)
+       while original implementation places the stride at the first 1x1 convolution (``conv1``)
+       according to the paper.
+       This variant improves the accuracy and it's known as `ResNet V1.5 
+       <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.
 
     Args:
         weights (:class:`~torchvision.models.ResNet50_Weights`, optional): The
@@ -732,11 +732,11 @@ def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = T
     """ResNet-101 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
 
     .. note::
-       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
-       while original implementation places the stride at the first 1x1 convolution(self.conv1)
-       according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
-       This variant is also known as ResNet V1.5 and improves accuracy according to
-       https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
+       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``)
+       while original implementation places the stride at the first 1x1 convolution (``conv1``)
+       according to the paper.
+       This variant improves the accuracy and it's known as `ResNet V1.5 
+       <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.
 
     Args:
         weights (:class:`~torchvision.models.ResNet101_Weights`, optional): The
@@ -764,11 +764,11 @@ def resnet152(*, weights: Optional[ResNet152_Weights] = None, progress: bool = T
     """ResNet-152 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
 
     .. note::
-       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
-       while original implementation places the stride at the first 1x1 convolution(self.conv1)
-       according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
-       This variant is also known as ResNet V1.5 and improves accuracy according to
-       https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
+       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``)
+       while original implementation places the stride at the first 1x1 convolution (``conv1``)
+       according to the paper.
+       This variant improves the accuracy and it's known as `ResNet V1.5 
+       <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.
 
     Args:
         weights (:class:`~torchvision.models.ResNet152_Weights`, optional): The

From f440e2dd09d460ec34c16b0c1652c107e0c70f98 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <datumbox@users.noreply.github.com>
Date: Fri, 20 May 2022 10:58:06 +0100
Subject: [PATCH 7/9] Improve description

---
 docs/source/models/resnet.rst | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/docs/source/models/resnet.rst b/docs/source/models/resnet.rst
index aa8e9b80456..e5b41d192fd 100644
--- a/docs/source/models/resnet.rst
+++ b/docs/source/models/resnet.rst
@@ -7,10 +7,9 @@ The ResNet model is based on the `Deep Residual Learning for Image Recognition
 <https://arxiv.org/abs/1512.03385>`_ paper.
 
 .. note::
-    Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``)
-    while original implementation places the stride at the first 1x1 convolution (``conv1``)
-    according to the paper.
-    This variant improves the accuracy and it's known as `ResNet V1.5 
+    The bottleneck of TorchVision places the stride for downsampling to the second 3x3
+    convolution while the original paper places it to the first 1x1 convolution.
+    This variant improves the accuracy and is known as `ResNet V1.5 
     <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.
 
 Model builders

From 15fe780573a26d389e3947409c715fada6834bb2 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <datumbox@users.noreply.github.com>
Date: Fri, 20 May 2022 10:59:44 +0100
Subject: [PATCH 8/9] Apply the change everywhere

---
 torchvision/models/resnet.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py
index 9d4292903d7..93f0d06f67b 100644
--- a/torchvision/models/resnet.py
+++ b/torchvision/models/resnet.py
@@ -700,10 +700,9 @@ def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = Tru
     """ResNet-50 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
 
     .. note::
-       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``)
-       while original implementation places the stride at the first 1x1 convolution (``conv1``)
-       according to the paper.
-       This variant improves the accuracy and it's known as `ResNet V1.5 
+       The bottleneck of TorchVision places the stride for downsampling at the second
+       convolution (3x3) while the original paper places it at the first 1x1 convolution.
+       This variant improves the accuracy and is known as `ResNet V1.5
        <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.
 
     Args:
@@ -732,10 +731,9 @@ def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = T
     """ResNet-101 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
 
     .. note::
-       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``)
-       while original implementation places the stride at the first 1x1 convolution (``conv1``)
-       according to the paper.
-       This variant improves the accuracy and it's known as `ResNet V1.5 
+       The bottleneck of TorchVision places the stride for downsampling at the second
+       convolution (3x3) while the original paper places it at the first 1x1 convolution.
+       This variant improves the accuracy and is known as `ResNet V1.5
        <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.
 
     Args:
@@ -764,10 +762,9 @@ def resnet152(*, weights: Optional[ResNet152_Weights] = None, progress: bool = T
     """ResNet-152 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
 
     .. note::
-       Bottleneck in torchvision places the stride for downsampling at 3x3 convolution (``conv2``)
-       while original implementation places the stride at the first 1x1 convolution (``conv1``)
-       according to the paper.
-       This variant improves the accuracy and it's known as `ResNet V1.5 
+       The bottleneck of TorchVision places the stride for downsampling at the second
+       convolution (3x3) while the original paper places it at the first 1x1 convolution.
+       This variant improves the accuracy and is known as `ResNet V1.5
        <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.
 
     Args:

From ea4481480228d7d8ce5e3b6f2873fb75370ec664 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <datumbox@users.noreply.github.com>
Date: Fri, 20 May 2022 10:59:59 +0100
Subject: [PATCH 9/9] Remove trailing space

---
 docs/source/models/resnet.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/models/resnet.rst b/docs/source/models/resnet.rst
index e5b41d192fd..9d777f2f6b1 100644
--- a/docs/source/models/resnet.rst
+++ b/docs/source/models/resnet.rst
@@ -9,7 +9,7 @@ The ResNet model is based on the `Deep Residual Learning for Image Recognition
 .. note::
     The bottleneck of TorchVision places the stride for downsampling to the second 3x3
     convolution while the original paper places it to the first 1x1 convolution.
-    This variant improves the accuracy and is known as `ResNet V1.5 
+    This variant improves the accuracy and is known as `ResNet V1.5
     <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.
 
 Model builders