From 31fadbee7d1a65cd73ae43dfd4ac6e97e7ca7b01 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Fri, 29 Oct 2021 10:32:46 +0100 Subject: [PATCH 01/24] Adding multiweight support for shufflenetv2 prototype models --- torchvision/prototype/models/__init__.py | 1 + torchvision/prototype/models/shufflenetv2.py | 121 +++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 torchvision/prototype/models/shufflenetv2.py diff --git a/torchvision/prototype/models/__init__.py b/torchvision/prototype/models/__init__.py index a187af7f090..399280eaff7 100644 --- a/torchvision/prototype/models/__init__.py +++ b/torchvision/prototype/models/__init__.py @@ -5,6 +5,7 @@ from .efficientnet import * from .mobilenetv3 import * from .mnasnet import * +from .shufflenetv2 import * from . import detection from . import quantization from . import segmentation diff --git a/torchvision/prototype/models/shufflenetv2.py b/torchvision/prototype/models/shufflenetv2.py new file mode 100644 index 00000000000..d6d02873051 --- /dev/null +++ b/torchvision/prototype/models/shufflenetv2.py @@ -0,0 +1,121 @@ +import warnings +from functools import partial +from typing import Any, Optional + +from torchvision.transforms.functional import InterpolationMode + +from ...models.shufflenetv2 import ShuffleNetV2 +from ..transforms.presets import ImageNetEval +from ._api import Weights, WeightEntry +from ._meta import _IMAGENET_CATEGORIES + + +__all__ = [ + "ShuffleNetV2", + "ShuffleNetV2_x0_5Weights", + "ShuffleNetV2_x1_0Weights", + "ShuffleNetV2_x1_5Weights", + "ShuffleNetV2_x2_0Weights", + "shufflenet_v2_x0_5", + "shufflenet_v2_x1_0", + "shufflenet_v2_x1_5", + "shufflenet_v2_x2_0", +] + + +def _shufflenetv2( + weights: Optional[Weights], + progress: bool, + *args: Any, + **kwargs: Any, +) -> ShuffleNetV2: + if weights is not None: + kwargs["num_classes"] = len(weights.meta["categories"]) + + model = ShuffleNetV2(*args, **kwargs) + + if weights is not None: + model.load_state_dict(weights.state_dict(progress=progress)) + + return model + + +_common_meta = {"size": (224, 224), "categories": _IMAGENET_CATEGORIES, "interpolation": InterpolationMode.BILINEAR} + + +class ShuffleNetV2_x0_5Weights(Weights): + ImageNet1K_RefV1 = WeightEntry( + url="https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth", + transforms=partial(ImageNetEval, crop_size=224), + meta={ + **_common_meta, + "recipe": "", + "acc@1": 69.362, + "acc@5": 88.316, + }, + ) + + +class ShuffleNetV2_x1_0Weights(Weights): + ImageNet1K_RefV1 = WeightEntry( + url="https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth", + transforms=partial(ImageNetEval, crop_size=224), + meta={ + **_common_meta, + "recipe": "", + "acc@1": 60.552, + "acc@5": 81.746, + }, + ) + + +class ShuffleNetV2_x1_5Weights(Weights): + pass + + +class ShuffleNetV2_x2_0Weights(Weights): + pass + + +def shufflenet_v2_x0_5( + weights: Optional[ShuffleNetV2_x0_5Weights] = None, progress: bool = True, **kwargs: Any +) -> ShuffleNetV2: + if "pretrained" in kwargs: + warnings.warn("The argument pretrained is deprecated, please use weights instead.") + weights = ShuffleNetV2_x0_5Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None + weights = ShuffleNetV2_x0_5Weights.verify(weights) + + return _shufflenetv2(weights, progress, [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs) + + +def shufflenet_v2_x1_0( + weights: Optional[ShuffleNetV2_x1_0Weights] = None, progress: bool = True, **kwargs: Any +) -> ShuffleNetV2: + if "pretrained" in kwargs: + warnings.warn("The argument 
pretrained is deprecated, please use weights instead.") + weights = ShuffleNetV2_x1_0Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None + weights = ShuffleNetV2_x1_0Weights.verify(weights) + + return _shufflenetv2(weights, progress, [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs) + + +def shufflenet_v2_x1_5( + weights: Optional[ShuffleNetV2_x1_5Weights] = None, progress: bool = True, **kwargs: Any +) -> ShuffleNetV2: + if "pretrained" in kwargs: + warnings.warn("The argument pretrained is deprecated, please use weights instead.") + weights = ShuffleNetV2_x1_5Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None + weights = ShuffleNetV2_x1_5Weights.verify(weights) + + return _shufflenetv2(weights, progress, [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs) + + +def shufflenet_v2_x2_0( + weights: Optional[ShuffleNetV2_x2_0Weights] = None, progress: bool = True, **kwargs: Any +) -> ShuffleNetV2: + if "pretrained" in kwargs: + warnings.warn("The argument pretrained is deprecated, please use weights instead.") + weights = ShuffleNetV2_x2_0Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None + weights = ShuffleNetV2_x2_0Weights.verify(weights) + + return _shufflenetv2(weights, progress, [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs) From 1e578b7fe05ff5a18201df8a36b552a18fabcd08 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Fri, 29 Oct 2021 10:42:31 +0100 Subject: [PATCH 02/24] Revert "Adding multiweight support for shufflenetv2 prototype models" This reverts commit 31fadbee7d1a65cd73ae43dfd4ac6e97e7ca7b01. --- torchvision/prototype/models/__init__.py | 1 - torchvision/prototype/models/shufflenetv2.py | 121 ------------------- 2 files changed, 122 deletions(-) delete mode 100644 torchvision/prototype/models/shufflenetv2.py diff --git a/torchvision/prototype/models/__init__.py b/torchvision/prototype/models/__init__.py index 399280eaff7..a187af7f090 100644 --- a/torchvision/prototype/models/__init__.py +++ b/torchvision/prototype/models/__init__.py @@ -5,7 +5,6 @@ from .efficientnet import * from .mobilenetv3 import * from .mnasnet import * -from .shufflenetv2 import * from . import detection from . import quantization from . 
import segmentation diff --git a/torchvision/prototype/models/shufflenetv2.py b/torchvision/prototype/models/shufflenetv2.py deleted file mode 100644 index d6d02873051..00000000000 --- a/torchvision/prototype/models/shufflenetv2.py +++ /dev/null @@ -1,121 +0,0 @@ -import warnings -from functools import partial -from typing import Any, Optional - -from torchvision.transforms.functional import InterpolationMode - -from ...models.shufflenetv2 import ShuffleNetV2 -from ..transforms.presets import ImageNetEval -from ._api import Weights, WeightEntry -from ._meta import _IMAGENET_CATEGORIES - - -__all__ = [ - "ShuffleNetV2", - "ShuffleNetV2_x0_5Weights", - "ShuffleNetV2_x1_0Weights", - "ShuffleNetV2_x1_5Weights", - "ShuffleNetV2_x2_0Weights", - "shufflenet_v2_x0_5", - "shufflenet_v2_x1_0", - "shufflenet_v2_x1_5", - "shufflenet_v2_x2_0", -] - - -def _shufflenetv2( - weights: Optional[Weights], - progress: bool, - *args: Any, - **kwargs: Any, -) -> ShuffleNetV2: - if weights is not None: - kwargs["num_classes"] = len(weights.meta["categories"]) - - model = ShuffleNetV2(*args, **kwargs) - - if weights is not None: - model.load_state_dict(weights.state_dict(progress=progress)) - - return model - - -_common_meta = {"size": (224, 224), "categories": _IMAGENET_CATEGORIES, "interpolation": InterpolationMode.BILINEAR} - - -class ShuffleNetV2_x0_5Weights(Weights): - ImageNet1K_RefV1 = WeightEntry( - url="https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth", - transforms=partial(ImageNetEval, crop_size=224), - meta={ - **_common_meta, - "recipe": "", - "acc@1": 69.362, - "acc@5": 88.316, - }, - ) - - -class ShuffleNetV2_x1_0Weights(Weights): - ImageNet1K_RefV1 = WeightEntry( - url="https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth", - transforms=partial(ImageNetEval, crop_size=224), - meta={ - **_common_meta, - "recipe": "", - "acc@1": 60.552, - "acc@5": 81.746, - }, - ) - - -class ShuffleNetV2_x1_5Weights(Weights): - pass - - -class ShuffleNetV2_x2_0Weights(Weights): - pass - - -def shufflenet_v2_x0_5( - weights: Optional[ShuffleNetV2_x0_5Weights] = None, progress: bool = True, **kwargs: Any -) -> ShuffleNetV2: - if "pretrained" in kwargs: - warnings.warn("The argument pretrained is deprecated, please use weights instead.") - weights = ShuffleNetV2_x0_5Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None - weights = ShuffleNetV2_x0_5Weights.verify(weights) - - return _shufflenetv2(weights, progress, [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs) - - -def shufflenet_v2_x1_0( - weights: Optional[ShuffleNetV2_x1_0Weights] = None, progress: bool = True, **kwargs: Any -) -> ShuffleNetV2: - if "pretrained" in kwargs: - warnings.warn("The argument pretrained is deprecated, please use weights instead.") - weights = ShuffleNetV2_x1_0Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None - weights = ShuffleNetV2_x1_0Weights.verify(weights) - - return _shufflenetv2(weights, progress, [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs) - - -def shufflenet_v2_x1_5( - weights: Optional[ShuffleNetV2_x1_5Weights] = None, progress: bool = True, **kwargs: Any -) -> ShuffleNetV2: - if "pretrained" in kwargs: - warnings.warn("The argument pretrained is deprecated, please use weights instead.") - weights = ShuffleNetV2_x1_5Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None - weights = ShuffleNetV2_x1_5Weights.verify(weights) - - return _shufflenetv2(weights, progress, [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs) - - -def shufflenet_v2_x2_0( - weights: 
Optional[ShuffleNetV2_x2_0Weights] = None, progress: bool = True, **kwargs: Any -) -> ShuffleNetV2: - if "pretrained" in kwargs: - warnings.warn("The argument pretrained is deprecated, please use weights instead.") - weights = ShuffleNetV2_x2_0Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None - weights = ShuffleNetV2_x2_0Weights.verify(weights) - - return _shufflenetv2(weights, progress, [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs) From 4e3d900f796c1e3e667312087e77956ca4a4c017 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Fri, 29 Oct 2021 10:59:31 +0100 Subject: [PATCH 03/24] Adding multiweight support for shufflenetv2 prototype models --- torchvision/prototype/models/__init__.py | 1 + torchvision/prototype/models/shufflenetv2.py | 121 +++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 torchvision/prototype/models/shufflenetv2.py diff --git a/torchvision/prototype/models/__init__.py b/torchvision/prototype/models/__init__.py index 264d787d40e..69fe4310606 100644 --- a/torchvision/prototype/models/__init__.py +++ b/torchvision/prototype/models/__init__.py @@ -7,6 +7,7 @@ from .mobilenetv2 import * from .mnasnet import * from .regnet import * +from .shufflenetv2 import * from . import detection from . import quantization from . import segmentation diff --git a/torchvision/prototype/models/shufflenetv2.py b/torchvision/prototype/models/shufflenetv2.py new file mode 100644 index 00000000000..d6d02873051 --- /dev/null +++ b/torchvision/prototype/models/shufflenetv2.py @@ -0,0 +1,121 @@ +import warnings +from functools import partial +from typing import Any, Optional + +from torchvision.transforms.functional import InterpolationMode + +from ...models.shufflenetv2 import ShuffleNetV2 +from ..transforms.presets import ImageNetEval +from ._api import Weights, WeightEntry +from ._meta import _IMAGENET_CATEGORIES + + +__all__ = [ + "ShuffleNetV2", + "ShuffleNetV2_x0_5Weights", + "ShuffleNetV2_x1_0Weights", + "ShuffleNetV2_x1_5Weights", + "ShuffleNetV2_x2_0Weights", + "shufflenet_v2_x0_5", + "shufflenet_v2_x1_0", + "shufflenet_v2_x1_5", + "shufflenet_v2_x2_0", +] + + +def _shufflenetv2( + weights: Optional[Weights], + progress: bool, + *args: Any, + **kwargs: Any, +) -> ShuffleNetV2: + if weights is not None: + kwargs["num_classes"] = len(weights.meta["categories"]) + + model = ShuffleNetV2(*args, **kwargs) + + if weights is not None: + model.load_state_dict(weights.state_dict(progress=progress)) + + return model + + +_common_meta = {"size": (224, 224), "categories": _IMAGENET_CATEGORIES, "interpolation": InterpolationMode.BILINEAR} + + +class ShuffleNetV2_x0_5Weights(Weights): + ImageNet1K_RefV1 = WeightEntry( + url="https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth", + transforms=partial(ImageNetEval, crop_size=224), + meta={ + **_common_meta, + "recipe": "", + "acc@1": 69.362, + "acc@5": 88.316, + }, + ) + + +class ShuffleNetV2_x1_0Weights(Weights): + ImageNet1K_RefV1 = WeightEntry( + url="https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth", + transforms=partial(ImageNetEval, crop_size=224), + meta={ + **_common_meta, + "recipe": "", + "acc@1": 60.552, + "acc@5": 81.746, + }, + ) + + +class ShuffleNetV2_x1_5Weights(Weights): + pass + + +class ShuffleNetV2_x2_0Weights(Weights): + pass + + +def shufflenet_v2_x0_5( + weights: Optional[ShuffleNetV2_x0_5Weights] = None, progress: bool = True, **kwargs: Any +) -> ShuffleNetV2: + if "pretrained" in kwargs: + warnings.warn("The argument pretrained is deprecated, please use weights 
instead.") + weights = ShuffleNetV2_x0_5Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None + weights = ShuffleNetV2_x0_5Weights.verify(weights) + + return _shufflenetv2(weights, progress, [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs) + + +def shufflenet_v2_x1_0( + weights: Optional[ShuffleNetV2_x1_0Weights] = None, progress: bool = True, **kwargs: Any +) -> ShuffleNetV2: + if "pretrained" in kwargs: + warnings.warn("The argument pretrained is deprecated, please use weights instead.") + weights = ShuffleNetV2_x1_0Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None + weights = ShuffleNetV2_x1_0Weights.verify(weights) + + return _shufflenetv2(weights, progress, [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs) + + +def shufflenet_v2_x1_5( + weights: Optional[ShuffleNetV2_x1_5Weights] = None, progress: bool = True, **kwargs: Any +) -> ShuffleNetV2: + if "pretrained" in kwargs: + warnings.warn("The argument pretrained is deprecated, please use weights instead.") + weights = ShuffleNetV2_x1_5Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None + weights = ShuffleNetV2_x1_5Weights.verify(weights) + + return _shufflenetv2(weights, progress, [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs) + + +def shufflenet_v2_x2_0( + weights: Optional[ShuffleNetV2_x2_0Weights] = None, progress: bool = True, **kwargs: Any +) -> ShuffleNetV2: + if "pretrained" in kwargs: + warnings.warn("The argument pretrained is deprecated, please use weights instead.") + weights = ShuffleNetV2_x2_0Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None + weights = ShuffleNetV2_x2_0Weights.verify(weights) + + return _shufflenetv2(weights, progress, [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs) From 615b612933c1dea2da471ac5678bd4ec97e5255f Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Fri, 29 Oct 2021 11:14:45 +0100 Subject: [PATCH 04/24] Revert "Adding multiweight support for shufflenetv2 prototype models" This reverts commit 4e3d900f796c1e3e667312087e77956ca4a4c017. --- torchvision/prototype/models/__init__.py | 1 - torchvision/prototype/models/shufflenetv2.py | 121 ------------------- 2 files changed, 122 deletions(-) delete mode 100644 torchvision/prototype/models/shufflenetv2.py diff --git a/torchvision/prototype/models/__init__.py b/torchvision/prototype/models/__init__.py index 69fe4310606..264d787d40e 100644 --- a/torchvision/prototype/models/__init__.py +++ b/torchvision/prototype/models/__init__.py @@ -7,7 +7,6 @@ from .mobilenetv2 import * from .mnasnet import * from .regnet import * -from .shufflenetv2 import * from . import detection from . import quantization from . 
import segmentation diff --git a/torchvision/prototype/models/shufflenetv2.py b/torchvision/prototype/models/shufflenetv2.py deleted file mode 100644 index d6d02873051..00000000000 --- a/torchvision/prototype/models/shufflenetv2.py +++ /dev/null @@ -1,121 +0,0 @@ -import warnings -from functools import partial -from typing import Any, Optional - -from torchvision.transforms.functional import InterpolationMode - -from ...models.shufflenetv2 import ShuffleNetV2 -from ..transforms.presets import ImageNetEval -from ._api import Weights, WeightEntry -from ._meta import _IMAGENET_CATEGORIES - - -__all__ = [ - "ShuffleNetV2", - "ShuffleNetV2_x0_5Weights", - "ShuffleNetV2_x1_0Weights", - "ShuffleNetV2_x1_5Weights", - "ShuffleNetV2_x2_0Weights", - "shufflenet_v2_x0_5", - "shufflenet_v2_x1_0", - "shufflenet_v2_x1_5", - "shufflenet_v2_x2_0", -] - - -def _shufflenetv2( - weights: Optional[Weights], - progress: bool, - *args: Any, - **kwargs: Any, -) -> ShuffleNetV2: - if weights is not None: - kwargs["num_classes"] = len(weights.meta["categories"]) - - model = ShuffleNetV2(*args, **kwargs) - - if weights is not None: - model.load_state_dict(weights.state_dict(progress=progress)) - - return model - - -_common_meta = {"size": (224, 224), "categories": _IMAGENET_CATEGORIES, "interpolation": InterpolationMode.BILINEAR} - - -class ShuffleNetV2_x0_5Weights(Weights): - ImageNet1K_RefV1 = WeightEntry( - url="https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth", - transforms=partial(ImageNetEval, crop_size=224), - meta={ - **_common_meta, - "recipe": "", - "acc@1": 69.362, - "acc@5": 88.316, - }, - ) - - -class ShuffleNetV2_x1_0Weights(Weights): - ImageNet1K_RefV1 = WeightEntry( - url="https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth", - transforms=partial(ImageNetEval, crop_size=224), - meta={ - **_common_meta, - "recipe": "", - "acc@1": 60.552, - "acc@5": 81.746, - }, - ) - - -class ShuffleNetV2_x1_5Weights(Weights): - pass - - -class ShuffleNetV2_x2_0Weights(Weights): - pass - - -def shufflenet_v2_x0_5( - weights: Optional[ShuffleNetV2_x0_5Weights] = None, progress: bool = True, **kwargs: Any -) -> ShuffleNetV2: - if "pretrained" in kwargs: - warnings.warn("The argument pretrained is deprecated, please use weights instead.") - weights = ShuffleNetV2_x0_5Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None - weights = ShuffleNetV2_x0_5Weights.verify(weights) - - return _shufflenetv2(weights, progress, [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs) - - -def shufflenet_v2_x1_0( - weights: Optional[ShuffleNetV2_x1_0Weights] = None, progress: bool = True, **kwargs: Any -) -> ShuffleNetV2: - if "pretrained" in kwargs: - warnings.warn("The argument pretrained is deprecated, please use weights instead.") - weights = ShuffleNetV2_x1_0Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None - weights = ShuffleNetV2_x1_0Weights.verify(weights) - - return _shufflenetv2(weights, progress, [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs) - - -def shufflenet_v2_x1_5( - weights: Optional[ShuffleNetV2_x1_5Weights] = None, progress: bool = True, **kwargs: Any -) -> ShuffleNetV2: - if "pretrained" in kwargs: - warnings.warn("The argument pretrained is deprecated, please use weights instead.") - weights = ShuffleNetV2_x1_5Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None - weights = ShuffleNetV2_x1_5Weights.verify(weights) - - return _shufflenetv2(weights, progress, [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs) - - -def shufflenet_v2_x2_0( - weights: 
Optional[ShuffleNetV2_x2_0Weights] = None, progress: bool = True, **kwargs: Any -) -> ShuffleNetV2: - if "pretrained" in kwargs: - warnings.warn("The argument pretrained is deprecated, please use weights instead.") - weights = ShuffleNetV2_x2_0Weights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None - weights = ShuffleNetV2_x2_0Weights.verify(weights) - - return _shufflenetv2(weights, progress, [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs) From f18f86feda38594f9ee0104c78c00e06844001c7 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Mon, 24 Jan 2022 16:03:21 +0000 Subject: [PATCH 05/24] Remove module vs method name clash --- torchvision/ops/__init__.py | 2 +- torchvision/ops/{generalized_box_iou_loss.py => giou_loss.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename torchvision/ops/{generalized_box_iou_loss.py => giou_loss.py} (100%) diff --git a/torchvision/ops/__init__.py b/torchvision/ops/__init__.py index 33a48995869..8ba10080c1f 100644 --- a/torchvision/ops/__init__.py +++ b/torchvision/ops/__init__.py @@ -13,7 +13,7 @@ from .deform_conv import deform_conv2d, DeformConv2d from .feature_pyramid_network import FeaturePyramidNetwork from .focal_loss import sigmoid_focal_loss -from .generalized_box_iou_loss import generalized_box_iou_loss +from .giou_loss import generalized_box_iou_loss from .misc import FrozenBatchNorm2d, ConvNormActivation, SqueezeExcitation from .poolers import MultiScaleRoIAlign from .ps_roi_align import ps_roi_align, PSRoIAlign diff --git a/torchvision/ops/generalized_box_iou_loss.py b/torchvision/ops/giou_loss.py similarity index 100% rename from torchvision/ops/generalized_box_iou_loss.py rename to torchvision/ops/giou_loss.py From 95f12714c1e257159c255861d121280ae4bd1150 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Sat, 29 Jan 2022 11:11:38 +0000 Subject: [PATCH 06/24] add model contribution guidelines --- CONTRIBUTING.md | 5 +- CONTRIBUTING_MODELS.md | 210 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+), 3 deletions(-) create mode 100644 CONTRIBUTING_MODELS.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 612827a99b5..25ef4bda336 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -186,10 +186,9 @@ You can also choose to only build a subset of the examples by using the example ``EXAMPLES_PATTERN="transforms" make html`` will only build the examples with "transforms" in their name. -### New model +### New or improved model -More details on how to add a new model will be provided later. Please, do not send any PR with a new model without discussing -it in an issue as, most likely, it will not be accepted. +Please refer to guidelines in [Contributing to Torchvision - Models](https://github.com/pytorch/vision/blob/main/CONTRIBUTING_MODELS.md). ### New dataset diff --git a/CONTRIBUTING_MODELS.md b/CONTRIBUTING_MODELS.md new file mode 100644 index 00000000000..612827a99b5 --- /dev/null +++ b/CONTRIBUTING_MODELS.md @@ -0,0 +1,210 @@ +# Contributing to Torchvision + +We want to make contributing to this project as easy and transparent as possible. + +## TL;DR + +We appreciate all contributions. If you are interested in contributing to Torchvision, there are many ways to help out. +Your contributions may fall into the following categories: + +- It helps the project if you could + - Report issues you're facing + - Give a :+1: on issues that others reported and that are relevant to you + +- Answering queries on the issue tracker, investigating bugs are very valuable contributions to the project. 
+ +- You would like to improve the documentation. This is no less important than improving the library itself! +If you find a typo in the documentation, do not hesitate to submit a GitHub pull request. + +- If you would like to fix a bug + - please pick one from the [list of open issues labelled as "help wanted"](https://github.com/pytorch/vision/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22) + - comment on the issue that you want to work on this issue + - send a PR with your fix, see below. + +- If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. + +## Issues + +We use GitHub issues to track public bugs. Please ensure your description is +clear and has sufficient instructions to be able to reproduce the issue. + +## Development installation + +### Install PyTorch Nightly + +```bash +conda install pytorch -c pytorch-nightly +# or with pip (see https://pytorch.org/get-started/locally/) +# pip install numpy +# pip install --pre torch -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html +``` + +### Install Torchvision + +```bash +git clone https://github.com/pytorch/vision.git +cd vision +python setup.py develop +# or, for OSX +# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py develop +# for C++ debugging, please use DEBUG=1 +# DEBUG=1 python setup.py develop +pip install flake8 typing mypy pytest pytest-mock scipy +``` +You may also have to install `libpng-dev` and `libjpeg-turbo8-dev` libraries: +```bash +conda install libpng jpeg +``` + +## Development Process + +If you plan to modify the code or documentation, please follow the steps below: + +1. Fork the repository and create your branch from `main`. +2. If you have modified the code (new feature or bug-fix), please add unit tests. +3. If you have changed APIs, update the documentation. Make sure the documentation builds. +4. Ensure the test suite passes. +5. Make sure your code passes the formatting checks (see below). + +For more details about pull requests, +please read [GitHub's guides](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). + +If you would like to contribute a new model, please see [here](#New-model). + +If you would like to contribute a new dataset, please see [here](#New-dataset). + +### Code formatting and typing + +#### Formatting + +The torchvision code is formatted by [black](https://black.readthedocs.io/en/stable/), +and checked against pep8 compliance with [flake8](https://flake8.pycqa.org/en/latest/). +Instead of relying directly on `black` however, we rely on +[ufmt](https://github.com/omnilib/ufmt), for compatibility reasons with Facebook +internal infrastructure. + +To format your code, install `ufmt` with `pip install ufmt` and use e.g.: + +```bash +ufmt format torchvision +``` + +For the vast majority of cases, this is all you should need to run. For the +formatting to be a bit faster, you can also choose to only apply `ufmt` to the +files that were edited in your PR with e.g.: + +```bash +ufmt format `git diff main --name-only` +``` + +Similarly, you can check for `flake8` errors with `flake8 torchvision`, although +they should be fairly rare considering that most of the errors are automatically +taken care of by `ufmt` already. + +##### Pre-commit hooks + +For convenience and **purely optionally**, you can rely on [pre-commit +hooks](https://pre-commit.com/) which will run both `ufmt` and `flake8` prior to +every commit. 
+ +First install the `pre-commit` package with `pip install pre-commit`, and then +run `pre-commit install` at the root of the repo for the hooks to be set up - +that's it. + +Feel free to read the [pre-commit docs](https://pre-commit.com/#usage) to learn +more and improve your workflow. You'll see for example that `pre-commit run +--all-files` will run both `ufmt` and `flake8` without the need for you to +commit anything, and that the `--no-verify` flag can be added to `git commit` to +temporarily deactivate the hooks. + +#### Type annotations + +The codebase has type annotations, please make sure to add type hints if required. We use `mypy` tool for type checking: +```bash +mypy --config-file mypy.ini +``` + +### Unit tests + +If you have modified the code by adding a new feature or a bug-fix, please add unit tests for that. To run a specific +test: +```bash +pytest test/ -vvv -k +# e.g. pytest test/test_transforms.py -vvv -k test_center_crop +``` + +If you would like to run all tests: +```bash +pytest test -vvv +``` + +Tests that require internet access should be in +`test/test_internet.py`. + +### Documentation + +Torchvision uses [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) +for formatting docstrings. Length of line inside docstrings block must be limited to 120 characters. + +Please, follow the instructions to build and deploy the documentation locally. + +#### Install requirements + +```bash +cd docs +pip install -r requirements.txt +``` + +#### Build + +```bash +cd docs +make html +``` + +Then open `docs/build/html/index.html` in your favorite browser. + +The docs are also automatically built when you submit a PR. The job that +builds the docs is named `build_docs`. You can access the rendered docs by +clicking on that job and then going to the "Artifacts" tab. + +You can clean the built docs and re-start the build from scratch by doing ``make +clean``. + +#### Building the example gallery - or not + +When you run ``make html`` for the first time, all the examples in the gallery +will be built. Subsequent builds should be faster, and will only build the +examples that have been modified. + +You can run ``make html-noplot`` to not build the examples at all. This is +useful after a ``make clean`` to do some quick checks that are not related to +the examples. + +You can also choose to only build a subset of the examples by using the +``EXAMPLES_PATTERN`` env variable, which accepts a regular expression. For +example ``EXAMPLES_PATTERN="transforms" make html`` will only build the examples +with "transforms" in their name. + +### New model + +More details on how to add a new model will be provided later. Please, do not send any PR with a new model without discussing +it in an issue as, most likely, it will not be accepted. + +### New dataset + +More details on how to add a new dataset will be provided later. Please, do not send any PR with a new dataset without discussing +it in an issue as, most likely, it will not be accepted. + +### Pull Request + +If all previous checks (flake8, mypy, unit tests) are passing, please send a PR. Submitted PR will pass other tests on +different operation systems, python versions and hardwares. + +For more details about pull requests workflow, +please read [GitHub's guides](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). 
+ +## License + +By contributing to Torchvision, you agree that your contributions will be licensed +under the LICENSE file in the root directory of this source tree. From 1f79d422a8dd310ccfe90dfd5020da595147bc0b Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Sat, 29 Jan 2022 11:14:55 +0000 Subject: [PATCH 07/24] update CONTRIBUTING_MODELS.md --- CONTRIBUTING_MODELS.md | 221 +++++++++++++---------------------------- 1 file changed, 67 insertions(+), 154 deletions(-) diff --git a/CONTRIBUTING_MODELS.md b/CONTRIBUTING_MODELS.md index 612827a99b5..fd29e8958ec 100644 --- a/CONTRIBUTING_MODELS.md +++ b/CONTRIBUTING_MODELS.md @@ -1,210 +1,123 @@ -# Contributing to Torchvision +# Contributing to Torchvision - Models -We want to make contributing to this project as easy and transparent as possible. + -## TL;DR +- [New Model Architectures - Overview](#new-model-architectures-overview) +- [New Model Architectures - Implementation Details](#new-model-architectures-implementation-details) +- [New Weights for Existing Model Architectures](#new-weights-for-existing-model-architectures) -We appreciate all contributions. If you are interested in contributing to Torchvision, there are many ways to help out. -Your contributions may fall into the following categories: + -- It helps the project if you could - - Report issues you're facing - - Give a :+1: on issues that others reported and that are relevant to you -- Answering queries on the issue tracker, investigating bugs are very valuable contributions to the project. +## New Model Architectures - Overview -- You would like to improve the documentation. This is no less important than improving the library itself! -If you find a typo in the documentation, do not hesitate to submit a GitHub pull request. +For someone who would be interested in adding a model architecture, it is also expected to train the model, so here are a few important considerations: -- If you would like to fix a bug - - please pick one from the [list of open issues labelled as "help wanted"](https://github.com/pytorch/vision/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22) - - comment on the issue that you want to work on this issue - - send a PR with your fix, see below. +- Training big models requires lots of resources and the cost quickly adds up -- If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. +- Reproducing models is fun but also risky as you might not always get the results reported on the paper. It might require a huge amount of effort to close the gap -## Issues +- The contribution might not get merged if we significantly lack in terms of accuracy, speed etc -We use GitHub issues to track public bugs. Please ensure your description is -clear and has sufficient instructions to be able to reproduce the issue. +- Including new models in TorchVision might not be the best approach, so other options such as releasing the model through to [Pytorch Hub](https://pytorch.org/hub/) should be considered -## Development installation +So, before starting any work and submitting a PR there are a few critical things that need to be taken into account in order to make sure the planned contribution is within the context of TorchVision, and the requirements and expectations are discussed beforehand. If this step is skipped and a PR is submitted without prior discussion it will almost certainly be rejected. -### Install PyTorch Nightly +### 1. 
Preparation work -```bash -conda install pytorch -c pytorch-nightly -# or with pip (see https://pytorch.org/get-started/locally/) -# pip install numpy -# pip install --pre torch -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html -``` +- Start by looking into this [issue](https://github.com/pytorch/vision/issues/2707) in order to have an idea of the models that are being considered, express your willingness to add a new model and discuss with the community whether or not this model should be included in TorchVision. It is very important at this stage to make sure that there is an agreement on the value of having this model in TorchVision and there is no one else already working on it. -### Install Torchvision +- If the decision is to include the new model, then please create a new ticket which will be used for all design and implementation discussions prior to the PR. One of the TorchVision maintainers will reach out at this stage and this will be your POC from this point onwards in order to provide support, guidance and regular feedback. -```bash -git clone https://github.com/pytorch/vision.git -cd vision -python setup.py develop -# or, for OSX -# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py develop -# for C++ debugging, please use DEBUG=1 -# DEBUG=1 python setup.py develop -pip install flake8 typing mypy pytest pytest-mock scipy -``` -You may also have to install `libpng-dev` and `libjpeg-turbo8-dev` libraries: -```bash -conda install libpng jpeg -``` +### 2. Implement the model -## Development Process +Please take a look at existing models in TorchVision to get familiar with the idioms. Also please look at recent contributions for new models. If in doubt about any design decisions you can ask for feedback on the issue created in step 1. Example of things to take into account: -If you plan to modify the code or documentation, please follow the steps below: +- The implementation should be as close as possible to the canonical implementation/paper +- The PR must include the code implementation, documentation and tests +- It should also extend the existing reference scripts used to train the model +- The weights need to reproduce closely the results of the paper in terms of accuracy, even though the final weights to be deployed will be those trained by the TorchVision maintainers +- The PR description should include commands/configuration used to train the model, so that the TorchVision maintainers can easily run them to verify the implementation and generate the final model to be released +- Make sure we re-use existing components as much as possible (inheritance) +- New primitives (transforms, losses, etc) can be added if necessary, but the final location will be determined after discussion with the dedicated maintainer -1. Fork the repository and create your branch from `main`. -2. If you have modified the code (new feature or bug-fix), please add unit tests. -3. If you have changed APIs, update the documentation. Make sure the documentation builds. -4. Ensure the test suite passes. -5. Make sure your code passes the formatting checks (see below). +### 3. Train the model with reference scripts -For more details about pull requests, -please read [GitHub's guides](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). +To validate the new model against the common benchmark, as well as to generate pre-trained weights, you must use TorchVision’s reference scripts to train the model. 
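For a classification model, for example, this means running the training script under `references/classification`. A typical launch looks like the sketch below; the model name and any additional flags are placeholders to be replaced by the actual recipe used for the contribution:

```bash
# Illustrative launch of the classification reference script on 8 GPUs;
# the model name and extra hyper-parameter flags are placeholders.
cd references/classification
torchrun --nproc_per_node=8 train.py --model resnet18
```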
-If you would like to contribute a new model, please see [here](#New-model). +Make sure all logs and a final (or best) checkpoint are saved, because it is expected that a submission shows that a model has been successfully trained and the results are in line with the original paper/repository. This will allow the reviewers to quickly check the validity of the submission, but please note that the final model to be released will be re-trained by the maintainers in order to verify reproducibility, ensure that the changes occurred during the PR review did not introduce any bugs, and to avoid moving around a large amount of data (including all checkpoints and logs). -If you would like to contribute a new dataset, please see [here](#New-dataset). +### 4. Submit a PR -### Code formatting and typing +Submit a PR and tag the assigned maintainer. This PR should: -#### Formatting +- Link the original ticket +- Provide a link for the original paper and the original repository if available +- Highlight the important test metrics and how they compare to the original paper +- Highlight any design choices that deviate from the original paper/implementation and rationale for these choices -The torchvision code is formatted by [black](https://black.readthedocs.io/en/stable/), -and checked against pep8 compliance with [flake8](https://flake8.pycqa.org/en/latest/). -Instead of relying directly on `black` however, we rely on -[ufmt](https://github.com/omnilib/ufmt), for compatibility reasons with Facebook -internal infrastructure. +## New Model Architectures - Implementation Details -To format your code, install `ufmt` with `pip install ufmt` and use e.g.: +### Model development and training steps -```bash -ufmt format torchvision -``` +When developing a new model there are some details not to be missed: -For the vast majority of cases, this is all you should need to run. For the -formatting to be a bit faster, you can also choose to only apply `ufmt` to the -files that were edited in your PR with e.g.: +- Implement a model factory function for each of the model variants -```bash -ufmt format `git diff main --name-only` -``` +- in the module constructor, [pass layer constructor instead of instance](https://github.com/pytorch/vision/blob/47bd962069ba03f753e7ba711cb825317be0b00a/torchvision/models/efficientnet.py#L88) for configurable layers like norm, activation, and log the api usage with `_log_api_usage_once(self)` -Similarly, you can check for `flake8` errors with `flake8 torchvision`, although -they should be fairly rare considering that most of the errors are automatically -taken care of by `ufmt` already. +- fuse layers together with existing common blocks if possible; For example consecutive conv, bn, activation layers could be replaced by [ConvNormActication](https://github.com/pytorch/vision/blob/47bd962069ba03f753e7ba711cb825317be0b00a/torchvision/ops/misc.py#L104) -##### Pre-commit hooks +- define `__all__` in the beginning of the model file to expose model factory functions; import model public APIs (e.g. factory methods) in `torchvision/models/__init__.py` -For convenience and **purely optionally**, you can rely on [pre-commit -hooks](https://pre-commit.com/) which will run both `ufmt` and `flake8` prior to -every commit. +- create the model builder using the new API and add it to the prototype area. Here is an [example](https://github.com/pytorch/vision/pull/4784/files) on how to do this. 
The new API requires adding more information about the weights such as the preprocessing transforms necessary for using the model, meta-data about the model, etc -First install the `pre-commit` package with `pip install pre-commit`, and then -run `pre-commit install` at the root of the repo for the hooks to be set up - -that's it. +- Make sure you write tests for the model itself (see `_check_input_backprop`, `_model_params` and `_model_params` in `test/test_models.py`) and for any new operators/transforms or important functions that you introduce -Feel free to read the [pre-commit docs](https://pre-commit.com/#usage) to learn -more and improve your workflow. You'll see for example that `pre-commit run ---all-files` will run both `ufmt` and `flake8` without the need for you to -commit anything, and that the `--no-verify` flag can be added to `git commit` to -temporarily deactivate the hooks. +Note that this list is not exhaustive and there are details here related to the code quality etc, but these are rules that apply in all PRs (see [Contributing to TorchVision](https://github.com/pytorch/vision/blob/main/CONTRIBUTING.md)). -#### Type annotations +Once the model is implemented, you need to train the model using the reference scripts. For example, in order to train a classification resnet18 model you would: -The codebase has type annotations, please make sure to add type hints if required. We use `mypy` tool for type checking: -```bash -mypy --config-file mypy.ini -``` +1. go to `references/classification` -### Unit tests +2. run the train command (for example `torchrun --nproc_per_node=8 train.py --model resnet18`) -If you have modified the code by adding a new feature or a bug-fix, please add unit tests for that. To run a specific -test: -```bash -pytest test/ -vvv -k -# e.g. pytest test/test_transforms.py -vvv -k test_center_crop -``` +After training the model, select the best checkpoint and estimate its accuracy with a batch size of 1 on a single GPU. This helps us get better measurements about the accuracy of the models and avoid variants introduced due to batch padding (read [here](https://github.com/pytorch/vision/pull/4609/commits/5264b1a670107bcb4dc89e83a369f6fd97466ef8) for more details). -If you would like to run all tests: -```bash -pytest test -vvv -``` +Finally, run the model test to generate expected model files for testing. Please include those generated files in the PR as well.: -Tests that require internet access should be in -`test/test_internet.py`. +`EXPECTTEST_ACCEPT=1 pytest test/test_models.py -k {model_name}` -### Documentation -Torchvision uses [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) -for formatting docstrings. Length of line inside docstrings block must be limited to 120 characters. +### Documentation and Pytorch Hub -Please, follow the instructions to build and deploy the documentation locally. +- `docs/source/models.rst`: -#### Install requirements + - add the model to the corresponding section (classification/detection/video etc.) -```bash -cd docs -pip install -r requirements.txt -``` + - describe how to construct the model variants (with and without pre-trained weights) -#### Build + - add model metrics and reference to the original paper -```bash -cd docs -make html -``` +- `hubconf.py`: -Then open `docs/build/html/index.html` in your favorite browser. + - import the model factory functions -The docs are also automatically built when you submit a PR. The job that -builds the docs is named `build_docs`. 
You can access the rendered docs by -clicking on that job and then going to the "Artifacts" tab. + - submit a PR to [https://github.com/pytorch/hub](https://github.com/pytorch/hub) with a model page (or update an existing one) -You can clean the built docs and re-start the build from scratch by doing ``make -clean``. +- `README.md` under the reference script folder: + + - command(s) to train the model -#### Building the example gallery - or not -When you run ``make html`` for the first time, all the examples in the gallery -will be built. Subsequent builds should be faster, and will only build the -examples that have been modified. +## New Weights for Existing Model Architectures -You can run ``make html-noplot`` to not build the examples at all. This is -useful after a ``make clean`` to do some quick checks that are not related to -the examples. +The process of improving existing models, for instance improving accuracy by retraining the model with a different set of hyperparameters or augmentations, is the following: -You can also choose to only build a subset of the examples by using the -``EXAMPLES_PATTERN`` env variable, which accepts a regular expression. For -example ``EXAMPLES_PATTERN="transforms" make html`` will only build the examples -with "transforms" in their name. +1. Open a ticket and discuss with the community and maintainers whether this improvement should be added to TorchVision. Note that to add new weights the improvement should be significant. -### New model +2. Train the model using TorchVision reference scripts. You can add new primitives (transforms, losses, etc) when necessary, but the final location will be determined after discussion with the dedicated maintainer. -More details on how to add a new model will be provided later. Please, do not send any PR with a new model without discussing -it in an issue as, most likely, it will not be accepted. - -### New dataset +3. Open a PR with the new weights, together with the training logs and the checkpoint chosen so the reviewers can verify the submission. Details on how the model was trained, i.e., the training command using the reference scripts, should be included in the PR. -More details on how to add a new dataset will be provided later. Please, do not send any PR with a new dataset without discussing -it in an issue as, most likely, it will not be accepted. - -### Pull Request - -If all previous checks (flake8, mypy, unit tests) are passing, please send a PR. Submitted PR will pass other tests on -different operation systems, python versions and hardwares. - -For more details about pull requests workflow, -please read [GitHub's guides](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). - -## License - -By contributing to Torchvision, you agree that your contributions will be licensed -under the LICENSE file in the root directory of this source tree. +4. The PR reviewers should replicate the results on their side to verify the submission and if all goes well the new weights should be ready to be released! 
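The multi-weight prototype builders added at the start of this series (`torchvision/prototype/models/shufflenetv2.py`) are a useful template for what these guidelines ask of a new model: a weights enum bundling the checkpoint URL, the evaluation transforms and the meta-data, plus a builder function that still honours the deprecated `pretrained` flag. The sketch below condenses that pattern; `MyModel`, `MyModelWeights`, the URL and the accuracy numbers are placeholders, while `Weights`, `WeightEntry`, `ImageNetEval` and `_IMAGENET_CATEGORIES` are the prototype helpers imported (via relative paths) in that file, written here with their absolute module paths.

```python
# Condensed, illustrative sketch of the prototype multi-weight pattern used in
# the shufflenetv2 patches above. MyModel, MyModelWeights, the URL and the
# metrics are placeholders; the prototype helper imports mirror the relative
# imports used in torchvision/prototype/models/shufflenetv2.py.
import warnings
from functools import partial
from typing import Any, Optional

from torch import nn
from torchvision.prototype.models._api import Weights, WeightEntry
from torchvision.prototype.models._meta import _IMAGENET_CATEGORIES
from torchvision.prototype.transforms.presets import ImageNetEval


class MyModel(nn.Module):
    # Placeholder architecture; a real contribution implements the paper's model here.
    def __init__(self, num_classes: int = 1000) -> None:
        super().__init__()
        self.classifier = nn.Linear(1, num_classes)


class MyModelWeights(Weights):
    ImageNet1K_RefV1 = WeightEntry(
        url="https://download.pytorch.org/models/mymodel_placeholder.pth",  # placeholder URL
        transforms=partial(ImageNetEval, crop_size=224),
        meta={"categories": _IMAGENET_CATEGORIES, "acc@1": 0.0, "acc@5": 0.0},  # fill in real values
    )


def my_model(weights: Optional[MyModelWeights] = None, progress: bool = True, **kwargs: Any) -> MyModel:
    # Keep backwards compatibility with the deprecated `pretrained` flag.
    if "pretrained" in kwargs:
        warnings.warn("The argument pretrained is deprecated, please use weights instead.")
        weights = MyModelWeights.ImageNet1K_RefV1 if kwargs.pop("pretrained") else None
    weights = MyModelWeights.verify(weights)

    if weights is not None:
        kwargs["num_classes"] = len(weights.meta["categories"])
    model = MyModel(**kwargs)
    if weights is not None:
        model.load_state_dict(weights.state_dict(progress=progress))
    return model
```

With such a builder in place, `my_model(weights=MyModelWeights.ImageNet1K_RefV1)` loads the released checkpoint and its associated preprocessing metadata, while the legacy `my_model(pretrained=True)` call keeps working but emits a deprecation warning.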
From d8ab5923fb8dc5b0730862e53bf24cf5096500b7 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Sat, 29 Jan 2022 11:30:37 +0000 Subject: [PATCH 08/24] Fix formatting and typo --- CONTRIBUTING.md | 2 +- CONTRIBUTING_MODELS.md | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 25ef4bda336..99cadf98c12 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -188,7 +188,7 @@ with "transforms" in their name. ### New or improved model -Please refer to guidelines in [Contributing to Torchvision - Models](https://github.com/pytorch/vision/blob/main/CONTRIBUTING_MODELS.md). +Please refer to the guidelines in [Contributing to Torchvision - Models](https://github.com/pytorch/vision/blob/main/CONTRIBUTING_MODELS.md). ### New dataset diff --git a/CONTRIBUTING_MODELS.md b/CONTRIBUTING_MODELS.md index fd29e8958ec..daf07affec8 100644 --- a/CONTRIBUTING_MODELS.md +++ b/CONTRIBUTING_MODELS.md @@ -1,13 +1,10 @@ # Contributing to Torchvision - Models - - - [New Model Architectures - Overview](#new-model-architectures-overview) -- [New Model Architectures - Implementation Details](#new-model-architectures-implementation-details) -- [New Weights for Existing Model Architectures](#new-weights-for-existing-model-architectures) - +- [New Model Architectures - Implementation Details](#new-model-architectures-implementation-details) +- [New Weights for Existing Model Architectures](#new-weights-for-existing-model-architectures) ## New Model Architectures - Overview @@ -106,7 +103,7 @@ Finally, run the model test to generate expected model files for testing. Please - submit a PR to [https://github.com/pytorch/hub](https://github.com/pytorch/hub) with a model page (or update an existing one) - `README.md` under the reference script folder: - + - command(s) to train the model @@ -114,10 +111,10 @@ Finally, run the model test to generate expected model files for testing. Please The process of improving existing models, for instance improving accuracy by retraining the model with a different set of hyperparameters or augmentations, is the following: -1. Open a ticket and discuss with the community and maintainers whether this improvement should be added to TorchVision. Note that to add new weights the improvement should be significant. +1. Open a ticket and discuss with the community and maintainers whether this improvement should be added to TorchVision. Note that to add new weights the improvement should be significant. 2. Train the model using TorchVision reference scripts. You can add new primitives (transforms, losses, etc) when necessary, but the final location will be determined after discussion with the dedicated maintainer. -3. Open a PR with the new weights, together with the training logs and the checkpoint chosen so the reviewers can verify the submission. Details on how the model was trained, i.e., the training command using the reference scripts, should be included in the PR. +3. Open a PR with the new weights, together with the training logs and the checkpoint chosen so the reviewers can verify the submission. Details on how the model was trained, i.e., the training command using the reference scripts, should be included in the PR. -4. The PR reviewers should replicate the results on their side to verify the submission and if all goes well the new weights should be ready to be released! +4. 
The PR reviewers should replicate the results on their side to verify the submission and if all goes well the new weights should be ready to be released! From 480683de61c915eb1bbdbff6b040f074d5c0ae39 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Sat, 29 Jan 2022 11:34:05 +0000 Subject: [PATCH 09/24] fix in-document links --- CONTRIBUTING_MODELS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING_MODELS.md b/CONTRIBUTING_MODELS.md index daf07affec8..e42e03e600b 100644 --- a/CONTRIBUTING_MODELS.md +++ b/CONTRIBUTING_MODELS.md @@ -1,8 +1,8 @@ # Contributing to Torchvision - Models -- [New Model Architectures - Overview](#new-model-architectures-overview) +- [New Model Architectures - Overview](#new-model-architectures---overview) -- [New Model Architectures - Implementation Details](#new-model-architectures-implementation-details) +- [New Model Architectures - Implementation Details](#new-model-architectures---implementation-details) - [New Weights for Existing Model Architectures](#new-weights-for-existing-model-architectures) From 74b52199573d70ff9e07c9e77a67313c39e5fe4a Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Sun, 30 Jan 2022 15:49:59 +0000 Subject: [PATCH 10/24] Update CONTRIBUTING.md Co-authored-by: Vasilis Vryniotis --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 99cadf98c12..e9b6204366c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -186,7 +186,7 @@ You can also choose to only build a subset of the examples by using the example ``EXAMPLES_PATTERN="transforms" make html`` will only build the examples with "transforms" in their name. -### New or improved model +### New architecture or improved model weights Please refer to the guidelines in [Contributing to Torchvision - Models](https://github.com/pytorch/vision/blob/main/CONTRIBUTING_MODELS.md). From e06c180a5d06f7f790016674442df266baa49046 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Mon, 31 Jan 2022 10:47:13 +0000 Subject: [PATCH 11/24] remove Implementation Details section --- CONTRIBUTING_MODELS.md | 57 +----------------------------------------- 1 file changed, 1 insertion(+), 56 deletions(-) diff --git a/CONTRIBUTING_MODELS.md b/CONTRIBUTING_MODELS.md index e42e03e600b..82845e6579a 100644 --- a/CONTRIBUTING_MODELS.md +++ b/CONTRIBUTING_MODELS.md @@ -2,8 +2,6 @@ - [New Model Architectures - Overview](#new-model-architectures---overview) -- [New Model Architectures - Implementation Details](#new-model-architectures---implementation-details) - - [New Weights for Existing Model Architectures](#new-weights-for-existing-model-architectures) ## New Model Architectures - Overview @@ -37,6 +35,7 @@ Please take a look at existing models in TorchVision to get familiar with the id - The PR description should include commands/configuration used to train the model, so that the TorchVision maintainers can easily run them to verify the implementation and generate the final model to be released - Make sure we re-use existing components as much as possible (inheritance) - New primitives (transforms, losses, etc) can be added if necessary, but the final location will be determined after discussion with the dedicated maintainer +- Please take a look at the detailed [implementation and documentation guidelines](https://github.com/pytorch/vision/issues/5319) for a fine grain list of things not to be missed ### 3. Train the model with reference scripts @@ -53,60 +52,6 @@ Submit a PR and tag the assigned maintainer. 
This PR should: - Highlight the important test metrics and how they compare to the original paper - Highlight any design choices that deviate from the original paper/implementation and rationale for these choices -## New Model Architectures - Implementation Details - -### Model development and training steps - -When developing a new model there are some details not to be missed: - -- Implement a model factory function for each of the model variants - -- in the module constructor, [pass layer constructor instead of instance](https://github.com/pytorch/vision/blob/47bd962069ba03f753e7ba711cb825317be0b00a/torchvision/models/efficientnet.py#L88) for configurable layers like norm, activation, and log the api usage with `_log_api_usage_once(self)` - -- fuse layers together with existing common blocks if possible; For example consecutive conv, bn, activation layers could be replaced by [ConvNormActication](https://github.com/pytorch/vision/blob/47bd962069ba03f753e7ba711cb825317be0b00a/torchvision/ops/misc.py#L104) - -- define `__all__` in the beginning of the model file to expose model factory functions; import model public APIs (e.g. factory methods) in `torchvision/models/__init__.py` - -- create the model builder using the new API and add it to the prototype area. Here is an [example](https://github.com/pytorch/vision/pull/4784/files) on how to do this. The new API requires adding more information about the weights such as the preprocessing transforms necessary for using the model, meta-data about the model, etc - -- Make sure you write tests for the model itself (see `_check_input_backprop`, `_model_params` and `_model_params` in `test/test_models.py`) and for any new operators/transforms or important functions that you introduce - -Note that this list is not exhaustive and there are details here related to the code quality etc, but these are rules that apply in all PRs (see [Contributing to TorchVision](https://github.com/pytorch/vision/blob/main/CONTRIBUTING.md)). - -Once the model is implemented, you need to train the model using the reference scripts. For example, in order to train a classification resnet18 model you would: - -1. go to `references/classification` - -2. run the train command (for example `torchrun --nproc_per_node=8 train.py --model resnet18`) - -After training the model, select the best checkpoint and estimate its accuracy with a batch size of 1 on a single GPU. This helps us get better measurements about the accuracy of the models and avoid variants introduced due to batch padding (read [here](https://github.com/pytorch/vision/pull/4609/commits/5264b1a670107bcb4dc89e83a369f6fd97466ef8) for more details). - -Finally, run the model test to generate expected model files for testing. Please include those generated files in the PR as well.: - -`EXPECTTEST_ACCEPT=1 pytest test/test_models.py -k {model_name}` - - -### Documentation and Pytorch Hub - -- `docs/source/models.rst`: - - - add the model to the corresponding section (classification/detection/video etc.) 
- - - describe how to construct the model variants (with and without pre-trained weights) - - - add model metrics and reference to the original paper - -- `hubconf.py`: - - - import the model factory functions - - - submit a PR to [https://github.com/pytorch/hub](https://github.com/pytorch/hub) with a model page (or update an existing one) - -- `README.md` under the reference script folder: - - - command(s) to train the model - - ## New Weights for Existing Model Architectures The process of improving existing models, for instance improving accuracy by retraining the model with a different set of hyperparameters or augmentations, is the following: From 0d3b4b45f14a65833131da57df105bf68a3afdd7 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Tue, 8 Feb 2022 20:26:19 +0000 Subject: [PATCH 12/24] Consolidating __repr__ strings --- references/classification/transforms.py | 32 ++-- test/test_datasets_download.py | 2 +- torchvision/models/detection/anchor_utils.py | 16 +- torchvision/models/detection/transform.py | 2 +- torchvision/models/efficientnet.py | 20 +- torchvision/ops/deform_conv.py | 24 +-- torchvision/ops/ps_roi_align.py | 14 +- torchvision/ops/ps_roi_pool.py | 7 +- torchvision/ops/roi_align.py | 16 +- torchvision/ops/roi_pool.py | 7 +- torchvision/ops/stochastic_depth.py | 7 +- torchvision/prototype/features/_feature.py | 2 +- torchvision/prototype/models/_api.py | 2 +- torchvision/prototype/models/convnext.py | 14 +- torchvision/transforms/_transforms_video.py | 25 ++- torchvision/transforms/autoaugment.py | 34 ++-- torchvision/transforms/transforms.py | 184 +++++++++---------- 17 files changed, 207 insertions(+), 201 deletions(-) diff --git a/references/classification/transforms.py b/references/classification/transforms.py index 69ee4182c54..892b4e7e6c0 100644 --- a/references/classification/transforms.py +++ b/references/classification/transforms.py @@ -72,13 +72,15 @@ def forward(self, batch: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]: return batch, target def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_classes={num_classes}" - s += ", p={p}" - s += ", alpha={alpha}" - s += ", inplace={inplace}" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"num_classes={self.num_classes}" + f", p={self.p}" + f", alpha={self.alpha}" + f", inplace={self.inplace}" + f")" + ) + return s class RandomCutmix(torch.nn.Module): @@ -162,10 +164,12 @@ def forward(self, batch: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]: return batch, target def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_classes={num_classes}" - s += ", p={p}" - s += ", alpha={alpha}" - s += ", inplace={inplace}" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"num_classes={self.num_classes}" + f", p={self.p}" + f", alpha={self.alpha}" + f", inplace={self.inplace}" + f")" + ) + return s diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py index 4bf31eba92b..4d2e475e1df 100644 --- a/test/test_datasets_download.py +++ b/test/test_datasets_download.py @@ -180,7 +180,7 @@ def __init__(self, url, md5=None, id=None): self.md5 = md5 self.id = id or url - def __repr__(self): + def __repr__(self) -> str: return self.id diff --git a/torchvision/models/detection/anchor_utils.py b/torchvision/models/detection/anchor_utils.py index 395bf6bbda6..202294153ea 100644 --- a/torchvision/models/detection/anchor_utils.py +++ b/torchvision/models/detection/anchor_utils.py @@ -239,13 +239,15 
@@ def _grid_default_boxes( return torch.cat(default_boxes, dim=0) def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "aspect_ratios={aspect_ratios}" - s += ", clip={clip}" - s += ", scales={scales}" - s += ", steps={steps}" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"aspect_ratios={self.aspect_ratios}" + f", clip={self.clip}" + f", scales={self.scales}" + f", steps={self.steps}" + ")" + ) + return s def forward(self, image_list: ImageList, feature_maps: List[Tensor]) -> List[Tensor]: grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps] diff --git a/torchvision/models/detection/transform.py b/torchvision/models/detection/transform.py index 90d19cfc8de..960e28500a1 100644 --- a/torchvision/models/detection/transform.py +++ b/torchvision/models/detection/transform.py @@ -260,7 +260,7 @@ def postprocess( return result def __repr__(self) -> str: - format_string = self.__class__.__name__ + "(" + format_string = f"{self.__class__.__name__}(" _indent = "\n " format_string += f"{_indent}Normalize(mean={self.image_mean}, std={self.image_std})" format_string += f"{_indent}Resize(min_size={self.min_size}, max_size={self.max_size}, mode='bilinear')" diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index 6837018c09e..f7eba46cb39 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -61,15 +61,17 @@ def __init__( self.num_layers = self.adjust_depth(num_layers, depth_mult) def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "expand_ratio={expand_ratio}" - s += ", kernel={kernel}" - s += ", stride={stride}" - s += ", input_channels={input_channels}" - s += ", out_channels={out_channels}" - s += ", num_layers={num_layers}" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"expand_ratio={self.expand_ratio}" + f", kernel={self.kernel}" + f", stride={self.stride}" + f", input_channels={self.input_channels}" + f", out_channels={self.out_channels}" + f", num_layers={self.num_layers}" + f")" + ) + return s @staticmethod def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int: diff --git a/torchvision/ops/deform_conv.py b/torchvision/ops/deform_conv.py index 9f9ed49f4b9..7bd7583f109 100644 --- a/torchvision/ops/deform_conv.py +++ b/torchvision/ops/deform_conv.py @@ -179,14 +179,16 @@ def forward(self, input: Tensor, offset: Tensor, mask: Optional[Tensor] = None) ) def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "{in_channels}" - s += ", {out_channels}" - s += ", kernel_size={kernel_size}" - s += ", stride={stride}" - s += ", padding={padding}" if self.padding != (0, 0) else "" - s += ", dilation={dilation}" if self.dilation != (1, 1) else "" - s += ", groups={groups}" if self.groups != 1 else "" - s += ", bias=False" if self.bias is None else "" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"{self.in_channels}" + f", {self.out_channels}" + f", kernel_size={self.kernel_size}" + f", stride={self.stride}" + f"{f', padding={self.padding}' if self.padding != (0, 0) else ''}" + f"{f', dilation={self.dilation}' if self.dilation != (1, 1) else ''}" + f"{f', groups={self.groups}' if self.groups != 1 else ''}" + f"{', bias=False' if self.bias is None else ''}" + f")" + ) + return s diff --git a/torchvision/ops/ps_roi_align.py b/torchvision/ops/ps_roi_align.py index b01dc35e129..7153e49ac05 100644 --- 
a/torchvision/ops/ps_roi_align.py +++ b/torchvision/ops/ps_roi_align.py @@ -78,9 +78,11 @@ def forward(self, input: Tensor, rois: Tensor) -> Tensor: return ps_roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio) def __repr__(self) -> str: - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) - tmpstr += ")" - return tmpstr + s = ( + f"{self.__class__.__name__}(" + f"output_size={self.output_size}" + f", spatial_scale={self.spatial_scale}" + f", sampling_ratio={self.sampling_ratio}" + f")" + ) + return s diff --git a/torchvision/ops/ps_roi_pool.py b/torchvision/ops/ps_roi_pool.py index 867383eacf1..14b23f8ffe1 100644 --- a/torchvision/ops/ps_roi_pool.py +++ b/torchvision/ops/ps_roi_pool.py @@ -64,8 +64,5 @@ def forward(self, input: Tensor, rois: Tensor) -> Tensor: return ps_roi_pool(input, rois, self.output_size, self.spatial_scale) def __repr__(self) -> str: - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ")" - return tmpstr + s = f"{self.__class__.__name__}(" f"output_size={self.output_size}" f", spatial_scale={self.spatial_scale}" f")" + return s diff --git a/torchvision/ops/roi_align.py b/torchvision/ops/roi_align.py index ed412965932..131c1b81d0f 100644 --- a/torchvision/ops/roi_align.py +++ b/torchvision/ops/roi_align.py @@ -86,10 +86,12 @@ def forward(self, input: Tensor, rois: Tensor) -> Tensor: return roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned) def __repr__(self) -> str: - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) - tmpstr += ", aligned=" + str(self.aligned) - tmpstr += ")" - return tmpstr + s = ( + f"{self.__class__.__name__}(" + f"output_size={self.output_size}" + f", spatial_scale={self.spatial_scale}" + f", sampling_ratio={self.sampling_ratio}" + f", aligned={self.aligned}" + f")" + ) + return s diff --git a/torchvision/ops/roi_pool.py b/torchvision/ops/roi_pool.py index 37a8f42058c..ead81597dfb 100644 --- a/torchvision/ops/roi_pool.py +++ b/torchvision/ops/roi_pool.py @@ -66,8 +66,5 @@ def forward(self, input: Tensor, rois: Tensor) -> Tensor: return roi_pool(input, rois, self.output_size, self.spatial_scale) def __repr__(self) -> str: - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ")" - return tmpstr + s = f"{self.__class__.__name__}(" f"output_size={self.output_size}" f", spatial_scale={self.spatial_scale}" f")" + return s diff --git a/torchvision/ops/stochastic_depth.py b/torchvision/ops/stochastic_depth.py index 9f5d432d79e..91f229c371b 100644 --- a/torchvision/ops/stochastic_depth.py +++ b/torchvision/ops/stochastic_depth.py @@ -62,8 +62,5 @@ def forward(self, input: Tensor) -> Tensor: return stochastic_depth(input, self.p, self.mode, self.training) def __repr__(self) -> str: - tmpstr = self.__class__.__name__ + "(" - tmpstr += "p=" + str(self.p) - tmpstr += ", mode=" + str(self.mode) - tmpstr += ")" - return tmpstr + s = f"{self.__class__.__name__}(" f"p={self.p}" f", mode={self.mode}" f")" + return s diff --git a/torchvision/prototype/features/_feature.py 
b/torchvision/prototype/features/_feature.py index cd52f1f80ad..1837ffc1e89 100644 --- a/torchvision/prototype/features/_feature.py +++ b/torchvision/prototype/features/_feature.py @@ -96,5 +96,5 @@ def __torch_function__( return cls(output, like=args[0]) - def __repr__(self): + def __repr__(self) -> str: return torch.Tensor.__repr__(self).replace("tensor", type(self).__name__) diff --git a/torchvision/prototype/models/_api.py b/torchvision/prototype/models/_api.py index e27b4d7adda..4ba0ee05f08 100644 --- a/torchvision/prototype/models/_api.py +++ b/torchvision/prototype/models/_api.py @@ -67,7 +67,7 @@ def from_str(cls, value: str) -> "WeightsEnum": def get_state_dict(self, progress: bool) -> OrderedDict: return load_state_dict_from_url(self.url, progress=progress) - def __repr__(self): + def __repr__(self) -> str: return f"{self.__class__.__name__}.{self._name_}" def __getattr__(self, name): diff --git a/torchvision/prototype/models/convnext.py b/torchvision/prototype/models/convnext.py index 7fb3026b4e0..9abe01da3e5 100644 --- a/torchvision/prototype/models/convnext.py +++ b/torchvision/prototype/models/convnext.py @@ -80,12 +80,14 @@ def __init__( self.num_layers = num_layers def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "input_channels={input_channels}" - s += ", out_channels={out_channels}" - s += ", num_layers={num_layers}" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"input_channels={self.input_channels}" + f", out_channels={self.out_channels}" + f", num_layers={self.num_layers}" + f")" + ) + return s class ConvNeXt(nn.Module): diff --git a/torchvision/transforms/_transforms_video.py b/torchvision/transforms/_transforms_video.py index 440a75f286c..7f11b628712 100644 --- a/torchvision/transforms/_transforms_video.py +++ b/torchvision/transforms/_transforms_video.py @@ -43,8 +43,8 @@ def __call__(self, clip): i, j, h, w = self.get_params(clip, self.size) return F.crop(clip, i, j, h, w) - def __repr__(self): - return self.__class__.__name__ + f"(size={self.size})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(size={self.size})" class RandomResizedCropVideo(RandomResizedCrop): @@ -76,11 +76,8 @@ def __call__(self, clip): i, j, h, w = self.get_params(clip, self.scale, self.ratio) return F.resized_crop(clip, i, j, h, w, self.size, self.interpolation_mode) - def __repr__(self): - return ( - self.__class__.__name__ - + f"(size={self.size}, interpolation_mode={self.interpolation_mode}, scale={self.scale}, ratio={self.ratio})" - ) + def __repr__(self) -> str: + return f"{self.__class__.__name__}(size={self.size}, interpolation_mode={self.interpolation_mode}, scale={self.scale}, ratio={self.ratio})" class CenterCropVideo: @@ -100,8 +97,8 @@ def __call__(self, clip): """ return F.center_crop(clip, self.crop_size) - def __repr__(self): - return self.__class__.__name__ + f"(crop_size={self.crop_size})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(crop_size={self.crop_size})" class NormalizeVideo: @@ -125,8 +122,8 @@ def __call__(self, clip): """ return F.normalize(clip, self.mean, self.std, self.inplace) - def __repr__(self): - return self.__class__.__name__ + f"(mean={self.mean}, std={self.std}, inplace={self.inplace})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(mean={self.mean}, std={self.std}, inplace={self.inplace})" class ToTensorVideo: @@ -147,7 +144,7 @@ def __call__(self, clip): """ return F.to_tensor(clip) - def __repr__(self): + def __repr__(self) -> str: 
return self.__class__.__name__ @@ -172,5 +169,5 @@ def __call__(self, clip): clip = F.hflip(clip) return clip - def __repr__(self): - return self.__class__.__name__ + f"(p={self.p})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(p={self.p})" diff --git a/torchvision/transforms/autoaugment.py b/torchvision/transforms/autoaugment.py index efb0cc90e8d..163f2fcef9a 100644 --- a/torchvision/transforms/autoaugment.py +++ b/torchvision/transforms/autoaugment.py @@ -270,7 +270,7 @@ def forward(self, img: Tensor) -> Tensor: return img def __repr__(self) -> str: - return self.__class__.__name__ + f"(policy={self.policy}, fill={self.fill})" + return f"{self.__class__.__name__}(policy={self.policy}, fill={self.fill})" class RandAugment(torch.nn.Module): @@ -353,14 +353,16 @@ def forward(self, img: Tensor) -> Tensor: return img def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_ops={num_ops}" - s += ", magnitude={magnitude}" - s += ", num_magnitude_bins={num_magnitude_bins}" - s += ", interpolation={interpolation}" - s += ", fill={fill}" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"num_ops={self.num_ops}" + f", magnitude={self.magnitude}" + f", num_magnitude_bins={self.num_magnitude_bins}" + f", interpolation={self.interpolation}" + f", fill={self.fill}" + f")" + ) + return s class TrivialAugmentWide(torch.nn.Module): @@ -438,9 +440,11 @@ def forward(self, img: Tensor) -> Tensor: return _apply_op(img, op_name, magnitude, interpolation=self.interpolation, fill=fill) def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_magnitude_bins={num_magnitude_bins}" - s += ", interpolation={interpolation}" - s += ", fill={fill}" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"num_magnitude_bins={self.num_magnitude_bins}" + f", interpolation={self.interpolation}" + f", fill={self.fill}" + f")" + ) + return s diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 660f1a6f2dd..9fe11990ba3 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -97,7 +97,7 @@ def __call__(self, img): img = t(img) return img - def __repr__(self): + def __repr__(self) -> str: format_string = self.__class__.__name__ + "(" for t in self.transforms: format_string += "\n" @@ -136,8 +136,8 @@ def __call__(self, pic): """ return F.to_tensor(pic) - def __repr__(self): - return self.__class__.__name__ + "()" + def __repr__(self) -> str: + return f"{self.__class__.__name__}()" class PILToTensor: @@ -163,8 +163,8 @@ def __call__(self, pic): """ return F.pil_to_tensor(pic) - def __repr__(self): - return self.__class__.__name__ + "()" + def __repr__(self) -> str: + return f"{self.__class__.__name__}()" class ConvertImageDtype(torch.nn.Module): @@ -228,7 +228,7 @@ def __call__(self, pic): """ return F.to_pil_image(pic, self.mode) - def __repr__(self): + def __repr__(self) -> str: format_string = self.__class__.__name__ + "(" if self.mode is not None: format_string += f"mode={self.mode}" @@ -271,8 +271,8 @@ def forward(self, tensor: Tensor) -> Tensor: """ return F.normalize(tensor, self.mean, self.std, self.inplace) - def __repr__(self): - return self.__class__.__name__ + f"(mean={self.mean}, std={self.std})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(mean={self.mean}, std={self.std})" class Resize(torch.nn.Module): @@ -350,9 +350,9 @@ def forward(self, img): """ return F.resize(img, self.size, 
self.interpolation, self.max_size, self.antialias)
- def __repr__(self):
+ def __repr__(self) -> str:
detail = f"(size={self.size}, interpolation={self.interpolation.value}, max_size={self.max_size}, antialias={self.antialias})"
- return self.__class__.__name__ + detail
+ return f"{self.__class__.__name__}{detail}"
class Scale(Resize):
@@ -393,8 +393,8 @@ def forward(self, img):
"""
return F.center_crop(img, self.size)
- def __repr__(self):
- return self.__class__.__name__ + f"(size={self.size})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(size={self.size})"
class Pad(torch.nn.Module):
@@ -466,8 +466,8 @@ def forward(self, img):
"""
return F.pad(img, self.padding, self.fill, self.padding_mode)
- def __repr__(self):
- return self.__class__.__name__ + f"(padding={self.padding}, fill={self.fill}, padding_mode={self.padding_mode})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(padding={self.padding}, fill={self.fill}, padding_mode={self.padding_mode})"
class Lambda:
@@ -486,8 +486,8 @@ def __init__(self, lambd):
def __call__(self, img):
return self.lambd(img)
- def __repr__(self):
- return self.__class__.__name__ + "()"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}()"
class RandomTransforms:
@@ -506,7 +506,7 @@ def __init__(self, transforms):
def __call__(self, *args, **kwargs):
raise NotImplementedError()
- def __repr__(self):
+ def __repr__(self) -> str:
format_string = self.__class__.__name__ + "("
for t in self.transforms:
format_string += "\n"
@@ -548,7 +548,7 @@ def forward(self, img):
img = t(img)
return img
- def __repr__(self):
+ def __repr__(self) -> str:
format_string = self.__class__.__name__ + "("
format_string += f"\n p={self.p}"
for t in self.transforms:
@@ -582,10 +582,8 @@ def __call__(self, *args):
t = random.choices(self.transforms, weights=self.p)[0]
return t(*args)
- def __repr__(self):
- format_string = super().__repr__()
- format_string += f"(p={self.p})"
- return format_string
+ def __repr__(self) -> str:
+ return f"{super().__repr__()}(p={self.p})"
class RandomCrop(torch.nn.Module):
@@ -692,8 +690,8 @@ def forward(self, img):
return F.crop(img, i, j, h, w)
- def __repr__(self):
- return self.__class__.__name__ + f"(size={self.size}, padding={self.padding})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(size={self.size}, padding={self.padding})"
class RandomHorizontalFlip(torch.nn.Module):
@@ -723,8 +721,8 @@ def forward(self, img):
return F.hflip(img)
return img
- def __repr__(self):
- return self.__class__.__name__ + f"(p={self.p})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(p={self.p})"
class RandomVerticalFlip(torch.nn.Module):
@@ -754,8 +752,8 @@ def forward(self, img):
return F.vflip(img)
return img
- def __repr__(self):
- return self.__class__.__name__ + f"(p={self.p})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(p={self.p})"
class RandomPerspective(torch.nn.Module):
@@ -855,8 +853,8 @@ def get_params(width: int, height: int, distortion_scale: float) -> Tuple[List[L
endpoints = [topleft, topright, botright, botleft]
return startpoints, endpoints
- def __repr__(self):
- return self.__class__.__name__ + f"(p={self.p})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(p={self.p})"
class RandomResizedCrop(torch.nn.Module):
@@ -967,7 +965,7 @@ def forward(self, img):
i, j, h, w = self.get_params(img, self.scale, self.ratio)
return F.resized_crop(img, i, j, h, w, self.size, self.interpolation)
- def __repr__(self):
+ def __repr__(self) -> str:
interpolate_str = self.interpolation.value
format_string = self.__class__.__name__ + f"(size={self.size}"
format_string += f", scale={tuple(round(s, 4) for s in self.scale)}"
@@ -1033,8 +1031,8 @@ def forward(self, img):
"""
return F.five_crop(img, self.size)
- def __repr__(self):
- return self.__class__.__name__ + f"(size={self.size})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(size={self.size})"
class TenCrop(torch.nn.Module):
@@ -1083,8 +1081,8 @@ def forward(self, img):
"""
return F.ten_crop(img, self.size, self.vertical_flip)
- def __repr__(self):
- return self.__class__.__name__ + f"(size={self.size}, vertical_flip={self.vertical_flip})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(size={self.size}, vertical_flip={self.vertical_flip})"
class LinearTransformation(torch.nn.Module):
@@ -1157,11 +1155,13 @@ def forward(self, tensor: Tensor) -> Tensor:
tensor = transformed_tensor.view(shape)
return tensor
- def __repr__(self):
- format_string = self.__class__.__name__ + "(transformation_matrix="
- format_string += str(self.transformation_matrix.tolist()) + ")"
- format_string += ", (mean_vector=" + str(self.mean_vector.tolist()) + ")"
- return format_string
+ def __repr__(self) -> str:
+ s = (
+ f"{self.__class__.__name__}(transformation_matrix="
+ f"{self.transformation_matrix.tolist()}"
+ f", mean_vector={self.mean_vector.tolist()})"
+ )
+ return s
class ColorJitter(torch.nn.Module):
@@ -1269,13 +1269,15 @@ def forward(self, img):
return img
- def __repr__(self):
- format_string = self.__class__.__name__ + "("
- format_string += f"brightness={self.brightness}"
- format_string += f", contrast={self.contrast}"
- format_string += f", saturation={self.saturation}"
- format_string += f", hue={self.hue})"
- return format_string
+ def __repr__(self) -> str:
+ s = (
+ f"{self.__class__.__name__}("
+ f"brightness={self.brightness}"
+ f", contrast={self.contrast}"
+ f", saturation={self.saturation}"
+ f", hue={self.hue})"
+ )
+ return s
class RandomRotation(torch.nn.Module):
@@ -1370,7 +1372,7 @@ def forward(self, img):
return F.rotate(img, angle, self.resample, self.expand, self.center, fill)
- def __repr__(self):
+ def __repr__(self) -> str:
interpolate_str = self.interpolation.value
format_string = self.__class__.__name__ + f"(degrees={self.degrees}"
format_string += f", interpolation={interpolate_str}"
@@ -1548,24 +1550,18 @@ def forward(self, img):
return F.affine(img, *ret, interpolation=self.interpolation, fill=fill, center=self.center)
- def __repr__(self):
- s = "{name}(degrees={degrees}"
- if self.translate is not None:
- s += ", translate={translate}"
- if self.scale is not None:
- s += ", scale={scale}"
- if self.shear is not None:
- s += ", shear={shear}"
- if self.interpolation != InterpolationMode.NEAREST:
- s += ", interpolation={interpolation}"
- if self.fill != 0:
- s += ", fill={fill}"
- if self.center is not None:
- s += ", center={center}"
- s += ")"
- d = dict(self.__dict__)
- d["interpolation"] = self.interpolation.value
- return s.format(name=self.__class__.__name__, **d)
+ def __repr__(self) -> str:
+ s = (
+ f"{self.__class__.__name__}(degrees={self.degrees}"
+ f"{f', translate={self.translate}' if self.translate is not None else ''}"
+ f"{f', scale={self.scale}' if self.scale is not None else ''}"
+ f"{f', shear={self.shear}' if self.shear is not None else ''}"
+ f"{f', interpolation={self.interpolation.value}' if self.interpolation != InterpolationMode.NEAREST else ''}"
+ f"{f', fill={self.fill}' if self.fill != 0 else ''}"
+ f"{f', center={self.center}' if self.center is not None else ''}"
+ f")"
+ )
+ return s
class Grayscale(torch.nn.Module):
@@ -1599,8 +1595,8 @@ def forward(self, img):
"""
return F.rgb_to_grayscale(img, num_output_channels=self.num_output_channels)
- def __repr__(self):
- return self.__class__.__name__ + f"(num_output_channels={self.num_output_channels})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(num_output_channels={self.num_output_channels})"
class RandomGrayscale(torch.nn.Module):
@@ -1637,8 +1633,8 @@ def forward(self, img):
return F.rgb_to_grayscale(img, num_output_channels=num_output_channels)
return img
- def __repr__(self):
- return self.__class__.__name__ + f"(p={self.p})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(p={self.p})"
class RandomErasing(torch.nn.Module):
@@ -1767,13 +1763,16 @@ def forward(self, img):
return F.erase(img, x, y, h, w, v, self.inplace)
return img
- def __repr__(self):
- s = f"(p={self.p}, "
- s += f"scale={self.scale}, "
- s += f"ratio={self.ratio}, "
- s += f"value={self.value}, "
- s += f"inplace={self.inplace})"
- return self.__class__.__name__ + s
+ def __repr__(self) -> str:
+ s = (
+ f"{self.__class__.__name__}"
+ f"(p={self.p}, "
+ f"scale={self.scale}, "
+ f"ratio={self.ratio}, "
+ f"value={self.value}, "
+ f"inplace={self.inplace})"
+ )
+ return s
class GaussianBlur(torch.nn.Module):
@@ -1837,10 +1836,9 @@ def forward(self, img: Tensor) -> Tensor:
sigma = self.get_params(self.sigma[0], self.sigma[1])
return F.gaussian_blur(img, self.kernel_size, [sigma, sigma])
- def __repr__(self):
- s = f"(kernel_size={self.kernel_size}, "
- s += f"sigma={self.sigma})"
- return self.__class__.__name__ + s
+ def __repr__(self) -> str:
+ s = f"{self.__class__.__name__}(kernel_size={self.kernel_size}, sigma={self.sigma})"
+ return s
def _setup_size(size, error_msg):
@@ -1902,8 +1900,8 @@ def forward(self, img):
return F.invert(img)
return img
- def __repr__(self):
- return self.__class__.__name__ + f"(p={self.p})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(p={self.p})"
class RandomPosterize(torch.nn.Module):
@@ -1935,8 +1933,8 @@ def forward(self, img):
return F.posterize(img, self.bits)
return img
- def __repr__(self):
- return self.__class__.__name__ + f"(bits={self.bits},p={self.p})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(bits={self.bits},p={self.p})"
class RandomSolarize(torch.nn.Module):
@@ -1968,8 +1966,8 @@ def forward(self, img):
return F.solarize(img, self.threshold)
return img
- def __repr__(self):
- return self.__class__.__name__ + f"(threshold={self.threshold},p={self.p})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(threshold={self.threshold},p={self.p})"
class RandomAdjustSharpness(torch.nn.Module):
@@ -2001,8 +1999,8 @@ def forward(self, img):
return F.adjust_sharpness(img, self.sharpness_factor)
return img
- def __repr__(self):
- return self.__class__.__name__ + f"(sharpness_factor={self.sharpness_factor},p={self.p})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(sharpness_factor={self.sharpness_factor},p={self.p})"
class RandomAutocontrast(torch.nn.Module):
@@ -2032,8 +2030,8 @@ def forward(self, img):
return F.autocontrast(img)
return img
- def __repr__(self):
- return self.__class__.__name__ + f"(p={self.p})"
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}(p={self.p})"
class RandomEqualize(torch.nn.Module):
@@ -2063,5 +2061,5 @@ def
forward(self, img): return F.equalize(img) return img - def __repr__(self): - return self.__class__.__name__ + f"(p={self.p})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(p={self.p})" From a2f369dee6e54d87dd44fa61fec2e9b57f2a198d Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Sat, 29 Jan 2022 11:11:38 +0000 Subject: [PATCH 13/24] add model contribution guidelines --- CONTRIBUTING.md | 4 +- CONTRIBUTING_MODELS.md | 211 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e9b6204366c..25ef4bda336 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -186,9 +186,9 @@ You can also choose to only build a subset of the examples by using the example ``EXAMPLES_PATTERN="transforms" make html`` will only build the examples with "transforms" in their name. -### New architecture or improved model weights +### New or improved model -Please refer to the guidelines in [Contributing to Torchvision - Models](https://github.com/pytorch/vision/blob/main/CONTRIBUTING_MODELS.md). +Please refer to guidelines in [Contributing to Torchvision - Models](https://github.com/pytorch/vision/blob/main/CONTRIBUTING_MODELS.md). ### New dataset diff --git a/CONTRIBUTING_MODELS.md b/CONTRIBUTING_MODELS.md index 82845e6579a..2ade54ef7b1 100644 --- a/CONTRIBUTING_MODELS.md +++ b/CONTRIBUTING_MODELS.md @@ -63,3 +63,214 @@ The process of improving existing models, for instance improving accuracy by ret 3. Open a PR with the new weights, together with the training logs and the checkpoint chosen so the reviewers can verify the submission. Details on how the model was trained, i.e., the training command using the reference scripts, should be included in the PR. 4. The PR reviewers should replicate the results on their side to verify the submission and if all goes well the new weights should be ready to be released! +======= +# Contributing to Torchvision + +We want to make contributing to this project as easy and transparent as possible. + +## TL;DR + +We appreciate all contributions. If you are interested in contributing to Torchvision, there are many ways to help out. +Your contributions may fall into the following categories: + +- It helps the project if you could + - Report issues you're facing + - Give a :+1: on issues that others reported and that are relevant to you + +- Answering queries on the issue tracker, investigating bugs are very valuable contributions to the project. + +- You would like to improve the documentation. This is no less important than improving the library itself! +If you find a typo in the documentation, do not hesitate to submit a GitHub pull request. + +- If you would like to fix a bug + - please pick one from the [list of open issues labelled as "help wanted"](https://github.com/pytorch/vision/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22) + - comment on the issue that you want to work on this issue + - send a PR with your fix, see below. + +- If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. + +## Issues + +We use GitHub issues to track public bugs. Please ensure your description is +clear and has sufficient instructions to be able to reproduce the issue. 
+ +## Development installation + +### Install PyTorch Nightly + +```bash +conda install pytorch -c pytorch-nightly +# or with pip (see https://pytorch.org/get-started/locally/) +# pip install numpy +# pip install --pre torch -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html +``` + +### Install Torchvision + +```bash +git clone https://github.com/pytorch/vision.git +cd vision +python setup.py develop +# or, for OSX +# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py develop +# for C++ debugging, please use DEBUG=1 +# DEBUG=1 python setup.py develop +pip install flake8 typing mypy pytest pytest-mock scipy +``` +You may also have to install `libpng-dev` and `libjpeg-turbo8-dev` libraries: +```bash +conda install libpng jpeg +``` + +## Development Process + +If you plan to modify the code or documentation, please follow the steps below: + +1. Fork the repository and create your branch from `main`. +2. If you have modified the code (new feature or bug-fix), please add unit tests. +3. If you have changed APIs, update the documentation. Make sure the documentation builds. +4. Ensure the test suite passes. +5. Make sure your code passes the formatting checks (see below). + +For more details about pull requests, +please read [GitHub's guides](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). + +If you would like to contribute a new model, please see [here](#New-model). + +If you would like to contribute a new dataset, please see [here](#New-dataset). + +### Code formatting and typing + +#### Formatting + +The torchvision code is formatted by [black](https://black.readthedocs.io/en/stable/), +and checked against pep8 compliance with [flake8](https://flake8.pycqa.org/en/latest/). +Instead of relying directly on `black` however, we rely on +[ufmt](https://github.com/omnilib/ufmt), for compatibility reasons with Facebook +internal infrastructure. + +To format your code, install `ufmt` with `pip install ufmt` and use e.g.: + +```bash +ufmt format torchvision +``` + +For the vast majority of cases, this is all you should need to run. For the +formatting to be a bit faster, you can also choose to only apply `ufmt` to the +files that were edited in your PR with e.g.: + +```bash +ufmt format `git diff main --name-only` +``` + +Similarly, you can check for `flake8` errors with `flake8 torchvision`, although +they should be fairly rare considering that most of the errors are automatically +taken care of by `ufmt` already. + +##### Pre-commit hooks + +For convenience and **purely optionally**, you can rely on [pre-commit +hooks](https://pre-commit.com/) which will run both `ufmt` and `flake8` prior to +every commit. + +First install the `pre-commit` package with `pip install pre-commit`, and then +run `pre-commit install` at the root of the repo for the hooks to be set up - +that's it. + +Feel free to read the [pre-commit docs](https://pre-commit.com/#usage) to learn +more and improve your workflow. You'll see for example that `pre-commit run +--all-files` will run both `ufmt` and `flake8` without the need for you to +commit anything, and that the `--no-verify` flag can be added to `git commit` to +temporarily deactivate the hooks. + +#### Type annotations + +The codebase has type annotations, please make sure to add type hints if required. 
We use `mypy` tool for type checking: +```bash +mypy --config-file mypy.ini +``` + +### Unit tests + +If you have modified the code by adding a new feature or a bug-fix, please add unit tests for that. To run a specific +test: +```bash +pytest test/ -vvv -k +# e.g. pytest test/test_transforms.py -vvv -k test_center_crop +``` + +If you would like to run all tests: +```bash +pytest test -vvv +``` + +Tests that require internet access should be in +`test/test_internet.py`. + +### Documentation + +Torchvision uses [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) +for formatting docstrings. Length of line inside docstrings block must be limited to 120 characters. + +Please, follow the instructions to build and deploy the documentation locally. + +#### Install requirements + +```bash +cd docs +pip install -r requirements.txt +``` + +#### Build + +```bash +cd docs +make html +``` + +Then open `docs/build/html/index.html` in your favorite browser. + +The docs are also automatically built when you submit a PR. The job that +builds the docs is named `build_docs`. You can access the rendered docs by +clicking on that job and then going to the "Artifacts" tab. + +You can clean the built docs and re-start the build from scratch by doing ``make +clean``. + +#### Building the example gallery - or not + +When you run ``make html`` for the first time, all the examples in the gallery +will be built. Subsequent builds should be faster, and will only build the +examples that have been modified. + +You can run ``make html-noplot`` to not build the examples at all. This is +useful after a ``make clean`` to do some quick checks that are not related to +the examples. + +You can also choose to only build a subset of the examples by using the +``EXAMPLES_PATTERN`` env variable, which accepts a regular expression. For +example ``EXAMPLES_PATTERN="transforms" make html`` will only build the examples +with "transforms" in their name. + +### New model + +More details on how to add a new model will be provided later. Please, do not send any PR with a new model without discussing +it in an issue as, most likely, it will not be accepted. + +### New dataset + +More details on how to add a new dataset will be provided later. Please, do not send any PR with a new dataset without discussing +it in an issue as, most likely, it will not be accepted. + +### Pull Request + +If all previous checks (flake8, mypy, unit tests) are passing, please send a PR. Submitted PR will pass other tests on +different operation systems, python versions and hardwares. + +For more details about pull requests workflow, +please read [GitHub's guides](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). + +## License + +By contributing to Torchvision, you agree that your contributions will be licensed +under the LICENSE file in the root directory of this source tree. 
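PATCH 12 above consolidates the `__repr__` implementations across the code base: the old pattern of building a template string and calling `.format(**self.__dict__)` is replaced by a single f-string, and the methods gain a `-> str` return annotation. A minimal sketch of the pattern on a hypothetical toy transform (the `GaussianNoise` class below is illustrative only and is not part of torchvision):

```python
import torch


class GaussianNoise(torch.nn.Module):
    """Hypothetical transform used only to illustrate the consolidated __repr__ style."""

    def __init__(self, mean: float = 0.0, sigma: float = 1.0, inplace: bool = False) -> None:
        super().__init__()
        self.mean = mean
        self.sigma = sigma
        self.inplace = inplace

    def forward(self, img: torch.Tensor) -> torch.Tensor:
        # Add Gaussian noise with the configured mean and standard deviation.
        noise = torch.randn_like(img) * self.sigma + self.mean
        return img.add_(noise) if self.inplace else img + noise

    # Old style removed by the patch:
    #   s = self.__class__.__name__ + "("
    #   s += "mean={mean}, sigma={sigma}, inplace={inplace})"
    #   return s.format(**self.__dict__)

    # New style introduced by the patch: one f-string, annotated return type.
    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(mean={self.mean}, sigma={self.sigma}, inplace={self.inplace})"


print(GaussianNoise(sigma=0.5))  # GaussianNoise(mean=0.0, sigma=0.5, inplace=False)
```

Compared with formatting a template against `self.__dict__`, the f-string form keeps the attribute accesses explicit, which static tooling can check, and avoids the extra indirection of a separate format step.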
From ad38624935259232ba3d1a04a0d569d5f5c9668e Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Sat, 29 Jan 2022 11:14:55 +0000 Subject: [PATCH 14/24] update CONTRIBUTING_MODELS.md --- CONTRIBUTING_MODELS.md | 222 +++++++++++++---------------------------- 1 file changed, 69 insertions(+), 153 deletions(-) diff --git a/CONTRIBUTING_MODELS.md b/CONTRIBUTING_MODELS.md index 2ade54ef7b1..4a2cf3c4bc4 100644 --- a/CONTRIBUTING_MODELS.md +++ b/CONTRIBUTING_MODELS.md @@ -1,4 +1,5 @@ # Contributing to Torchvision - Models +<<<<<<< HEAD - [New Model Architectures - Overview](#new-model-architectures---overview) @@ -65,212 +66,127 @@ The process of improving existing models, for instance improving accuracy by ret 4. The PR reviewers should replicate the results on their side to verify the submission and if all goes well the new weights should be ready to be released! ======= # Contributing to Torchvision +======= +>>>>>>> 1f79d422 (update CONTRIBUTING_MODELS.md) -We want to make contributing to this project as easy and transparent as possible. - -## TL;DR - -We appreciate all contributions. If you are interested in contributing to Torchvision, there are many ways to help out. -Your contributions may fall into the following categories: - -- It helps the project if you could - - Report issues you're facing - - Give a :+1: on issues that others reported and that are relevant to you - -- Answering queries on the issue tracker, investigating bugs are very valuable contributions to the project. - -- You would like to improve the documentation. This is no less important than improving the library itself! -If you find a typo in the documentation, do not hesitate to submit a GitHub pull request. + -- If you would like to fix a bug - - please pick one from the [list of open issues labelled as "help wanted"](https://github.com/pytorch/vision/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22) - - comment on the issue that you want to work on this issue - - send a PR with your fix, see below. +- [New Model Architectures - Overview](#new-model-architectures-overview) +- [New Model Architectures - Implementation Details](#new-model-architectures-implementation-details) +- [New Weights for Existing Model Architectures](#new-weights-for-existing-model-architectures) -- If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. + -## Issues -We use GitHub issues to track public bugs. Please ensure your description is -clear and has sufficient instructions to be able to reproduce the issue. +## New Model Architectures - Overview -## Development installation +For someone who would be interested in adding a model architecture, it is also expected to train the model, so here are a few important considerations: -### Install PyTorch Nightly +- Training big models requires lots of resources and the cost quickly adds up -```bash -conda install pytorch -c pytorch-nightly -# or with pip (see https://pytorch.org/get-started/locally/) -# pip install numpy -# pip install --pre torch -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html -``` +- Reproducing models is fun but also risky as you might not always get the results reported on the paper. 
It might require a huge amount of effort to close the gap -### Install Torchvision +- The contribution might not get merged if we significantly lack in terms of accuracy, speed etc -```bash -git clone https://github.com/pytorch/vision.git -cd vision -python setup.py develop -# or, for OSX -# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py develop -# for C++ debugging, please use DEBUG=1 -# DEBUG=1 python setup.py develop -pip install flake8 typing mypy pytest pytest-mock scipy -``` -You may also have to install `libpng-dev` and `libjpeg-turbo8-dev` libraries: -```bash -conda install libpng jpeg -``` +- Including new models in TorchVision might not be the best approach, so other options such as releasing the model through to [Pytorch Hub](https://pytorch.org/hub/) should be considered -## Development Process +So, before starting any work and submitting a PR there are a few critical things that need to be taken into account in order to make sure the planned contribution is within the context of TorchVision, and the requirements and expectations are discussed beforehand. If this step is skipped and a PR is submitted without prior discussion it will almost certainly be rejected. -If you plan to modify the code or documentation, please follow the steps below: +### 1. Preparation work -1. Fork the repository and create your branch from `main`. -2. If you have modified the code (new feature or bug-fix), please add unit tests. -3. If you have changed APIs, update the documentation. Make sure the documentation builds. -4. Ensure the test suite passes. -5. Make sure your code passes the formatting checks (see below). +- Start by looking into this [issue](https://github.com/pytorch/vision/issues/2707) in order to have an idea of the models that are being considered, express your willingness to add a new model and discuss with the community whether or not this model should be included in TorchVision. It is very important at this stage to make sure that there is an agreement on the value of having this model in TorchVision and there is no one else already working on it. -For more details about pull requests, -please read [GitHub's guides](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). +- If the decision is to include the new model, then please create a new ticket which will be used for all design and implementation discussions prior to the PR. One of the TorchVision maintainers will reach out at this stage and this will be your POC from this point onwards in order to provide support, guidance and regular feedback. -If you would like to contribute a new model, please see [here](#New-model). +### 2. Implement the model -If you would like to contribute a new dataset, please see [here](#New-dataset). +Please take a look at existing models in TorchVision to get familiar with the idioms. Also please look at recent contributions for new models. If in doubt about any design decisions you can ask for feedback on the issue created in step 1. 
Example of things to take into account: -### Code formatting and typing +- The implementation should be as close as possible to the canonical implementation/paper +- The PR must include the code implementation, documentation and tests +- It should also extend the existing reference scripts used to train the model +- The weights need to reproduce closely the results of the paper in terms of accuracy, even though the final weights to be deployed will be those trained by the TorchVision maintainers +- The PR description should include commands/configuration used to train the model, so that the TorchVision maintainers can easily run them to verify the implementation and generate the final model to be released +- Make sure we re-use existing components as much as possible (inheritance) +- New primitives (transforms, losses, etc) can be added if necessary, but the final location will be determined after discussion with the dedicated maintainer -#### Formatting +### 3. Train the model with reference scripts -The torchvision code is formatted by [black](https://black.readthedocs.io/en/stable/), -and checked against pep8 compliance with [flake8](https://flake8.pycqa.org/en/latest/). -Instead of relying directly on `black` however, we rely on -[ufmt](https://github.com/omnilib/ufmt), for compatibility reasons with Facebook -internal infrastructure. +To validate the new model against the common benchmark, as well as to generate pre-trained weights, you must use TorchVision’s reference scripts to train the model. -To format your code, install `ufmt` with `pip install ufmt` and use e.g.: +Make sure all logs and a final (or best) checkpoint are saved, because it is expected that a submission shows that a model has been successfully trained and the results are in line with the original paper/repository. This will allow the reviewers to quickly check the validity of the submission, but please note that the final model to be released will be re-trained by the maintainers in order to verify reproducibility, ensure that the changes occurred during the PR review did not introduce any bugs, and to avoid moving around a large amount of data (including all checkpoints and logs). -```bash -ufmt format torchvision -``` +### 4. Submit a PR -For the vast majority of cases, this is all you should need to run. For the -formatting to be a bit faster, you can also choose to only apply `ufmt` to the -files that were edited in your PR with e.g.: +Submit a PR and tag the assigned maintainer. This PR should: -```bash -ufmt format `git diff main --name-only` -``` +- Link the original ticket +- Provide a link for the original paper and the original repository if available +- Highlight the important test metrics and how they compare to the original paper +- Highlight any design choices that deviate from the original paper/implementation and rationale for these choices -Similarly, you can check for `flake8` errors with `flake8 torchvision`, although -they should be fairly rare considering that most of the errors are automatically -taken care of by `ufmt` already. +## New Model Architectures - Implementation Details -##### Pre-commit hooks +### Model development and training steps -For convenience and **purely optionally**, you can rely on [pre-commit -hooks](https://pre-commit.com/) which will run both `ufmt` and `flake8` prior to -every commit. 
+When developing a new model there are some details not to be missed: -First install the `pre-commit` package with `pip install pre-commit`, and then -run `pre-commit install` at the root of the repo for the hooks to be set up - -that's it. +- Implement a model factory function for each of the model variants -Feel free to read the [pre-commit docs](https://pre-commit.com/#usage) to learn -more and improve your workflow. You'll see for example that `pre-commit run ---all-files` will run both `ufmt` and `flake8` without the need for you to -commit anything, and that the `--no-verify` flag can be added to `git commit` to -temporarily deactivate the hooks. +- in the module constructor, [pass layer constructor instead of instance](https://github.com/pytorch/vision/blob/47bd962069ba03f753e7ba711cb825317be0b00a/torchvision/models/efficientnet.py#L88) for configurable layers like norm, activation, and log the api usage with `_log_api_usage_once(self)` -#### Type annotations +- fuse layers together with existing common blocks if possible; For example consecutive conv, bn, activation layers could be replaced by [ConvNormActication](https://github.com/pytorch/vision/blob/47bd962069ba03f753e7ba711cb825317be0b00a/torchvision/ops/misc.py#L104) -The codebase has type annotations, please make sure to add type hints if required. We use `mypy` tool for type checking: -```bash -mypy --config-file mypy.ini -``` +- define `__all__` in the beginning of the model file to expose model factory functions; import model public APIs (e.g. factory methods) in `torchvision/models/__init__.py` -### Unit tests +- create the model builder using the new API and add it to the prototype area. Here is an [example](https://github.com/pytorch/vision/pull/4784/files) on how to do this. The new API requires adding more information about the weights such as the preprocessing transforms necessary for using the model, meta-data about the model, etc -If you have modified the code by adding a new feature or a bug-fix, please add unit tests for that. To run a specific -test: -```bash -pytest test/ -vvv -k -# e.g. pytest test/test_transforms.py -vvv -k test_center_crop -``` +- Make sure you write tests for the model itself (see `_check_input_backprop`, `_model_params` and `_model_params` in `test/test_models.py`) and for any new operators/transforms or important functions that you introduce -If you would like to run all tests: -```bash -pytest test -vvv -``` +Note that this list is not exhaustive and there are details here related to the code quality etc, but these are rules that apply in all PRs (see [Contributing to TorchVision](https://github.com/pytorch/vision/blob/main/CONTRIBUTING.md)). -Tests that require internet access should be in -`test/test_internet.py`. +Once the model is implemented, you need to train the model using the reference scripts. For example, in order to train a classification resnet18 model you would: -### Documentation +1. go to `references/classification` -Torchvision uses [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) -for formatting docstrings. Length of line inside docstrings block must be limited to 120 characters. +2. run the train command (for example `torchrun --nproc_per_node=8 train.py --model resnet18`) -Please, follow the instructions to build and deploy the documentation locally. +After training the model, select the best checkpoint and estimate its accuracy with a batch size of 1 on a single GPU. 
This helps us get better measurements about the accuracy of the models and avoid variants introduced due to batch padding (read [here](https://github.com/pytorch/vision/pull/4609/commits/5264b1a670107bcb4dc89e83a369f6fd97466ef8) for more details). -#### Install requirements +Finally, run the model test to generate expected model files for testing. Please include those generated files in the PR as well.: -```bash -cd docs -pip install -r requirements.txt -``` +`EXPECTTEST_ACCEPT=1 pytest test/test_models.py -k {model_name}` -#### Build -```bash -cd docs -make html -``` +### Documentation and Pytorch Hub -Then open `docs/build/html/index.html` in your favorite browser. +- `docs/source/models.rst`: -The docs are also automatically built when you submit a PR. The job that -builds the docs is named `build_docs`. You can access the rendered docs by -clicking on that job and then going to the "Artifacts" tab. + - add the model to the corresponding section (classification/detection/video etc.) -You can clean the built docs and re-start the build from scratch by doing ``make -clean``. + - describe how to construct the model variants (with and without pre-trained weights) -#### Building the example gallery - or not + - add model metrics and reference to the original paper -When you run ``make html`` for the first time, all the examples in the gallery -will be built. Subsequent builds should be faster, and will only build the -examples that have been modified. +- `hubconf.py`: -You can run ``make html-noplot`` to not build the examples at all. This is -useful after a ``make clean`` to do some quick checks that are not related to -the examples. + - import the model factory functions -You can also choose to only build a subset of the examples by using the -``EXAMPLES_PATTERN`` env variable, which accepts a regular expression. For -example ``EXAMPLES_PATTERN="transforms" make html`` will only build the examples -with "transforms" in their name. + - submit a PR to [https://github.com/pytorch/hub](https://github.com/pytorch/hub) with a model page (or update an existing one) -### New model +- `README.md` under the reference script folder: + + - command(s) to train the model -More details on how to add a new model will be provided later. Please, do not send any PR with a new model without discussing -it in an issue as, most likely, it will not be accepted. - -### New dataset -More details on how to add a new dataset will be provided later. Please, do not send any PR with a new dataset without discussing -it in an issue as, most likely, it will not be accepted. +## New Weights for Existing Model Architectures -### Pull Request +The process of improving existing models, for instance improving accuracy by retraining the model with a different set of hyperparameters or augmentations, is the following: -If all previous checks (flake8, mypy, unit tests) are passing, please send a PR. Submitted PR will pass other tests on -different operation systems, python versions and hardwares. +1. Open a ticket and discuss with the community and maintainers whether this improvement should be added to TorchVision. Note that to add new weights the improvement should be significant. -For more details about pull requests workflow, -please read [GitHub's guides](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). +2. Train the model using TorchVision reference scripts. 
You can add new primitives (transforms, losses, etc) when necessary, but the final location will be determined after discussion with the dedicated maintainer. -## License +3. Open a PR with the new weights, together with the training logs and the checkpoint chosen so the reviewers can verify the submission. Details on how the model was trained, i.e., the training command using the reference scripts, should be included in the PR. -By contributing to Torchvision, you agree that your contributions will be licensed -under the LICENSE file in the root directory of this source tree. +4. The PR reviewers should replicate the results on their side to verify the submission and if all goes well the new weights should be ready to be released! From da97cde03727e9ab44ef81c1a43d03747434e071 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Sat, 29 Jan 2022 11:30:37 +0000 Subject: [PATCH 15/24] Fix formatting and typo --- CONTRIBUTING.md | 2 +- CONTRIBUTING_MODELS.md | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 25ef4bda336..99cadf98c12 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -188,7 +188,7 @@ with "transforms" in their name. ### New or improved model -Please refer to guidelines in [Contributing to Torchvision - Models](https://github.com/pytorch/vision/blob/main/CONTRIBUTING_MODELS.md). +Please refer to the guidelines in [Contributing to Torchvision - Models](https://github.com/pytorch/vision/blob/main/CONTRIBUTING_MODELS.md). ### New dataset diff --git a/CONTRIBUTING_MODELS.md b/CONTRIBUTING_MODELS.md index 4a2cf3c4bc4..c54a124d9d8 100644 --- a/CONTRIBUTING_MODELS.md +++ b/CONTRIBUTING_MODELS.md @@ -69,14 +69,11 @@ The process of improving existing models, for instance improving accuracy by ret ======= >>>>>>> 1f79d422 (update CONTRIBUTING_MODELS.md) - - - [New Model Architectures - Overview](#new-model-architectures-overview) -- [New Model Architectures - Implementation Details](#new-model-architectures-implementation-details) -- [New Weights for Existing Model Architectures](#new-weights-for-existing-model-architectures) - +- [New Model Architectures - Implementation Details](#new-model-architectures-implementation-details) +- [New Weights for Existing Model Architectures](#new-weights-for-existing-model-architectures) ## New Model Architectures - Overview @@ -175,7 +172,7 @@ Finally, run the model test to generate expected model files for testing. Please - submit a PR to [https://github.com/pytorch/hub](https://github.com/pytorch/hub) with a model page (or update an existing one) - `README.md` under the reference script folder: - + - command(s) to train the model @@ -183,10 +180,10 @@ Finally, run the model test to generate expected model files for testing. Please The process of improving existing models, for instance improving accuracy by retraining the model with a different set of hyperparameters or augmentations, is the following: -1. Open a ticket and discuss with the community and maintainers whether this improvement should be added to TorchVision. Note that to add new weights the improvement should be significant. +1. Open a ticket and discuss with the community and maintainers whether this improvement should be added to TorchVision. Note that to add new weights the improvement should be significant. 2. Train the model using TorchVision reference scripts. 
You can add new primitives (transforms, losses, etc) when necessary, but the final location will be determined after discussion with the dedicated maintainer. -3. Open a PR with the new weights, together with the training logs and the checkpoint chosen so the reviewers can verify the submission. Details on how the model was trained, i.e., the training command using the reference scripts, should be included in the PR. +3. Open a PR with the new weights, together with the training logs and the checkpoint chosen so the reviewers can verify the submission. Details on how the model was trained, i.e., the training command using the reference scripts, should be included in the PR. -4. The PR reviewers should replicate the results on their side to verify the submission and if all goes well the new weights should be ready to be released! +4. The PR reviewers should replicate the results on their side to verify the submission and if all goes well the new weights should be ready to be released! From 452b727cd03fc23318f7179467d5c8648f7c02be Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Sat, 29 Jan 2022 11:34:05 +0000 Subject: [PATCH 16/24] fix in-document links --- CONTRIBUTING_MODELS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING_MODELS.md b/CONTRIBUTING_MODELS.md index c54a124d9d8..cf23d90eda4 100644 --- a/CONTRIBUTING_MODELS.md +++ b/CONTRIBUTING_MODELS.md @@ -69,9 +69,9 @@ The process of improving existing models, for instance improving accuracy by ret ======= >>>>>>> 1f79d422 (update CONTRIBUTING_MODELS.md) -- [New Model Architectures - Overview](#new-model-architectures-overview) +- [New Model Architectures - Overview](#new-model-architectures---overview) -- [New Model Architectures - Implementation Details](#new-model-architectures-implementation-details) +- [New Model Architectures - Implementation Details](#new-model-architectures---implementation-details) - [New Weights for Existing Model Architectures](#new-weights-for-existing-model-architectures) From 89cfacab3e266310dcce0f9e26a8125ba01aeef8 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Sun, 30 Jan 2022 15:49:59 +0000 Subject: [PATCH 17/24] Update CONTRIBUTING.md Co-authored-by: Vasilis Vryniotis --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 99cadf98c12..e9b6204366c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -186,7 +186,7 @@ You can also choose to only build a subset of the examples by using the example ``EXAMPLES_PATTERN="transforms" make html`` will only build the examples with "transforms" in their name. -### New or improved model +### New architecture or improved model weights Please refer to the guidelines in [Contributing to Torchvision - Models](https://github.com/pytorch/vision/blob/main/CONTRIBUTING_MODELS.md). 
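The implementation guidelines added in PATCH 14 ask for one factory function per model variant and for configurable layers such as normalization to be passed as constructors rather than instances. A minimal sketch of that pattern, using purely hypothetical names (`ToyNet`, `toynet_small`, `toynet_large` do not exist in torchvision):

```python
from functools import partial
from typing import Any, Callable, Optional

import torch
from torch import nn

# Expose the public factory functions, as the guidelines suggest.
__all__ = ["ToyNet", "toynet_small", "toynet_large"]


class ToyNet(nn.Module):
    def __init__(
        self,
        width: int,
        num_classes: int = 1000,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        # The norm layer arrives as a constructor (callable), not an instance,
        # so each block can build its own correctly sized copy.
        if norm_layer is None:
            norm_layer = partial(nn.BatchNorm2d, eps=1e-5)
        self.features = nn.Sequential(
            nn.Conv2d(3, width, kernel_size=3, stride=2, padding=1, bias=False),
            norm_layer(width),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d(1),
        )
        self.classifier = nn.Linear(width, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.classifier(torch.flatten(self.features(x), 1))


def _toynet(width: int, **kwargs: Any) -> ToyNet:
    return ToyNet(width, **kwargs)


# One factory function per model variant.
def toynet_small(**kwargs: Any) -> ToyNet:
    return _toynet(32, **kwargs)


def toynet_large(**kwargs: Any) -> ToyNet:
    return _toynet(128, **kwargs)
```

Because the normalization layer is a callable, a caller can swap it (for example with a `functools.partial` of `nn.GroupNorm`) without touching the model body, which is the flexibility that guideline is after.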
From 217f45158e65a2448d048f81e3fc6f426bfabb04 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Mon, 31 Jan 2022 10:47:13 +0000 Subject: [PATCH 18/24] remove Implementation Details section --- CONTRIBUTING_MODELS.md | 57 +----------------------------------------- 1 file changed, 1 insertion(+), 56 deletions(-) diff --git a/CONTRIBUTING_MODELS.md b/CONTRIBUTING_MODELS.md index cf23d90eda4..da08d5a8622 100644 --- a/CONTRIBUTING_MODELS.md +++ b/CONTRIBUTING_MODELS.md @@ -71,8 +71,6 @@ The process of improving existing models, for instance improving accuracy by ret - [New Model Architectures - Overview](#new-model-architectures---overview) -- [New Model Architectures - Implementation Details](#new-model-architectures---implementation-details) - - [New Weights for Existing Model Architectures](#new-weights-for-existing-model-architectures) ## New Model Architectures - Overview @@ -106,6 +104,7 @@ Please take a look at existing models in TorchVision to get familiar with the id - The PR description should include commands/configuration used to train the model, so that the TorchVision maintainers can easily run them to verify the implementation and generate the final model to be released - Make sure we re-use existing components as much as possible (inheritance) - New primitives (transforms, losses, etc) can be added if necessary, but the final location will be determined after discussion with the dedicated maintainer +- Please take a look at the detailed [implementation and documentation guidelines](https://github.com/pytorch/vision/issues/5319) for a fine grain list of things not to be missed ### 3. Train the model with reference scripts @@ -122,60 +121,6 @@ Submit a PR and tag the assigned maintainer. This PR should: - Highlight the important test metrics and how they compare to the original paper - Highlight any design choices that deviate from the original paper/implementation and rationale for these choices -## New Model Architectures - Implementation Details - -### Model development and training steps - -When developing a new model there are some details not to be missed: - -- Implement a model factory function for each of the model variants - -- in the module constructor, [pass layer constructor instead of instance](https://github.com/pytorch/vision/blob/47bd962069ba03f753e7ba711cb825317be0b00a/torchvision/models/efficientnet.py#L88) for configurable layers like norm, activation, and log the api usage with `_log_api_usage_once(self)` - -- fuse layers together with existing common blocks if possible; For example consecutive conv, bn, activation layers could be replaced by [ConvNormActication](https://github.com/pytorch/vision/blob/47bd962069ba03f753e7ba711cb825317be0b00a/torchvision/ops/misc.py#L104) - -- define `__all__` in the beginning of the model file to expose model factory functions; import model public APIs (e.g. factory methods) in `torchvision/models/__init__.py` - -- create the model builder using the new API and add it to the prototype area. Here is an [example](https://github.com/pytorch/vision/pull/4784/files) on how to do this. 
The new API requires adding more information about the weights such as the preprocessing transforms necessary for using the model, meta-data about the model, etc - -- Make sure you write tests for the model itself (see `_check_input_backprop`, `_model_params` and `_model_params` in `test/test_models.py`) and for any new operators/transforms or important functions that you introduce - -Note that this list is not exhaustive and there are details here related to the code quality etc, but these are rules that apply in all PRs (see [Contributing to TorchVision](https://github.com/pytorch/vision/blob/main/CONTRIBUTING.md)). - -Once the model is implemented, you need to train the model using the reference scripts. For example, in order to train a classification resnet18 model you would: - -1. go to `references/classification` - -2. run the train command (for example `torchrun --nproc_per_node=8 train.py --model resnet18`) - -After training the model, select the best checkpoint and estimate its accuracy with a batch size of 1 on a single GPU. This helps us get better measurements about the accuracy of the models and avoid variants introduced due to batch padding (read [here](https://github.com/pytorch/vision/pull/4609/commits/5264b1a670107bcb4dc89e83a369f6fd97466ef8) for more details). - -Finally, run the model test to generate expected model files for testing. Please include those generated files in the PR as well.: - -`EXPECTTEST_ACCEPT=1 pytest test/test_models.py -k {model_name}` - - -### Documentation and Pytorch Hub - -- `docs/source/models.rst`: - - - add the model to the corresponding section (classification/detection/video etc.) - - - describe how to construct the model variants (with and without pre-trained weights) - - - add model metrics and reference to the original paper - -- `hubconf.py`: - - - import the model factory functions - - - submit a PR to [https://github.com/pytorch/hub](https://github.com/pytorch/hub) with a model page (or update an existing one) - -- `README.md` under the reference script folder: - - - command(s) to train the model - - ## New Weights for Existing Model Architectures The process of improving existing models, for instance improving accuracy by retraining the model with a different set of hyperparameters or augmentations, is the following: From 6ecd67560f99a5da28132fbd30f4db09f5c1615e Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Tue, 8 Feb 2022 20:26:19 +0000 Subject: [PATCH 19/24] Consolidating __repr__ strings --- references/classification/transforms.py | 32 +-- test/test_datasets_download.py | 2 +- torchvision/models/detection/anchor_utils.py | 16 +- torchvision/models/detection/transform.py | 2 +- torchvision/models/efficientnet.py | 20 +- torchvision/ops/deform_conv.py | 24 +-- torchvision/ops/ps_roi_align.py | 14 +- torchvision/ops/ps_roi_pool.py | 7 +- torchvision/ops/roi_align.py | 16 +- torchvision/ops/roi_pool.py | 7 +- torchvision/ops/stochastic_depth.py | 7 +- torchvision/prototype/features/_feature.py | 2 +- torchvision/prototype/models/_api.py | 2 +- torchvision/prototype/models/convnext.py | 193 +++++++++++++++---- torchvision/transforms/_transforms_video.py | 25 ++- torchvision/transforms/autoaugment.py | 34 ++-- torchvision/transforms/transforms.py | 184 +++++++++--------- 17 files changed, 356 insertions(+), 231 deletions(-) diff --git a/references/classification/transforms.py b/references/classification/transforms.py index 69ee4182c54..892b4e7e6c0 100644 --- a/references/classification/transforms.py +++ 
b/references/classification/transforms.py @@ -72,13 +72,15 @@ def forward(self, batch: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]: return batch, target def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_classes={num_classes}" - s += ", p={p}" - s += ", alpha={alpha}" - s += ", inplace={inplace}" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"num_classes={self.num_classes}" + f", p={self.p}" + f", alpha={self.alpha}" + f", inplace={self.inplace}" + f")" + ) + return s class RandomCutmix(torch.nn.Module): @@ -162,10 +164,12 @@ def forward(self, batch: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]: return batch, target def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_classes={num_classes}" - s += ", p={p}" - s += ", alpha={alpha}" - s += ", inplace={inplace}" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"num_classes={self.num_classes}" + f", p={self.p}" + f", alpha={self.alpha}" + f", inplace={self.inplace}" + f")" + ) + return s diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py index 4bf31eba92b..4d2e475e1df 100644 --- a/test/test_datasets_download.py +++ b/test/test_datasets_download.py @@ -180,7 +180,7 @@ def __init__(self, url, md5=None, id=None): self.md5 = md5 self.id = id or url - def __repr__(self): + def __repr__(self) -> str: return self.id diff --git a/torchvision/models/detection/anchor_utils.py b/torchvision/models/detection/anchor_utils.py index 395bf6bbda6..202294153ea 100644 --- a/torchvision/models/detection/anchor_utils.py +++ b/torchvision/models/detection/anchor_utils.py @@ -239,13 +239,15 @@ def _grid_default_boxes( return torch.cat(default_boxes, dim=0) def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "aspect_ratios={aspect_ratios}" - s += ", clip={clip}" - s += ", scales={scales}" - s += ", steps={steps}" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"aspect_ratios={self.aspect_ratios}" + f", clip={self.clip}" + f", scales={self.scales}" + f", steps={self.steps}" + ")" + ) + return s def forward(self, image_list: ImageList, feature_maps: List[Tensor]) -> List[Tensor]: grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps] diff --git a/torchvision/models/detection/transform.py b/torchvision/models/detection/transform.py index 90d19cfc8de..960e28500a1 100644 --- a/torchvision/models/detection/transform.py +++ b/torchvision/models/detection/transform.py @@ -260,7 +260,7 @@ def postprocess( return result def __repr__(self) -> str: - format_string = self.__class__.__name__ + "(" + format_string = f"{self.__class__.__name__}(" _indent = "\n " format_string += f"{_indent}Normalize(mean={self.image_mean}, std={self.image_std})" format_string += f"{_indent}Resize(min_size={self.min_size}, max_size={self.max_size}, mode='bilinear')" diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index 6837018c09e..f7eba46cb39 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -61,15 +61,17 @@ def __init__( self.num_layers = self.adjust_depth(num_layers, depth_mult) def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "expand_ratio={expand_ratio}" - s += ", kernel={kernel}" - s += ", stride={stride}" - s += ", input_channels={input_channels}" - s += ", out_channels={out_channels}" - s += ", num_layers={num_layers}" - s += ")" - return 
s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"expand_ratio={self.expand_ratio}" + f", kernel={self.kernel}" + f", stride={self.stride}" + f", input_channels={self.input_channels}" + f", out_channels={self.out_channels}" + f", num_layers={self.num_layers}" + f")" + ) + return s @staticmethod def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int: diff --git a/torchvision/ops/deform_conv.py b/torchvision/ops/deform_conv.py index 9f9ed49f4b9..7bd7583f109 100644 --- a/torchvision/ops/deform_conv.py +++ b/torchvision/ops/deform_conv.py @@ -179,14 +179,16 @@ def forward(self, input: Tensor, offset: Tensor, mask: Optional[Tensor] = None) ) def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "{in_channels}" - s += ", {out_channels}" - s += ", kernel_size={kernel_size}" - s += ", stride={stride}" - s += ", padding={padding}" if self.padding != (0, 0) else "" - s += ", dilation={dilation}" if self.dilation != (1, 1) else "" - s += ", groups={groups}" if self.groups != 1 else "" - s += ", bias=False" if self.bias is None else "" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"{self.in_channels}" + f", {self.out_channels}" + f", kernel_size={self.kernel_size}" + f", stride={self.stride}" + f"{f', padding={self.padding}' if self.padding != (0, 0) else ''}" + f"{f', dilation={self.dilation}' if self.dilation != (1, 1) else ''}" + f"{f', groups={self.groups}' if self.groups != 1 else ''}" + f"{', bias=False' if self.bias is None else ''}" + f")" + ) + return s diff --git a/torchvision/ops/ps_roi_align.py b/torchvision/ops/ps_roi_align.py index b01dc35e129..7153e49ac05 100644 --- a/torchvision/ops/ps_roi_align.py +++ b/torchvision/ops/ps_roi_align.py @@ -78,9 +78,11 @@ def forward(self, input: Tensor, rois: Tensor) -> Tensor: return ps_roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio) def __repr__(self) -> str: - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) - tmpstr += ")" - return tmpstr + s = ( + f"{self.__class__.__name__}(" + f"output_size={self.output_size}" + f", spatial_scale={self.spatial_scale}" + f", sampling_ratio={self.sampling_ratio}" + f")" + ) + return s diff --git a/torchvision/ops/ps_roi_pool.py b/torchvision/ops/ps_roi_pool.py index 867383eacf1..14b23f8ffe1 100644 --- a/torchvision/ops/ps_roi_pool.py +++ b/torchvision/ops/ps_roi_pool.py @@ -64,8 +64,5 @@ def forward(self, input: Tensor, rois: Tensor) -> Tensor: return ps_roi_pool(input, rois, self.output_size, self.spatial_scale) def __repr__(self) -> str: - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ")" - return tmpstr + s = f"{self.__class__.__name__}(" f"output_size={self.output_size}" f", spatial_scale={self.spatial_scale}" f")" + return s diff --git a/torchvision/ops/roi_align.py b/torchvision/ops/roi_align.py index ed412965932..131c1b81d0f 100644 --- a/torchvision/ops/roi_align.py +++ b/torchvision/ops/roi_align.py @@ -86,10 +86,12 @@ def forward(self, input: Tensor, rois: Tensor) -> Tensor: return roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned) def __repr__(self) -> str: - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + 
str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) - tmpstr += ", aligned=" + str(self.aligned) - tmpstr += ")" - return tmpstr + s = ( + f"{self.__class__.__name__}(" + f"output_size={self.output_size}" + f", spatial_scale={self.spatial_scale}" + f", sampling_ratio={self.sampling_ratio}" + f", aligned={self.aligned}" + f")" + ) + return s diff --git a/torchvision/ops/roi_pool.py b/torchvision/ops/roi_pool.py index 37a8f42058c..ead81597dfb 100644 --- a/torchvision/ops/roi_pool.py +++ b/torchvision/ops/roi_pool.py @@ -66,8 +66,5 @@ def forward(self, input: Tensor, rois: Tensor) -> Tensor: return roi_pool(input, rois, self.output_size, self.spatial_scale) def __repr__(self) -> str: - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ")" - return tmpstr + s = f"{self.__class__.__name__}(" f"output_size={self.output_size}" f", spatial_scale={self.spatial_scale}" f")" + return s diff --git a/torchvision/ops/stochastic_depth.py b/torchvision/ops/stochastic_depth.py index 9f5d432d79e..91f229c371b 100644 --- a/torchvision/ops/stochastic_depth.py +++ b/torchvision/ops/stochastic_depth.py @@ -62,8 +62,5 @@ def forward(self, input: Tensor) -> Tensor: return stochastic_depth(input, self.p, self.mode, self.training) def __repr__(self) -> str: - tmpstr = self.__class__.__name__ + "(" - tmpstr += "p=" + str(self.p) - tmpstr += ", mode=" + str(self.mode) - tmpstr += ")" - return tmpstr + s = f"{self.__class__.__name__}(" f"p={self.p}" f", mode={self.mode}" f")" + return s diff --git a/torchvision/prototype/features/_feature.py b/torchvision/prototype/features/_feature.py index cd52f1f80ad..1837ffc1e89 100644 --- a/torchvision/prototype/features/_feature.py +++ b/torchvision/prototype/features/_feature.py @@ -96,5 +96,5 @@ def __torch_function__( return cls(output, like=args[0]) - def __repr__(self): + def __repr__(self) -> str: return torch.Tensor.__repr__(self).replace("tensor", type(self).__name__) diff --git a/torchvision/prototype/models/_api.py b/torchvision/prototype/models/_api.py index e27b4d7adda..4ba0ee05f08 100644 --- a/torchvision/prototype/models/_api.py +++ b/torchvision/prototype/models/_api.py @@ -67,7 +67,7 @@ def from_str(cls, value: str) -> "WeightsEnum": def get_state_dict(self, progress: bool) -> OrderedDict: return load_state_dict_from_url(self.url, progress=progress) - def __repr__(self): + def __repr__(self) -> str: return f"{self.__class__.__name__}.{self._name_}" def __getattr__(self, name): diff --git a/torchvision/prototype/models/convnext.py b/torchvision/prototype/models/convnext.py index ab9d08fbd3a..0ea0f0766da 100644 --- a/torchvision/prototype/models/convnext.py +++ b/torchvision/prototype/models/convnext.py @@ -10,47 +10,168 @@ from ._utils import handle_legacy_interface, _ovewrite_named_param -__all__ = [ - "ConvNeXt", - "ConvNeXt_Tiny_Weights", - "ConvNeXt_Small_Weights", - "ConvNeXt_Base_Weights", - "ConvNeXt_Large_Weights", - "convnext_tiny", - "convnext_small", - "convnext_base", - "convnext_large", -] - - -def _convnext( - block_setting: List[CNBlockConfig], - stochastic_depth_prob: float, - weights: Optional[WeightsEnum], - progress: bool, - **kwargs: Any, -) -> ConvNeXt: - if weights is not None: - _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) +__all__ = ["ConvNeXt", "ConvNeXt_Tiny_Weights", "convnext_tiny"] + + +class 
LayerNorm2d(nn.LayerNorm): + def __init__(self, *args: Any, **kwargs: Any) -> None: + self.channels_last = kwargs.pop("channels_last", False) + super().__init__(*args, **kwargs) + + def forward(self, x: Tensor) -> Tensor: + # TODO: Benchmark this against the approach described at https://github.com/pytorch/vision/pull/5197#discussion_r786251298 + if not self.channels_last: + x = x.permute(0, 2, 3, 1) + x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) + if not self.channels_last: + x = x.permute(0, 3, 1, 2) + return x + + +class CNBlock(nn.Module): + def __init__( + self, dim, layer_scale: float, stochastic_depth_prob: float, norm_layer: Callable[..., nn.Module] + ) -> None: + super().__init__() + self.block = nn.Sequential( + ConvNormActivation( + dim, + dim, + kernel_size=7, + groups=dim, + norm_layer=norm_layer, + activation_layer=None, + bias=True, + ), + ConvNormActivation(dim, 4 * dim, kernel_size=1, norm_layer=None, activation_layer=nn.GELU, inplace=None), + ConvNormActivation( + 4 * dim, + dim, + kernel_size=1, + norm_layer=None, + activation_layer=None, + ), + ) + self.layer_scale = nn.Parameter(torch.ones(dim, 1, 1) * layer_scale) + self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row") + + def forward(self, input: Tensor) -> Tensor: + result = self.layer_scale * self.block(input) + result = self.stochastic_depth(result) + result += input + return result + + +class CNBlockConfig: + # Stores information listed at Section 3 of the ConvNeXt paper + def __init__( + self, + input_channels: int, + out_channels: Optional[int], + num_layers: int, + ) -> None: + self.input_channels = input_channels + self.out_channels = out_channels + self.num_layers = num_layers + + def __repr__(self) -> str: + s = ( + f"{self.__class__.__name__}(" + f"input_channels={self.input_channels}" + f", out_channels={self.out_channels}" + f", num_layers={self.num_layers}" + f")" + ) + return s + + +class ConvNeXt(nn.Module): + def __init__( + self, + block_setting: List[CNBlockConfig], + stochastic_depth_prob: float = 0.0, + layer_scale: float = 1e-6, + num_classes: int = 1000, + block: Optional[Callable[..., nn.Module]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + **kwargs: Any, + ) -> None: + super().__init__() + _log_api_usage_once(self) + + if not block_setting: + raise ValueError("The block_setting should not be empty") + elif not (isinstance(block_setting, Sequence) and all([isinstance(s, CNBlockConfig) for s in block_setting])): + raise TypeError("The block_setting should be List[CNBlockConfig]") + + if block is None: + block = CNBlock + + if norm_layer is None: + norm_layer = partial(LayerNorm2d, eps=1e-6) + + layers: List[nn.Module] = [] + + # Stem + firstconv_output_channels = block_setting[0].input_channels + layers.append( + ConvNormActivation( + 3, + firstconv_output_channels, + kernel_size=4, + stride=4, + padding=0, + norm_layer=norm_layer, + activation_layer=None, + bias=True, + ) + ) + + total_stage_blocks = sum(cnf.num_layers for cnf in block_setting) + stage_block_id = 0 + for cnf in block_setting: + # Bottlenecks + stage: List[nn.Module] = [] + for _ in range(cnf.num_layers): + # adjust stochastic depth probability based on the depth of the stage block + sd_prob = stochastic_depth_prob * stage_block_id / (total_stage_blocks - 1.0) + stage.append(block(cnf.input_channels, layer_scale, sd_prob, norm_layer)) + stage_block_id += 1 + layers.append(nn.Sequential(*stage)) + if cnf.out_channels is not None: + # Downsampling + 
layers.append( + nn.Sequential( + norm_layer(cnf.input_channels), + nn.Conv2d(cnf.input_channels, cnf.out_channels, kernel_size=2, stride=2), + ) + ) - model = ConvNeXt(block_setting, stochastic_depth_prob=stochastic_depth_prob, **kwargs) + self.features = nn.Sequential(*layers) + self.avgpool = nn.AdaptiveAvgPool2d(1) - if weights is not None: - model.load_state_dict(weights.get_state_dict(progress=progress)) + lastblock = block_setting[-1] + lastconv_output_channels = ( + lastblock.out_channels if lastblock.out_channels is not None else lastblock.input_channels + ) + self.classifier = nn.Sequential( + norm_layer(lastconv_output_channels), nn.Flatten(1), nn.Linear(lastconv_output_channels, num_classes) + ) - return model + for m in self.modules(): + if isinstance(m, (nn.Conv2d, nn.Linear)): + nn.init.trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.zeros_(m.bias) + def _forward_impl(self, x: Tensor) -> Tensor: + x = self.features(x) + x = self.avgpool(x) + x = self.classifier(x) + return x -_COMMON_META = { - "task": "image_classification", - "architecture": "ConvNeXt", - "publication_year": 2022, - "size": (224, 224), - "min_size": (32, 32), - "categories": _IMAGENET_CATEGORIES, - "interpolation": InterpolationMode.BILINEAR, - "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#convnext", -} + def forward(self, x: Tensor) -> Tensor: + return self._forward_impl(x) class ConvNeXt_Tiny_Weights(WeightsEnum): diff --git a/torchvision/transforms/_transforms_video.py b/torchvision/transforms/_transforms_video.py index 32fa0191959..4a36c8abbf9 100644 --- a/torchvision/transforms/_transforms_video.py +++ b/torchvision/transforms/_transforms_video.py @@ -46,8 +46,8 @@ def __call__(self, clip): i, j, h, w = self.get_params(clip, self.size) return F.crop(clip, i, j, h, w) - def __repr__(self): - return self.__class__.__name__ + f"(size={self.size})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(size={self.size})" class RandomResizedCropVideo(RandomResizedCrop): @@ -79,11 +79,8 @@ def __call__(self, clip): i, j, h, w = self.get_params(clip, self.scale, self.ratio) return F.resized_crop(clip, i, j, h, w, self.size, self.interpolation_mode) - def __repr__(self): - return ( - self.__class__.__name__ - + f"(size={self.size}, interpolation_mode={self.interpolation_mode}, scale={self.scale}, ratio={self.ratio})" - ) + def __repr__(self) -> str: + return f"{self.__class__.__name__}(size={self.size}, interpolation_mode={self.interpolation_mode}, scale={self.scale}, ratio={self.ratio})" class CenterCropVideo: @@ -103,8 +100,8 @@ def __call__(self, clip): """ return F.center_crop(clip, self.crop_size) - def __repr__(self): - return self.__class__.__name__ + f"(crop_size={self.crop_size})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(crop_size={self.crop_size})" class NormalizeVideo: @@ -128,8 +125,8 @@ def __call__(self, clip): """ return F.normalize(clip, self.mean, self.std, self.inplace) - def __repr__(self): - return self.__class__.__name__ + f"(mean={self.mean}, std={self.std}, inplace={self.inplace})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(mean={self.mean}, std={self.std}, inplace={self.inplace})" class ToTensorVideo: @@ -150,7 +147,7 @@ def __call__(self, clip): """ return F.to_tensor(clip) - def __repr__(self): + def __repr__(self) -> str: return self.__class__.__name__ @@ -175,5 +172,5 @@ def __call__(self, clip): clip = F.hflip(clip) return clip - def __repr__(self): - return 
self.__class__.__name__ + f"(p={self.p})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(p={self.p})" diff --git a/torchvision/transforms/autoaugment.py b/torchvision/transforms/autoaugment.py index 228b2f8dd9b..d58077c9b14 100644 --- a/torchvision/transforms/autoaugment.py +++ b/torchvision/transforms/autoaugment.py @@ -280,7 +280,7 @@ def forward(self, img: Tensor) -> Tensor: return img def __repr__(self) -> str: - return self.__class__.__name__ + f"(policy={self.policy}, fill={self.fill})" + return f"{self.__class__.__name__}(policy={self.policy}, fill={self.fill})" class RandAugment(torch.nn.Module): @@ -363,14 +363,16 @@ def forward(self, img: Tensor) -> Tensor: return img def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_ops={num_ops}" - s += ", magnitude={magnitude}" - s += ", num_magnitude_bins={num_magnitude_bins}" - s += ", interpolation={interpolation}" - s += ", fill={fill}" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"num_ops={self.num_ops}" + f", magnitude={self.magnitude}" + f", num_magnitude_bins={self.num_magnitude_bins}" + f", interpolation={self.interpolation}" + f", fill={self.fill}" + f")" + ) + return s class TrivialAugmentWide(torch.nn.Module): @@ -448,9 +450,11 @@ def forward(self, img: Tensor) -> Tensor: return _apply_op(img, op_name, magnitude, interpolation=self.interpolation, fill=fill) def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_magnitude_bins={num_magnitude_bins}" - s += ", interpolation={interpolation}" - s += ", fill={fill}" - s += ")" - return s.format(**self.__dict__) + s = ( + f"{self.__class__.__name__}(" + f"num_magnitude_bins={self.num_magnitude_bins}" + f", interpolation={self.interpolation}" + f", fill={self.fill}" + f")" + ) + return s diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 5fdc8df89f4..3bc295a4385 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -95,7 +95,7 @@ def __call__(self, img): img = t(img) return img - def __repr__(self): + def __repr__(self) -> str: format_string = self.__class__.__name__ + "(" for t in self.transforms: format_string += "\n" @@ -134,8 +134,8 @@ def __call__(self, pic): """ return F.to_tensor(pic) - def __repr__(self): - return self.__class__.__name__ + "()" + def __repr__(self) -> str: + return f"{self.__class__.__name__}()" class PILToTensor: @@ -161,8 +161,8 @@ def __call__(self, pic): """ return F.pil_to_tensor(pic) - def __repr__(self): - return self.__class__.__name__ + "()" + def __repr__(self) -> str: + return f"{self.__class__.__name__}()" class ConvertImageDtype(torch.nn.Module): @@ -226,7 +226,7 @@ def __call__(self, pic): """ return F.to_pil_image(pic, self.mode) - def __repr__(self): + def __repr__(self) -> str: format_string = self.__class__.__name__ + "(" if self.mode is not None: format_string += f"mode={self.mode}" @@ -269,8 +269,8 @@ def forward(self, tensor: Tensor) -> Tensor: """ return F.normalize(tensor, self.mean, self.std, self.inplace) - def __repr__(self): - return self.__class__.__name__ + f"(mean={self.mean}, std={self.std})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(mean={self.mean}, std={self.std})" class Resize(torch.nn.Module): @@ -348,9 +348,9 @@ def forward(self, img): """ return F.resize(img, self.size, self.interpolation, self.max_size, self.antialias) - def __repr__(self): + def __repr__(self) -> str: detail = f"(size={self.size}, 
interpolation={self.interpolation.value}, max_size={self.max_size}, antialias={self.antialias})" - return self.__class__.__name__ + detail + return f"{self.__class__.__name__}{detail}" class CenterCrop(torch.nn.Module): @@ -380,8 +380,8 @@ def forward(self, img): """ return F.center_crop(img, self.size) - def __repr__(self): - return self.__class__.__name__ + f"(size={self.size})" + def __repr__(self) -> str: + return f"{self.__class__.__name_}(size={self.size})" class Pad(torch.nn.Module): @@ -453,8 +453,8 @@ def forward(self, img): """ return F.pad(img, self.padding, self.fill, self.padding_mode) - def __repr__(self): - return self.__class__.__name__ + f"(padding={self.padding}, fill={self.fill}, padding_mode={self.padding_mode})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(padding={self.padding}, fill={self.fill}, padding_mode={self.padding_mode})" class Lambda: @@ -473,8 +473,8 @@ def __init__(self, lambd): def __call__(self, img): return self.lambd(img) - def __repr__(self): - return self.__class__.__name__ + "()" + def __repr__(self) -> str: + return f"{self.__class__.__name__}()" class RandomTransforms: @@ -493,7 +493,7 @@ def __init__(self, transforms): def __call__(self, *args, **kwargs): raise NotImplementedError() - def __repr__(self): + def __repr__(self) -> str: format_string = self.__class__.__name__ + "(" for t in self.transforms: format_string += "\n" @@ -535,7 +535,7 @@ def forward(self, img): img = t(img) return img - def __repr__(self): + def __repr__(self) -> str: format_string = self.__class__.__name__ + "(" format_string += f"\n p={self.p}" for t in self.transforms: @@ -569,10 +569,8 @@ def __call__(self, *args): t = random.choices(self.transforms, weights=self.p)[0] return t(*args) - def __repr__(self): - format_string = super().__repr__() - format_string += f"(p={self.p})" - return format_string + def __repr__(self) -> str: + return f"{super().__repr__()}(p={self.p})" class RandomCrop(torch.nn.Module): @@ -679,8 +677,8 @@ def forward(self, img): return F.crop(img, i, j, h, w) - def __repr__(self): - return self.__class__.__name__ + f"(size={self.size}, padding={self.padding})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(size={self.size}, padding={self.padding})" class RandomHorizontalFlip(torch.nn.Module): @@ -710,8 +708,8 @@ def forward(self, img): return F.hflip(img) return img - def __repr__(self): - return self.__class__.__name__ + f"(p={self.p})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(p={self.p})" class RandomVerticalFlip(torch.nn.Module): @@ -741,8 +739,8 @@ def forward(self, img): return F.vflip(img) return img - def __repr__(self): - return self.__class__.__name__ + f"(p={self.p})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(p={self.p})" class RandomPerspective(torch.nn.Module): @@ -842,8 +840,8 @@ def get_params(width: int, height: int, distortion_scale: float) -> Tuple[List[L endpoints = [topleft, topright, botright, botleft] return startpoints, endpoints - def __repr__(self): - return self.__class__.__name__ + f"(p={self.p})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(p={self.p})" class RandomResizedCrop(torch.nn.Module): @@ -954,7 +952,7 @@ def forward(self, img): i, j, h, w = self.get_params(img, self.scale, self.ratio) return F.resized_crop(img, i, j, h, w, self.size, self.interpolation) - def __repr__(self): + def __repr__(self) -> str: interpolate_str = self.interpolation.value format_string = self.__class__.__name__ + 
f"(size={self.size}" format_string += f", scale={tuple(round(s, 4) for s in self.scale)}" @@ -1006,8 +1004,8 @@ def forward(self, img): """ return F.five_crop(img, self.size) - def __repr__(self): - return self.__class__.__name__ + f"(size={self.size})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(size={self.size})" class TenCrop(torch.nn.Module): @@ -1056,8 +1054,8 @@ def forward(self, img): """ return F.ten_crop(img, self.size, self.vertical_flip) - def __repr__(self): - return self.__class__.__name__ + f"(size={self.size}, vertical_flip={self.vertical_flip})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(size={self.size}, vertical_flip={self.vertical_flip})" class LinearTransformation(torch.nn.Module): @@ -1130,11 +1128,13 @@ def forward(self, tensor: Tensor) -> Tensor: tensor = transformed_tensor.view(shape) return tensor - def __repr__(self): - format_string = self.__class__.__name__ + "(transformation_matrix=" - format_string += str(self.transformation_matrix.tolist()) + ")" - format_string += ", (mean_vector=" + str(self.mean_vector.tolist()) + ")" - return format_string + def __repr__(self) -> str: + s = ( + f"{self.__class__.__name__}(transformation_matrix=" + f"{self.transformation_matrix.tolist()}" + f", mean_vector={self.mean_vector.tolist()})" + ) + return s class ColorJitter(torch.nn.Module): @@ -1242,13 +1242,15 @@ def forward(self, img): return img - def __repr__(self): - format_string = self.__class__.__name__ + "(" - format_string += f"brightness={self.brightness}" - format_string += f", contrast={self.contrast}" - format_string += f", saturation={self.saturation}" - format_string += f", hue={self.hue})" - return format_string + def __repr__(self) -> str: + s = ( + f"{self.__class__.__name__}(" + f"brightness={self.brightness}" + f", contrast={self.contrast}" + f", saturation={self.saturation}" + f", hue={self.hue})" + ) + return s class RandomRotation(torch.nn.Module): @@ -1346,7 +1348,7 @@ def forward(self, img): return F.rotate(img, angle, self.resample, self.expand, self.center, fill) - def __repr__(self): + def __repr__(self) -> str: interpolate_str = self.interpolation.value format_string = self.__class__.__name__ + f"(degrees={self.degrees}" format_string += f", interpolation={interpolate_str}" @@ -1529,24 +1531,18 @@ def forward(self, img): return F.affine(img, *ret, interpolation=self.interpolation, fill=fill, center=self.center) - def __repr__(self): - s = "{name}(degrees={degrees}" - if self.translate is not None: - s += ", translate={translate}" - if self.scale is not None: - s += ", scale={scale}" - if self.shear is not None: - s += ", shear={shear}" - if self.interpolation != InterpolationMode.NEAREST: - s += ", interpolation={interpolation}" - if self.fill != 0: - s += ", fill={fill}" - if self.center is not None: - s += ", center={center}" - s += ")" - d = dict(self.__dict__) - d["interpolation"] = self.interpolation.value - return s.format(name=self.__class__.__name__, **d) + def __repr__(self) -> str: + s = ( + f"{self.__class__.__name__}(degrees={self.degrees}" + f"{f', translate={self.translate}' if self.translate is not None else ''}" + f"{f', translate={self.scale}' if self.scale is not None else ''}" + f"{f', translate={self.shear}' if self.shear is not None else ''}" + f"{f', translate={self.interpolation.value}' if self.interpolation != InterpolationMode.NEAREST else ''}" + f"{f', translate={self.fill}' if self.fill != 0 else ''}" + f"{f', translate={self.center}' if self.center is not None else ''}" + f")" 
+ ) + return s class Grayscale(torch.nn.Module): @@ -1580,8 +1576,8 @@ def forward(self, img): """ return F.rgb_to_grayscale(img, num_output_channels=self.num_output_channels) - def __repr__(self): - return self.__class__.__name__ + f"(num_output_channels={self.num_output_channels})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(num_output_channels={self.num_output_channels})" class RandomGrayscale(torch.nn.Module): @@ -1618,8 +1614,8 @@ def forward(self, img): return F.rgb_to_grayscale(img, num_output_channels=num_output_channels) return img - def __repr__(self): - return self.__class__.__name__ + f"(p={self.p})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(p={self.p})" class RandomErasing(torch.nn.Module): @@ -1748,13 +1744,16 @@ def forward(self, img): return F.erase(img, x, y, h, w, v, self.inplace) return img - def __repr__(self): - s = f"(p={self.p}, " - s += f"scale={self.scale}, " - s += f"ratio={self.ratio}, " - s += f"value={self.value}, " - s += f"inplace={self.inplace})" - return self.__class__.__name__ + s + def __repr__(self) -> str: + s = ( + f"{self.__class__.__name__}" + f"(p={self.p}, " + f"scale={self.scale}, " + f"ratio={self.ratio}, " + f"value={self.value}, " + f"inplace={self.inplace})" + ) + return s class GaussianBlur(torch.nn.Module): @@ -1818,10 +1817,9 @@ def forward(self, img: Tensor) -> Tensor: sigma = self.get_params(self.sigma[0], self.sigma[1]) return F.gaussian_blur(img, self.kernel_size, [sigma, sigma]) - def __repr__(self): - s = f"(kernel_size={self.kernel_size}, " - s += f"sigma={self.sigma})" - return self.__class__.__name__ + s + def __repr__(self) -> str: + s = f"{self.__class__.__name__}(kernel_size={self.kernel_size}, sigma={self.sigma})" + return s def _setup_size(size, error_msg): @@ -1883,8 +1881,8 @@ def forward(self, img): return F.invert(img) return img - def __repr__(self): - return self.__class__.__name__ + f"(p={self.p})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(p={self.p})" class RandomPosterize(torch.nn.Module): @@ -1916,8 +1914,8 @@ def forward(self, img): return F.posterize(img, self.bits) return img - def __repr__(self): - return self.__class__.__name__ + f"(bits={self.bits},p={self.p})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(bits={self.bits},p={self.p})" class RandomSolarize(torch.nn.Module): @@ -1949,8 +1947,8 @@ def forward(self, img): return F.solarize(img, self.threshold) return img - def __repr__(self): - return self.__class__.__name__ + f"(threshold={self.threshold},p={self.p})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(threshold={self.threshold},p={self.p})" class RandomAdjustSharpness(torch.nn.Module): @@ -1982,8 +1980,8 @@ def forward(self, img): return F.adjust_sharpness(img, self.sharpness_factor) return img - def __repr__(self): - return self.__class__.__name__ + f"(sharpness_factor={self.sharpness_factor},p={self.p})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(sharpness_factor={self.sharpness_factor},p={self.p})" class RandomAutocontrast(torch.nn.Module): @@ -2013,8 +2011,8 @@ def forward(self, img): return F.autocontrast(img) return img - def __repr__(self): - return self.__class__.__name__ + f"(p={self.p})" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(p={self.p})" class RandomEqualize(torch.nn.Module): @@ -2044,5 +2042,5 @@ def forward(self, img): return F.equalize(img) return img - def __repr__(self): - return self.__class__.__name__ + f"(p={self.p})" + def 
__repr__(self) -> str: + return f"{self.__class__.__name__}(p={self.p})" From c7da16842e8bf38ad0fec95005b1a86405bcddff Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Tue, 8 Feb 2022 20:48:44 +0000 Subject: [PATCH 20/24] fix merge --- CONTRIBUTING_MODELS.md | 72 --------- torchvision/prototype/models/convnext.py | 193 +++++------------------ 2 files changed, 36 insertions(+), 229 deletions(-) diff --git a/CONTRIBUTING_MODELS.md b/CONTRIBUTING_MODELS.md index 10d32e021b2..82845e6579a 100644 --- a/CONTRIBUTING_MODELS.md +++ b/CONTRIBUTING_MODELS.md @@ -1,76 +1,4 @@ # Contributing to Torchvision - Models -<<<<<<< HEAD -<<<<<<< HEAD - -- [New Model Architectures - Overview](#new-model-architectures---overview) - -- [New Weights for Existing Model Architectures](#new-weights-for-existing-model-architectures) - -## New Model Architectures - Overview - -For someone who would be interested in adding a model architecture, it is also expected to train the model, so here are a few important considerations: - -- Training big models requires lots of resources and the cost quickly adds up - -- Reproducing models is fun but also risky as you might not always get the results reported on the paper. It might require a huge amount of effort to close the gap - -- The contribution might not get merged if we significantly lack in terms of accuracy, speed etc - -- Including new models in TorchVision might not be the best approach, so other options such as releasing the model through to [Pytorch Hub](https://pytorch.org/hub/) should be considered - -So, before starting any work and submitting a PR there are a few critical things that need to be taken into account in order to make sure the planned contribution is within the context of TorchVision, and the requirements and expectations are discussed beforehand. If this step is skipped and a PR is submitted without prior discussion it will almost certainly be rejected. - -### 1. Preparation work - -- Start by looking into this [issue](https://github.com/pytorch/vision/issues/2707) in order to have an idea of the models that are being considered, express your willingness to add a new model and discuss with the community whether or not this model should be included in TorchVision. It is very important at this stage to make sure that there is an agreement on the value of having this model in TorchVision and there is no one else already working on it. - -- If the decision is to include the new model, then please create a new ticket which will be used for all design and implementation discussions prior to the PR. One of the TorchVision maintainers will reach out at this stage and this will be your POC from this point onwards in order to provide support, guidance and regular feedback. - -### 2. Implement the model - -Please take a look at existing models in TorchVision to get familiar with the idioms. Also please look at recent contributions for new models. If in doubt about any design decisions you can ask for feedback on the issue created in step 1. 
Example of things to take into account: - -- The implementation should be as close as possible to the canonical implementation/paper -- The PR must include the code implementation, documentation and tests -- It should also extend the existing reference scripts used to train the model -- The weights need to reproduce closely the results of the paper in terms of accuracy, even though the final weights to be deployed will be those trained by the TorchVision maintainers -- The PR description should include commands/configuration used to train the model, so that the TorchVision maintainers can easily run them to verify the implementation and generate the final model to be released -- Make sure we re-use existing components as much as possible (inheritance) -- New primitives (transforms, losses, etc) can be added if necessary, but the final location will be determined after discussion with the dedicated maintainer -- Please take a look at the detailed [implementation and documentation guidelines](https://github.com/pytorch/vision/issues/5319) for a fine grain list of things not to be missed - -### 3. Train the model with reference scripts - -To validate the new model against the common benchmark, as well as to generate pre-trained weights, you must use TorchVision’s reference scripts to train the model. - -Make sure all logs and a final (or best) checkpoint are saved, because it is expected that a submission shows that a model has been successfully trained and the results are in line with the original paper/repository. This will allow the reviewers to quickly check the validity of the submission, but please note that the final model to be released will be re-trained by the maintainers in order to verify reproducibility, ensure that the changes occurred during the PR review did not introduce any bugs, and to avoid moving around a large amount of data (including all checkpoints and logs). - -### 4. Submit a PR - -Submit a PR and tag the assigned maintainer. This PR should: - -- Link the original ticket -- Provide a link for the original paper and the original repository if available -- Highlight the important test metrics and how they compare to the original paper -- Highlight any design choices that deviate from the original paper/implementation and rationale for these choices - -## New Weights for Existing Model Architectures - -The process of improving existing models, for instance improving accuracy by retraining the model with a different set of hyperparameters or augmentations, is the following: - -1. Open a ticket and discuss with the community and maintainers whether this improvement should be added to TorchVision. Note that to add new weights the improvement should be significant. - -2. Train the model using TorchVision reference scripts. You can add new primitives (transforms, losses, etc) when necessary, but the final location will be determined after discussion with the dedicated maintainer. - -3. Open a PR with the new weights, together with the training logs and the checkpoint chosen so the reviewers can verify the submission. Details on how the model was trained, i.e., the training command using the reference scripts, should be included in the PR. - -4. The PR reviewers should replicate the results on their side to verify the submission and if all goes well the new weights should be ready to be released! 
-======= -# Contributing to Torchvision -======= ->>>>>>> 1f79d422 (update CONTRIBUTING_MODELS.md) -======= ->>>>>>> 0d3b4b45f14a65833131da57df105bf68a3afdd7 - [New Model Architectures - Overview](#new-model-architectures---overview) diff --git a/torchvision/prototype/models/convnext.py b/torchvision/prototype/models/convnext.py index 0ea0f0766da..ab9d08fbd3a 100644 --- a/torchvision/prototype/models/convnext.py +++ b/torchvision/prototype/models/convnext.py @@ -10,168 +10,47 @@ from ._utils import handle_legacy_interface, _ovewrite_named_param -__all__ = ["ConvNeXt", "ConvNeXt_Tiny_Weights", "convnext_tiny"] - - -class LayerNorm2d(nn.LayerNorm): - def __init__(self, *args: Any, **kwargs: Any) -> None: - self.channels_last = kwargs.pop("channels_last", False) - super().__init__(*args, **kwargs) - - def forward(self, x: Tensor) -> Tensor: - # TODO: Benchmark this against the approach described at https://github.com/pytorch/vision/pull/5197#discussion_r786251298 - if not self.channels_last: - x = x.permute(0, 2, 3, 1) - x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) - if not self.channels_last: - x = x.permute(0, 3, 1, 2) - return x - - -class CNBlock(nn.Module): - def __init__( - self, dim, layer_scale: float, stochastic_depth_prob: float, norm_layer: Callable[..., nn.Module] - ) -> None: - super().__init__() - self.block = nn.Sequential( - ConvNormActivation( - dim, - dim, - kernel_size=7, - groups=dim, - norm_layer=norm_layer, - activation_layer=None, - bias=True, - ), - ConvNormActivation(dim, 4 * dim, kernel_size=1, norm_layer=None, activation_layer=nn.GELU, inplace=None), - ConvNormActivation( - 4 * dim, - dim, - kernel_size=1, - norm_layer=None, - activation_layer=None, - ), - ) - self.layer_scale = nn.Parameter(torch.ones(dim, 1, 1) * layer_scale) - self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row") - - def forward(self, input: Tensor) -> Tensor: - result = self.layer_scale * self.block(input) - result = self.stochastic_depth(result) - result += input - return result - - -class CNBlockConfig: - # Stores information listed at Section 3 of the ConvNeXt paper - def __init__( - self, - input_channels: int, - out_channels: Optional[int], - num_layers: int, - ) -> None: - self.input_channels = input_channels - self.out_channels = out_channels - self.num_layers = num_layers - - def __repr__(self) -> str: - s = ( - f"{self.__class__.__name__}(" - f"input_channels={self.input_channels}" - f", out_channels={self.out_channels}" - f", num_layers={self.num_layers}" - f")" - ) - return s - - -class ConvNeXt(nn.Module): - def __init__( - self, - block_setting: List[CNBlockConfig], - stochastic_depth_prob: float = 0.0, - layer_scale: float = 1e-6, - num_classes: int = 1000, - block: Optional[Callable[..., nn.Module]] = None, - norm_layer: Optional[Callable[..., nn.Module]] = None, - **kwargs: Any, - ) -> None: - super().__init__() - _log_api_usage_once(self) - - if not block_setting: - raise ValueError("The block_setting should not be empty") - elif not (isinstance(block_setting, Sequence) and all([isinstance(s, CNBlockConfig) for s in block_setting])): - raise TypeError("The block_setting should be List[CNBlockConfig]") - - if block is None: - block = CNBlock - - if norm_layer is None: - norm_layer = partial(LayerNorm2d, eps=1e-6) - - layers: List[nn.Module] = [] - - # Stem - firstconv_output_channels = block_setting[0].input_channels - layers.append( - ConvNormActivation( - 3, - firstconv_output_channels, - kernel_size=4, - stride=4, - padding=0, - 
norm_layer=norm_layer, - activation_layer=None, - bias=True, - ) - ) - - total_stage_blocks = sum(cnf.num_layers for cnf in block_setting) - stage_block_id = 0 - for cnf in block_setting: - # Bottlenecks - stage: List[nn.Module] = [] - for _ in range(cnf.num_layers): - # adjust stochastic depth probability based on the depth of the stage block - sd_prob = stochastic_depth_prob * stage_block_id / (total_stage_blocks - 1.0) - stage.append(block(cnf.input_channels, layer_scale, sd_prob, norm_layer)) - stage_block_id += 1 - layers.append(nn.Sequential(*stage)) - if cnf.out_channels is not None: - # Downsampling - layers.append( - nn.Sequential( - norm_layer(cnf.input_channels), - nn.Conv2d(cnf.input_channels, cnf.out_channels, kernel_size=2, stride=2), - ) - ) +__all__ = [ + "ConvNeXt", + "ConvNeXt_Tiny_Weights", + "ConvNeXt_Small_Weights", + "ConvNeXt_Base_Weights", + "ConvNeXt_Large_Weights", + "convnext_tiny", + "convnext_small", + "convnext_base", + "convnext_large", +] + + +def _convnext( + block_setting: List[CNBlockConfig], + stochastic_depth_prob: float, + weights: Optional[WeightsEnum], + progress: bool, + **kwargs: Any, +) -> ConvNeXt: + if weights is not None: + _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) - self.features = nn.Sequential(*layers) - self.avgpool = nn.AdaptiveAvgPool2d(1) + model = ConvNeXt(block_setting, stochastic_depth_prob=stochastic_depth_prob, **kwargs) - lastblock = block_setting[-1] - lastconv_output_channels = ( - lastblock.out_channels if lastblock.out_channels is not None else lastblock.input_channels - ) - self.classifier = nn.Sequential( - norm_layer(lastconv_output_channels), nn.Flatten(1), nn.Linear(lastconv_output_channels, num_classes) - ) + if weights is not None: + model.load_state_dict(weights.get_state_dict(progress=progress)) - for m in self.modules(): - if isinstance(m, (nn.Conv2d, nn.Linear)): - nn.init.trunc_normal_(m.weight, std=0.02) - if m.bias is not None: - nn.init.zeros_(m.bias) + return model - def _forward_impl(self, x: Tensor) -> Tensor: - x = self.features(x) - x = self.avgpool(x) - x = self.classifier(x) - return x - def forward(self, x: Tensor) -> Tensor: - return self._forward_impl(x) +_COMMON_META = { + "task": "image_classification", + "architecture": "ConvNeXt", + "publication_year": 2022, + "size": (224, 224), + "min_size": (32, 32), + "categories": _IMAGENET_CATEGORIES, + "interpolation": InterpolationMode.BILINEAR, + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#convnext", +} class ConvNeXt_Tiny_Weights(WeightsEnum): From 27e16048945e834041ef24ac203995b6f7ddc63d Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Tue, 8 Feb 2022 20:57:52 +0000 Subject: [PATCH 21/24] fix merge --- torchvision/ops/ps_roi_pool.py | 2 +- torchvision/ops/stochastic_depth.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/torchvision/ops/ps_roi_pool.py b/torchvision/ops/ps_roi_pool.py index 14b23f8ffe1..a27c36ee76c 100644 --- a/torchvision/ops/ps_roi_pool.py +++ b/torchvision/ops/ps_roi_pool.py @@ -64,5 +64,5 @@ def forward(self, input: Tensor, rois: Tensor) -> Tensor: return ps_roi_pool(input, rois, self.output_size, self.spatial_scale) def __repr__(self) -> str: - s = f"{self.__class__.__name__}(" f"output_size={self.output_size}" f", spatial_scale={self.spatial_scale}" f")" + s = f"{self.__class__.__name__}(output_size={self.output_size}, spatial_scale={self.spatial_scale})" return s diff --git a/torchvision/ops/stochastic_depth.py 
b/torchvision/ops/stochastic_depth.py index 91f229c371b..ff8167b2315 100644 --- a/torchvision/ops/stochastic_depth.py +++ b/torchvision/ops/stochastic_depth.py @@ -62,5 +62,5 @@ def forward(self, input: Tensor) -> Tensor: return stochastic_depth(input, self.p, self.mode, self.training) def __repr__(self) -> str: - s = f"{self.__class__.__name__}(" f"p={self.p}" f", mode={self.mode}" f")" + s = f"{self.__class__.__name__}(p={self.p}, mode={self.mode})" return s From c25619d24737bb5418a1f6539e76582532dc19f3 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Tue, 8 Feb 2022 21:00:46 +0000 Subject: [PATCH 22/24] remove unused code --- torchvision/ops/roi_pool.py | 2 +- torchvision/transforms/transforms.py | 11 ----------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/torchvision/ops/roi_pool.py b/torchvision/ops/roi_pool.py index ead81597dfb..37cbf7febee 100644 --- a/torchvision/ops/roi_pool.py +++ b/torchvision/ops/roi_pool.py @@ -66,5 +66,5 @@ def forward(self, input: Tensor, rois: Tensor) -> Tensor: return roi_pool(input, rois, self.output_size, self.spatial_scale) def __repr__(self) -> str: - s = f"{self.__class__.__name__}(" f"output_size={self.output_size}" f", spatial_scale={self.spatial_scale}" f")" + s = f"{self.__class__.__name__}(output_size={self.output_size}, spatial_scale={self.spatial_scale})" return s diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 05c0add2b0d..3bc295a4385 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -353,17 +353,6 @@ def __repr__(self) -> str: return f"{self.__class__.__name__}{detail}" -class Scale(Resize): - """ - Note: This transform is deprecated in favor of Resize. - """ - - def __init__(self, *args, **kwargs): - warnings.warn("The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.") - super().__init__(*args, **kwargs) - _log_api_usage_once(self) - - class CenterCrop(torch.nn.Module): """Crops the given image at the center. 
If the image is torch Tensor, it is expected From eab137128133796fa514744a1a89185c793090d2 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Wed, 9 Feb 2022 09:28:38 +0000 Subject: [PATCH 23/24] adress PR comments --- torchvision/ops/deform_conv.py | 11 ++++++----- torchvision/transforms/transforms.py | 21 ++++++++++----------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/torchvision/ops/deform_conv.py b/torchvision/ops/deform_conv.py index 7bd7583f109..51a88e2dde8 100644 --- a/torchvision/ops/deform_conv.py +++ b/torchvision/ops/deform_conv.py @@ -185,10 +185,11 @@ def __repr__(self) -> str: f", {self.out_channels}" f", kernel_size={self.kernel_size}" f", stride={self.stride}" - f"{f', padding={self.padding}' if self.padding != (0, 0) else ''}" - f"{f', dilation={self.dilation}' if self.dilation != (1, 1) else ''}" - f"{f', groups={self.groups}' if self.groups != 1 else ''}" - f"{', bias=False' if self.bias is None else ''}" - f")" ) + s += f", padding={self.padding}" if self.padding != (0, 0) else "" + s += f", dilation={self.dilation}" if self.dilation != (1, 1) else "" + s += f", groups={self.groups}" if self.groups != 1 else "" + s += f", bias=False" if self.bias is None else "" + s += ")" + return s diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 3bc295a4385..9fc79c1d8cc 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -381,7 +381,7 @@ def forward(self, img): return F.center_crop(img, self.size) def __repr__(self) -> str: - return f"{self.__class__.__name_}(size={self.size})" + return f"{self.__class__.__name__}(size={self.size})" class Pad(torch.nn.Module): @@ -1532,16 +1532,15 @@ def forward(self, img): return F.affine(img, *ret, interpolation=self.interpolation, fill=fill, center=self.center) def __repr__(self) -> str: - s = ( - f"{self.__class__.__name__}(degrees={self.degrees}" - f"{f', translate={self.translate}' if self.translate is not None else ''}" - f"{f', translate={self.scale}' if self.scale is not None else ''}" - f"{f', translate={self.shear}' if self.shear is not None else ''}" - f"{f', translate={self.interpolation.value}' if self.interpolation != InterpolationMode.NEAREST else ''}" - f"{f', translate={self.fill}' if self.fill != 0 else ''}" - f"{f', translate={self.center}' if self.center is not None else ''}" - f")" - ) + s = f"{self.__class__.__name__}(degrees={self.degrees}" + s += f", translate={self.translate}" if self.translate is not None else "" + s += f", scale={self.scale}" if self.scale is not None else "" + s += f", shear={self.shear}" if self.shear is not None else "" + s += f", interpolation={self.interpolation.value}" if self.interpolation != InterpolationMode.NEAREST else "" + s += f", fill={self.fill}" if self.fill != 0 else "" + s += f", center={self.center}" if self.center is not None else "" + s += ")" + return s From 1d3910fa4c018b52f65eb6119d17c0f7e9352148 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Wed, 9 Feb 2022 09:37:37 +0000 Subject: [PATCH 24/24] fix flake8 error --- torchvision/ops/deform_conv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/ops/deform_conv.py b/torchvision/ops/deform_conv.py index 51a88e2dde8..bb4400e5c29 100644 --- a/torchvision/ops/deform_conv.py +++ b/torchvision/ops/deform_conv.py @@ -189,7 +189,7 @@ def __repr__(self) -> str: s += f", padding={self.padding}" if self.padding != (0, 0) else "" s += f", dilation={self.dilation}" if self.dilation != (1, 1) else "" s += f", 
groups={self.groups}" if self.groups != 1 else "" - s += f", bias=False" if self.bias is None else "" + s += ", bias=False" if self.bias is None else "" s += ")" return s
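
Taken together, the last few patches settle on a single `__repr__` idiom: one f-string for the unconditional fields, plus separate conditional `s += ...` statements for the optional ones, rather than nesting f-strings inside f-strings. The sketch below is a self-contained illustration of that idiom; the `ToyCrop` class is hypothetical and not part of torchvision.

```python
# Minimal sketch of the consolidated __repr__ style from the patches above:
# an f-string for mandatory fields, conditional appends for optional ones.
import torch


class ToyCrop(torch.nn.Module):
    def __init__(self, size: int, padding: int = 0) -> None:
        super().__init__()
        self.size = size
        self.padding = padding

    def forward(self, img: torch.Tensor) -> torch.Tensor:
        # The cropping logic is irrelevant here; only the repr matters.
        return img

    def __repr__(self) -> str:
        s = f"{self.__class__.__name__}(size={self.size}"
        s += f", padding={self.padding}" if self.padding != 0 else ""
        s += ")"
        return s


print(ToyCrop(224))             # ToyCrop(size=224)
print(ToyCrop(224, padding=4))  # ToyCrop(size=224, padding=4)
```

Keeping the optional pieces as plain conditional concatenations mirrors the final form of the `DeformConv2d` and `RandomAffine` reprs above and avoids the placeholder-free f-string that flake8 flagged in the last patch.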