docs/source/conf.py (4 changes: 2 additions & 2 deletions)
@@ -347,8 +347,8 @@ def generate_classification_table():
     content = [
         (
             f":class:`{w} <{type(w).__name__}>`",
-            w.meta["acc@1"],
-            w.meta["acc@5"],
+            w.meta["metrics"]["acc@1"],
+            w.meta["metrics"]["acc@5"],
             f"{w.meta['num_params']/1e6:.1f}M",
             f"`link <{w.meta['recipe']}>`__",
         )

(Inline review comment from the PR author on the changed lines: "Fixing documentation.")
test/test_extended_models.py (35 changes: 24 additions & 11 deletions)
@@ -79,20 +79,32 @@ def test_naming_conventions(model_fn):
 )
 @run_if_test_with_extended
 def test_schema_meta_validation(model_fn):
-    # TODO: add list of permitted fields
-    classification_fields = ["categories", "acc@1", "acc@5"]
+    # list of all possible supported high-level fields for weights meta-data
+    permitted_fields = {
+        "backend",
+        "categories",
+        "keypoint_names",
+        "license",
+        "metrics",
+        "min_size",
+        "num_params",
+        "recipe",
+        "unquantized",
+    }
+    # mandatory fields for each computer vision task
+    classification_fields = {"categories", ("metrics", "acc@1"), ("metrics", "acc@5")}
     defaults = {
-        "all": ["recipe", "num_params", "min_size"],
+        "all": {"metrics", "min_size", "num_params", "recipe"},
         "models": classification_fields,
-        "detection": ["categories", "map"],
-        "quantization": classification_fields + ["backend", "unquantized"],
-        "segmentation": ["categories", "mIoU", "acc"],
+        "detection": {"categories", ("metrics", "box_map")},
+        "quantization": classification_fields | {"backend", "unquantized"},
+        "segmentation": {"categories", ("metrics", "miou"), ("metrics", "pixel_acc")},
         "video": classification_fields,
-        "optical_flow": [],
+        "optical_flow": set(),
     }
     model_name = model_fn.__name__
     module_name = model_fn.__module__.split(".")[-2]
-    fields = set(defaults["all"] + defaults[module_name])
+    fields = defaults["all"] | defaults[module_name]

     weights_enum = _get_model_weights(model_fn)
     if len(weights_enum) == 0:
@@ -102,9 +114,10 @@ def test_schema_meta_validation(model_fn):
     incorrect_params = []
     bad_names = []
     for w in weights_enum:
-        missing_fields = fields - set(w.meta.keys())
-        if missing_fields:
-            problematic_weights[w] = missing_fields
+        missing_fields = fields - (set(w.meta.keys()) | set(("metrics", x) for x in w.meta.get("metrics", {}).keys()))
+        unsupported_fields = set(w.meta.keys()) - permitted_fields
+        if missing_fields or unsupported_fields:
+            problematic_weights[w] = {"missing": missing_fields, "unsupported": unsupported_fields}
         if w == weights_enum.DEFAULT:
            if module_name == "quantization":
                # parameters() count doesn't work well with quantization, so we check against the non-quantized
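In the updated check, a mandatory field is either a plain top-level key or a ("metrics", name) tuple that must match a key inside the nested "metrics" dict. A minimal standalone sketch of that flattening logic, using made-up meta values rather than a real checkpoint:

# Mandatory fields: plain strings name top-level keys, while
# ("metrics", name) tuples name keys inside the nested "metrics" dict.
fields = {"recipe", "num_params", "min_size", "categories",
          ("metrics", "acc@1"), ("metrics", "acc@5")}

meta = {  # illustrative values, not a real torchvision checkpoint
    "recipe": "https://example.com/recipe",
    "num_params": 61_100_840,
    "min_size": (63, 63),
    "categories": ["tench", "goldfish"],
    "metrics": {"acc@1": 56.522, "acc@5": 79.066},
}

# Flatten what `meta` actually provides: its top-level keys plus one
# ("metrics", k) tuple per entry of the nested metrics dict.
provided = set(meta) | {("metrics", k) for k in meta.get("metrics", {})}
print(fields - provided)  # set() -> no mandatory field is missing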
torchvision/models/alexnet.py (6 changes: 4 additions & 2 deletions)
@@ -61,8 +61,10 @@ class AlexNet_Weights(WeightsEnum):
             "min_size": (63, 63),
             "categories": _IMAGENET_CATEGORIES,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg",
-            "acc@1": 56.522,
-            "acc@5": 79.066,
+            "metrics": {
+                "acc@1": 56.522,
+                "acc@5": 79.066,
+            },
         },
     )
     DEFAULT = IMAGENET1K_V1
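With the nested layout, callers read accuracies via meta["metrics"] instead of top-level keys. A short usage sketch, assuming the multi-weight API available on this branch:

from torchvision.models import AlexNet_Weights

weights = AlexNet_Weights.IMAGENET1K_V1
# Accuracies now live one level down, under the "metrics" key.
print(weights.meta["metrics"]["acc@1"])  # 56.522
print(weights.meta["metrics"]["acc@5"])  # 79.066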
torchvision/models/convnext.py (24 changes: 16 additions & 8 deletions)
@@ -217,8 +217,10 @@ class ConvNeXt_Tiny_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 28589128,
-            "acc@1": 82.520,
-            "acc@5": 96.146,
+            "metrics": {
+                "acc@1": 82.520,
+                "acc@5": 96.146,
+            },
         },
     )
     DEFAULT = IMAGENET1K_V1
@@ -231,8 +233,10 @@ class ConvNeXt_Small_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 50223688,
-            "acc@1": 83.616,
-            "acc@5": 96.650,
+            "metrics": {
+                "acc@1": 83.616,
+                "acc@5": 96.650,
+            },
         },
     )
     DEFAULT = IMAGENET1K_V1
@@ -245,8 +249,10 @@ class ConvNeXt_Base_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 88591464,
-            "acc@1": 84.062,
-            "acc@5": 96.870,
+            "metrics": {
+                "acc@1": 84.062,
+                "acc@5": 96.870,
+            },
         },
     )
     DEFAULT = IMAGENET1K_V1
@@ -259,8 +265,10 @@ class ConvNeXt_Large_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 197767336,
-            "acc@1": 84.414,
-            "acc@5": 96.976,
+            "metrics": {
+                "acc@1": 84.414,
+                "acc@5": 96.976,
+            },
         },
     )
     DEFAULT = IMAGENET1K_V1
torchvision/models/densenet.py (24 changes: 16 additions & 8 deletions)
@@ -279,8 +279,10 @@ class DenseNet121_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 7978856,
-            "acc@1": 74.434,
-            "acc@5": 91.972,
+            "metrics": {
+                "acc@1": 74.434,
+                "acc@5": 91.972,
+            },
         },
     )
     DEFAULT = IMAGENET1K_V1
@@ -293,8 +295,10 @@ class DenseNet161_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 28681000,
-            "acc@1": 77.138,
-            "acc@5": 93.560,
+            "metrics": {
+                "acc@1": 77.138,
+                "acc@5": 93.560,
+            },
         },
     )
     DEFAULT = IMAGENET1K_V1
@@ -307,8 +311,10 @@ class DenseNet169_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 14149480,
-            "acc@1": 75.600,
-            "acc@5": 92.806,
+            "metrics": {
+                "acc@1": 75.600,
+                "acc@5": 92.806,
+            },
         },
     )
     DEFAULT = IMAGENET1K_V1
@@ -321,8 +327,10 @@ class DenseNet201_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 20013928,
-            "acc@1": 76.896,
-            "acc@5": 93.370,
+            "metrics": {
+                "acc@1": 76.896,
+                "acc@5": 93.370,
+            },
         },
     )
     DEFAULT = IMAGENET1K_V1
torchvision/models/detection/faster_rcnn.py (16 changes: 12 additions & 4 deletions)
@@ -383,7 +383,9 @@ class FasterRCNN_ResNet50_FPN_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 41755286,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-resnet-50-fpn",
-            "map": 37.0,
+            "metrics": {
+                "box_map": 37.0,
+            },
         },
     )
     DEFAULT = COCO_V1
@@ -397,7 +399,9 @@ class FasterRCNN_ResNet50_FPN_V2_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 43712278,
             "recipe": "https://github.com/pytorch/vision/pull/5763",
-            "map": 46.7,
+            "metrics": {
+                "box_map": 46.7,
+            },
         },
     )
     DEFAULT = COCO_V1
@@ -411,7 +415,9 @@ class FasterRCNN_MobileNet_V3_Large_FPN_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 19386354,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-fpn",
-            "map": 32.8,
+            "metrics": {
+                "box_map": 32.8,
+            },
         },
     )
     DEFAULT = COCO_V1
@@ -425,7 +431,9 @@ class FasterRCNN_MobileNet_V3_Large_320_FPN_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 19386354,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-320-fpn",
-            "map": 22.8,
+            "metrics": {
+                "box_map": 22.8,
+            },
         },
     )
     DEFAULT = COCO_V1
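Detection checkpoints follow the same pattern: the old top-level "map" is renamed to "box_map" and moved under "metrics". A short usage sketch, again assuming the multi-weight API on this branch:

from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights

weights = FasterRCNN_ResNet50_FPN_Weights.COCO_V1
# COCO box mAP is now nested under "metrics" as "box_map".
print(weights.meta["metrics"]["box_map"])  # 37.0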
torchvision/models/detection/fcos.py (4 changes: 3 additions & 1 deletion)
@@ -655,7 +655,9 @@ class FCOS_ResNet50_FPN_Weights(WeightsEnum):
             "categories": _COCO_CATEGORIES,
             "min_size": (1, 1),
             "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#fcos-resnet-50-fpn",
-            "map": 39.2,
+            "metrics": {
+                "box_map": 39.2,
+            },
         },
     )
     DEFAULT = COCO_V1
torchvision/models/detection/keypoint_rcnn.py (12 changes: 8 additions & 4 deletions)
@@ -322,8 +322,10 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 59137258,
             "recipe": "https://github.com/pytorch/vision/issues/1606",
-            "map": 50.6,
-            "map_kp": 61.1,
+            "metrics": {
+                "box_map": 50.6,
+                "kp_map": 61.1,
+            },
         },
     )
     COCO_V1 = Weights(
@@ -333,8 +335,10 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 59137258,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#keypoint-r-cnn",
-            "map": 54.6,
-            "map_kp": 65.0,
+            "metrics": {
+                "box_map": 54.6,
+                "kp_map": 65.0,
+            },
         },
     )
     DEFAULT = COCO_V1
torchvision/models/detection/mask_rcnn.py (12 changes: 8 additions & 4 deletions)
@@ -364,8 +364,10 @@ class MaskRCNN_ResNet50_FPN_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 44401393,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#mask-r-cnn",
-            "map": 37.9,
-            "map_mask": 34.6,
+            "metrics": {
+                "box_map": 37.9,
+                "mask_map": 34.6,
+            },
         },
     )
     DEFAULT = COCO_V1
@@ -379,8 +381,10 @@ class MaskRCNN_ResNet50_FPN_V2_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 46359409,
             "recipe": "https://github.com/pytorch/vision/pull/5773",
-            "map": 47.4,
-            "map_mask": 41.8,
+            "metrics": {
+                "box_map": 47.4,
+                "mask_map": 41.8,
+            },
         },
     )
     DEFAULT = COCO_V1
torchvision/models/detection/retinanet.py (8 changes: 6 additions & 2 deletions)
@@ -687,7 +687,9 @@ class RetinaNet_ResNet50_FPN_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 34014999,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#retinanet",
-            "map": 36.4,
+            "metrics": {
+                "box_map": 36.4,
+            },
         },
     )
     DEFAULT = COCO_V1
@@ -701,7 +703,9 @@ class RetinaNet_ResNet50_FPN_V2_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 38198935,
             "recipe": "https://github.com/pytorch/vision/pull/5756",
-            "map": 41.5,
+            "metrics": {
+                "box_map": 41.5,
+            },
         },
     )
     DEFAULT = COCO_V1
torchvision/models/detection/ssd.py (4 changes: 3 additions & 1 deletion)
@@ -34,7 +34,9 @@ class SSD300_VGG16_Weights(WeightsEnum):
             "categories": _COCO_CATEGORIES,
             "min_size": (1, 1),
             "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssd300-vgg16",
-            "map": 25.1,
+            "metrics": {
+                "box_map": 25.1,
+            },
         },
     )
     DEFAULT = COCO_V1
torchvision/models/detection/ssdlite.py (4 changes: 3 additions & 1 deletion)
@@ -193,7 +193,9 @@ class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum):
             "categories": _COCO_CATEGORIES,
             "min_size": (1, 1),
             "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssdlite320-mobilenetv3-large",
-            "map": 21.3,
+            "metrics": {
+                "box_map": 21.3,
+            },
         },
     )
     DEFAULT = COCO_V1