diff --git a/docs/source/conf.py b/docs/source/conf.py index 137f4f86122..3dbe5c04810 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -347,8 +347,8 @@ def generate_classification_table(): content = [ ( f":class:`{w} <{type(w).__name__}>`", - w.meta["acc@1"], - w.meta["acc@5"], + w.meta["metrics"]["acc@1"], + w.meta["metrics"]["acc@5"], f"{w.meta['num_params']/1e6:.1f}M", f"`link <{w.meta['recipe']}>`__", ) diff --git a/test/test_extended_models.py b/test/test_extended_models.py index be45a53b17f..651585cfa7d 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -79,20 +79,32 @@ def test_naming_conventions(model_fn): ) @run_if_test_with_extended def test_schema_meta_validation(model_fn): - # TODO: add list of permitted fields - classification_fields = ["categories", "acc@1", "acc@5"] + # list of all possible supported high-level fields for weights meta-data + permitted_fields = { + "backend", + "categories", + "keypoint_names", + "license", + "metrics", + "min_size", + "num_params", + "recipe", + "unquantized", + } + # mandatory fields for each computer vision task + classification_fields = {"categories", ("metrics", "acc@1"), ("metrics", "acc@5")} defaults = { - "all": ["recipe", "num_params", "min_size"], + "all": {"metrics", "min_size", "num_params", "recipe"}, "models": classification_fields, - "detection": ["categories", "map"], - "quantization": classification_fields + ["backend", "unquantized"], - "segmentation": ["categories", "mIoU", "acc"], + "detection": {"categories", ("metrics", "box_map")}, + "quantization": classification_fields | {"backend", "unquantized"}, + "segmentation": {"categories", ("metrics", "miou"), ("metrics", "pixel_acc")}, "video": classification_fields, - "optical_flow": [], + "optical_flow": set(), } model_name = model_fn.__name__ module_name = model_fn.__module__.split(".")[-2] - fields = set(defaults["all"] + defaults[module_name]) + fields = defaults["all"] | defaults[module_name] weights_enum = _get_model_weights(model_fn) if len(weights_enum) == 0: @@ -102,9 +114,10 @@ def test_schema_meta_validation(model_fn): incorrect_params = [] bad_names = [] for w in weights_enum: - missing_fields = fields - set(w.meta.keys()) - if missing_fields: - problematic_weights[w] = missing_fields + missing_fields = fields - (set(w.meta.keys()) | set(("metrics", x) for x in w.meta.get("metrics", {}).keys())) + unsupported_fields = set(w.meta.keys()) - permitted_fields + if missing_fields or unsupported_fields: + problematic_weights[w] = {"missing": missing_fields, "unsupported": unsupported_fields} if w == weights_enum.DEFAULT: if module_name == "quantization": # parameters() count doesn't work well with quantization, so we check against the non-quantized diff --git a/torchvision/models/alexnet.py b/torchvision/models/alexnet.py index 327530d6b2b..348417693b2 100644 --- a/torchvision/models/alexnet.py +++ b/torchvision/models/alexnet.py @@ -61,8 +61,10 @@ class AlexNet_Weights(WeightsEnum): "min_size": (63, 63), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg", - "acc@1": 56.522, - "acc@5": 79.066, + "metrics": { + "acc@1": 56.522, + "acc@5": 79.066, + }, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/convnext.py b/torchvision/models/convnext.py index 4b64157634e..b341a6d6691 100644 --- a/torchvision/models/convnext.py +++ b/torchvision/models/convnext.py @@ -217,8 +217,10 @@ class ConvNeXt_Tiny_Weights(WeightsEnum): meta={ 
**_COMMON_META, "num_params": 28589128, - "acc@1": 82.520, - "acc@5": 96.146, + "metrics": { + "acc@1": 82.520, + "acc@5": 96.146, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -231,8 +233,10 @@ class ConvNeXt_Small_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 50223688, - "acc@1": 83.616, - "acc@5": 96.650, + "metrics": { + "acc@1": 83.616, + "acc@5": 96.650, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -245,8 +249,10 @@ class ConvNeXt_Base_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 88591464, - "acc@1": 84.062, - "acc@5": 96.870, + "metrics": { + "acc@1": 84.062, + "acc@5": 96.870, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -259,8 +265,10 @@ class ConvNeXt_Large_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 197767336, - "acc@1": 84.414, - "acc@5": 96.976, + "metrics": { + "acc@1": 84.414, + "acc@5": 96.976, + }, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/densenet.py b/torchvision/models/densenet.py index 38e726d1825..405af0cbfde 100644 --- a/torchvision/models/densenet.py +++ b/torchvision/models/densenet.py @@ -279,8 +279,10 @@ class DenseNet121_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 7978856, - "acc@1": 74.434, - "acc@5": 91.972, + "metrics": { + "acc@1": 74.434, + "acc@5": 91.972, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -293,8 +295,10 @@ class DenseNet161_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 28681000, - "acc@1": 77.138, - "acc@5": 93.560, + "metrics": { + "acc@1": 77.138, + "acc@5": 93.560, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -307,8 +311,10 @@ class DenseNet169_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 14149480, - "acc@1": 75.600, - "acc@5": 92.806, + "metrics": { + "acc@1": 75.600, + "acc@5": 92.806, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -321,8 +327,10 @@ class DenseNet201_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 20013928, - "acc@1": 76.896, - "acc@5": 93.370, + "metrics": { + "acc@1": 76.896, + "acc@5": 93.370, + }, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index 08274e7bc75..f1da6f77835 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -383,7 +383,9 @@ class FasterRCNN_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 41755286, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-resnet-50-fpn", - "map": 37.0, + "metrics": { + "box_map": 37.0, + }, }, ) DEFAULT = COCO_V1 @@ -397,7 +399,9 @@ class FasterRCNN_ResNet50_FPN_V2_Weights(WeightsEnum): **_COMMON_META, "num_params": 43712278, "recipe": "https://github.com/pytorch/vision/pull/5763", - "map": 46.7, + "metrics": { + "box_map": 46.7, + }, }, ) DEFAULT = COCO_V1 @@ -411,7 +415,9 @@ class FasterRCNN_MobileNet_V3_Large_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 19386354, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-fpn", - "map": 32.8, + "metrics": { + "box_map": 32.8, + }, }, ) DEFAULT = COCO_V1 @@ -425,7 +431,9 @@ class FasterRCNN_MobileNet_V3_Large_320_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 19386354, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-320-fpn", - "map": 22.8, + "metrics": { + "box_map": 22.8, + }, }, ) DEFAULT = COCO_V1 diff --git a/torchvision/models/detection/fcos.py b/torchvision/models/detection/fcos.py index 438566d3ecd..628a343e5a3 100644 --- 
a/torchvision/models/detection/fcos.py +++ b/torchvision/models/detection/fcos.py @@ -655,7 +655,9 @@ class FCOS_ResNet50_FPN_Weights(WeightsEnum): "categories": _COCO_CATEGORIES, "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#fcos-resnet-50-fpn", - "map": 39.2, + "metrics": { + "box_map": 39.2, + }, }, ) DEFAULT = COCO_V1 diff --git a/torchvision/models/detection/keypoint_rcnn.py b/torchvision/models/detection/keypoint_rcnn.py index b864c2854ef..e0a22194c83 100644 --- a/torchvision/models/detection/keypoint_rcnn.py +++ b/torchvision/models/detection/keypoint_rcnn.py @@ -322,8 +322,10 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 59137258, "recipe": "https://github.com/pytorch/vision/issues/1606", - "map": 50.6, - "map_kp": 61.1, + "metrics": { + "box_map": 50.6, + "kp_map": 61.1, + }, }, ) COCO_V1 = Weights( @@ -333,8 +335,10 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 59137258, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#keypoint-r-cnn", - "map": 54.6, - "map_kp": 65.0, + "metrics": { + "box_map": 54.6, + "kp_map": 65.0, + }, }, ) DEFAULT = COCO_V1 diff --git a/torchvision/models/detection/mask_rcnn.py b/torchvision/models/detection/mask_rcnn.py index 44254913a22..1f5953af48e 100644 --- a/torchvision/models/detection/mask_rcnn.py +++ b/torchvision/models/detection/mask_rcnn.py @@ -364,8 +364,10 @@ class MaskRCNN_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 44401393, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#mask-r-cnn", - "map": 37.9, - "map_mask": 34.6, + "metrics": { + "box_map": 37.9, + "mask_map": 34.6, + }, }, ) DEFAULT = COCO_V1 @@ -379,8 +381,10 @@ class MaskRCNN_ResNet50_FPN_V2_Weights(WeightsEnum): **_COMMON_META, "num_params": 46359409, "recipe": "https://github.com/pytorch/vision/pull/5773", - "map": 47.4, - "map_mask": 41.8, + "metrics": { + "box_map": 47.4, + "mask_map": 41.8, + }, }, ) DEFAULT = COCO_V1 diff --git a/torchvision/models/detection/retinanet.py b/torchvision/models/detection/retinanet.py index ed40702eed8..671eab864a2 100644 --- a/torchvision/models/detection/retinanet.py +++ b/torchvision/models/detection/retinanet.py @@ -687,7 +687,9 @@ class RetinaNet_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 34014999, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#retinanet", - "map": 36.4, + "metrics": { + "box_map": 36.4, + }, }, ) DEFAULT = COCO_V1 @@ -701,7 +703,9 @@ class RetinaNet_ResNet50_FPN_V2_Weights(WeightsEnum): **_COMMON_META, "num_params": 38198935, "recipe": "https://github.com/pytorch/vision/pull/5756", - "map": 41.5, + "metrics": { + "box_map": 41.5, + }, }, ) DEFAULT = COCO_V1 diff --git a/torchvision/models/detection/ssd.py b/torchvision/models/detection/ssd.py index 7d9800f15ea..e0045a21640 100644 --- a/torchvision/models/detection/ssd.py +++ b/torchvision/models/detection/ssd.py @@ -34,7 +34,9 @@ class SSD300_VGG16_Weights(WeightsEnum): "categories": _COCO_CATEGORIES, "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssd300-vgg16", - "map": 25.1, + "metrics": { + "box_map": 25.1, + }, }, ) DEFAULT = COCO_V1 diff --git a/torchvision/models/detection/ssdlite.py b/torchvision/models/detection/ssdlite.py index 73afc1eaca6..a18e166c429 100644 --- a/torchvision/models/detection/ssdlite.py +++ b/torchvision/models/detection/ssdlite.py @@ -193,7 
+193,9 @@ class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum): "categories": _COCO_CATEGORIES, "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssdlite320-mobilenetv3-large", - "map": 21.3, + "metrics": { + "box_map": 21.3, + }, }, ) DEFAULT = COCO_V1 diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index ef7e0c323a4..e9568f5917e 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -456,8 +456,10 @@ class EfficientNet_B0_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 5288548, - "acc@1": 77.692, - "acc@5": 93.532, + "metrics": { + "acc@1": 77.692, + "acc@5": 93.532, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -472,8 +474,10 @@ class EfficientNet_B1_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 7794184, - "acc@1": 78.642, - "acc@5": 94.186, + "metrics": { + "acc@1": 78.642, + "acc@5": 94.186, + }, }, ) IMAGENET1K_V2 = Weights( @@ -485,8 +489,10 @@ class EfficientNet_B1_Weights(WeightsEnum): **_COMMON_META_V1, "num_params": 7794184, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-lr-wd-crop-tuning", - "acc@1": 79.838, - "acc@5": 94.934, + "metrics": { + "acc@1": 79.838, + "acc@5": 94.934, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -501,8 +507,10 @@ class EfficientNet_B2_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 9109994, - "acc@1": 80.608, - "acc@5": 95.310, + "metrics": { + "acc@1": 80.608, + "acc@5": 95.310, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -517,8 +525,10 @@ class EfficientNet_B3_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 12233232, - "acc@1": 82.008, - "acc@5": 96.054, + "metrics": { + "acc@1": 82.008, + "acc@5": 96.054, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -533,8 +543,10 @@ class EfficientNet_B4_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 19341616, - "acc@1": 83.384, - "acc@5": 96.594, + "metrics": { + "acc@1": 83.384, + "acc@5": 96.594, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -549,8 +561,10 @@ class EfficientNet_B5_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 30389784, - "acc@1": 83.444, - "acc@5": 96.628, + "metrics": { + "acc@1": 83.444, + "acc@5": 96.628, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -565,8 +579,10 @@ class EfficientNet_B6_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 43040704, - "acc@1": 84.008, - "acc@5": 96.916, + "metrics": { + "acc@1": 84.008, + "acc@5": 96.916, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -581,8 +597,10 @@ class EfficientNet_B7_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 66347960, - "acc@1": 84.122, - "acc@5": 96.908, + "metrics": { + "acc@1": 84.122, + "acc@5": 96.908, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -600,8 +618,10 @@ class EfficientNet_V2_S_Weights(WeightsEnum): meta={ **_COMMON_META_V2, "num_params": 21458488, - "acc@1": 84.228, - "acc@5": 96.878, + "metrics": { + "acc@1": 84.228, + "acc@5": 96.878, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -619,8 +639,10 @@ class EfficientNet_V2_M_Weights(WeightsEnum): meta={ **_COMMON_META_V2, "num_params": 54139356, - "acc@1": 85.112, - "acc@5": 97.156, + "metrics": { + "acc@1": 85.112, + "acc@5": 97.156, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -640,8 +662,10 @@ class EfficientNet_V2_L_Weights(WeightsEnum): meta={ **_COMMON_META_V2, "num_params": 118515272, - "acc@1": 85.808, - "acc@5": 97.788, + "metrics": { + "acc@1": 85.808, + "acc@5": 97.788, + }, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/googlenet.py 
b/torchvision/models/googlenet.py index e5f420198e0..94ce3c99b47 100644 --- a/torchvision/models/googlenet.py +++ b/torchvision/models/googlenet.py @@ -284,8 +284,10 @@ class GoogLeNet_Weights(WeightsEnum): "min_size": (15, 15), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#googlenet", - "acc@1": 69.778, - "acc@5": 89.530, + "metrics": { + "acc@1": 69.778, + "acc@5": 89.530, + }, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/inception.py b/torchvision/models/inception.py index f7e006233af..1428f7f7002 100644 --- a/torchvision/models/inception.py +++ b/torchvision/models/inception.py @@ -416,8 +416,10 @@ class Inception_V3_Weights(WeightsEnum): "min_size": (75, 75), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#inception-v3", - "acc@1": 77.294, - "acc@5": 93.450, + "metrics": { + "acc@1": 77.294, + "acc@5": 93.450, + }, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/mnasnet.py b/torchvision/models/mnasnet.py index 27967b50608..ac15722ca10 100644 --- a/torchvision/models/mnasnet.py +++ b/torchvision/models/mnasnet.py @@ -225,8 +225,10 @@ class MNASNet0_5_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 2218512, - "acc@1": 67.734, - "acc@5": 87.490, + "metrics": { + "acc@1": 67.734, + "acc@5": 87.490, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -244,8 +246,10 @@ class MNASNet1_0_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 4383312, - "acc@1": 73.456, - "acc@5": 91.510, + "metrics": { + "acc@1": 73.456, + "acc@5": 91.510, + }, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/mobilenetv2.py b/torchvision/models/mobilenetv2.py index af8b51049fd..5763fb79acb 100644 --- a/torchvision/models/mobilenetv2.py +++ b/torchvision/models/mobilenetv2.py @@ -208,8 +208,10 @@ class MobileNet_V2_Weights(WeightsEnum): meta={ **_COMMON_META, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv2", - "acc@1": 71.878, - "acc@5": 90.286, + "metrics": { + "acc@1": 71.878, + "acc@5": 90.286, + }, }, ) IMAGENET1K_V2 = Weights( @@ -218,8 +220,10 @@ class MobileNet_V2_Weights(WeightsEnum): meta={ **_COMMON_META, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-reg-tuning", - "acc@1": 72.154, - "acc@5": 90.822, + "metrics": { + "acc@1": 72.154, + "acc@5": 90.822, + }, }, ) DEFAULT = IMAGENET1K_V2 diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py index 128ff580641..7e68369d265 100644 --- a/torchvision/models/mobilenetv3.py +++ b/torchvision/models/mobilenetv3.py @@ -317,8 +317,10 @@ class MobileNet_V3_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5483032, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv3-large--small", - "acc@1": 74.042, - "acc@5": 91.340, + "metrics": { + "acc@1": 74.042, + "acc@5": 91.340, + }, }, ) IMAGENET1K_V2 = Weights( @@ -328,8 +330,10 @@ class MobileNet_V3_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5483032, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-reg-tuning", - "acc@1": 75.274, - "acc@5": 92.566, + "metrics": { + "acc@1": 75.274, + "acc@5": 92.566, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -343,8 +347,10 @@ class MobileNet_V3_Small_Weights(WeightsEnum): **_COMMON_META, "num_params": 2542856, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv3-large--small", - "acc@1": 
67.668, - "acc@5": 87.402, + "metrics": { + "acc@1": 67.668, + "acc@5": 87.402, + }, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/optical_flow/raft.py b/torchvision/models/optical_flow/raft.py index 346641ae83d..869477f0d81 100644 --- a/torchvision/models/optical_flow/raft.py +++ b/torchvision/models/optical_flow/raft.py @@ -525,10 +525,12 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", - "sintel_train_cleanpass_epe": 1.4411, - "sintel_train_finalpass_epe": 2.7894, - "kitti_train_per_image_epe": 5.0172, - "kitti_train_f1-all": 17.4506, + "metrics": { + "sintel_train_cleanpass_epe": 1.4411, + "sintel_train_finalpass_epe": 2.7894, + "kitti_train_per_image_epe": 5.0172, + "kitti_train_fl_all": 17.4506, + }, }, ) @@ -540,10 +542,12 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", - "sintel_train_cleanpass_epe": 1.3822, - "sintel_train_finalpass_epe": 2.7161, - "kitti_train_per_image_epe": 4.5118, - "kitti_train_f1-all": 16.0679, + "metrics": { + "sintel_train_cleanpass_epe": 1.3822, + "sintel_train_finalpass_epe": 2.7161, + "kitti_train_per_image_epe": 4.5118, + "kitti_train_fl_all": 16.0679, + }, }, ) @@ -555,8 +559,10 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", - "sintel_test_cleanpass_epe": 1.94, - "sintel_test_finalpass_epe": 3.18, + "metrics": { + "sintel_test_cleanpass_epe": 1.94, + "sintel_test_finalpass_epe": 3.18, + }, }, ) @@ -570,8 +576,10 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", - "sintel_test_cleanpass_epe": 1.819, - "sintel_test_finalpass_epe": 3.067, + "metrics": { + "sintel_test_cleanpass_epe": 1.819, + "sintel_test_finalpass_epe": 3.067, + }, }, ) @@ -583,7 +591,9 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", - "kitti_test_f1-all": 5.10, + "metrics": { + "kitti_test_fl_all": 5.10, + }, }, ) @@ -598,7 +608,9 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", - "kitti_test_f1-all": 5.19, + "metrics": { + "kitti_test_fl_all": 5.19, + }, }, ) @@ -614,10 +626,12 @@ class Raft_Small_Weights(WeightsEnum): **_COMMON_META, "num_params": 990162, "recipe": "https://github.com/princeton-vl/RAFT", - "sintel_train_cleanpass_epe": 2.1231, - "sintel_train_finalpass_epe": 3.2790, - "kitti_train_per_image_epe": 7.6557, - "kitti_train_f1-all": 25.2801, + "metrics": { + "sintel_train_cleanpass_epe": 2.1231, + "sintel_train_finalpass_epe": 3.2790, + "kitti_train_per_image_epe": 7.6557, + "kitti_train_fl_all": 25.2801, + }, }, ) C_T_V2 = Weights( @@ -628,10 +642,12 @@ class Raft_Small_Weights(WeightsEnum): **_COMMON_META, "num_params": 990162, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", - "sintel_train_cleanpass_epe": 1.9901, - "sintel_train_finalpass_epe": 3.2831, - "kitti_train_per_image_epe": 7.5978, - "kitti_train_f1-all": 25.2369, + "metrics": { + "sintel_train_cleanpass_epe": 1.9901, + "sintel_train_finalpass_epe": 3.2831, + "kitti_train_per_image_epe": 7.5978, + "kitti_train_fl_all": 25.2369, + }, }, ) diff --git 
a/torchvision/models/quantization/googlenet.py b/torchvision/models/quantization/googlenet.py index 196bedf1b29..00cf37fc349 100644 --- a/torchvision/models/quantization/googlenet.py +++ b/torchvision/models/quantization/googlenet.py @@ -117,8 +117,10 @@ class GoogLeNet_QuantizedWeights(WeightsEnum): "backend": "fbgemm", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models", "unquantized": GoogLeNet_Weights.IMAGENET1K_V1, - "acc@1": 69.826, - "acc@5": 89.404, + "metrics": { + "acc@1": 69.826, + "acc@5": 89.404, + }, }, ) DEFAULT = IMAGENET1K_FBGEMM_V1 diff --git a/torchvision/models/quantization/inception.py b/torchvision/models/quantization/inception.py index d0d19b36a7e..ed53d43e8af 100644 --- a/torchvision/models/quantization/inception.py +++ b/torchvision/models/quantization/inception.py @@ -183,8 +183,10 @@ class Inception_V3_QuantizedWeights(WeightsEnum): "backend": "fbgemm", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models", "unquantized": Inception_V3_Weights.IMAGENET1K_V1, - "acc@1": 77.176, - "acc@5": 93.354, + "metrics": { + "acc@1": 77.176, + "acc@5": 93.354, + }, }, ) DEFAULT = IMAGENET1K_FBGEMM_V1 diff --git a/torchvision/models/quantization/mobilenetv2.py b/torchvision/models/quantization/mobilenetv2.py index d550b92d30a..bb0bf0b79ed 100644 --- a/torchvision/models/quantization/mobilenetv2.py +++ b/torchvision/models/quantization/mobilenetv2.py @@ -75,8 +75,10 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum): "backend": "qnnpack", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#qat-mobilenetv2", "unquantized": MobileNet_V2_Weights.IMAGENET1K_V1, - "acc@1": 71.658, - "acc@5": 90.150, + "metrics": { + "acc@1": 71.658, + "acc@5": 90.150, + }, }, ) DEFAULT = IMAGENET1K_QNNPACK_V1 diff --git a/torchvision/models/quantization/mobilenetv3.py b/torchvision/models/quantization/mobilenetv3.py index be061ca6364..f8fe39ed195 100644 --- a/torchvision/models/quantization/mobilenetv3.py +++ b/torchvision/models/quantization/mobilenetv3.py @@ -165,8 +165,10 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum): "backend": "qnnpack", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#qat-mobilenetv3", "unquantized": MobileNet_V3_Large_Weights.IMAGENET1K_V1, - "acc@1": 73.004, - "acc@5": 90.858, + "metrics": { + "acc@1": 73.004, + "acc@5": 90.858, + }, }, ) DEFAULT = IMAGENET1K_QNNPACK_V1 diff --git a/torchvision/models/quantization/resnet.py b/torchvision/models/quantization/resnet.py index b01f92ae547..4c575adc0c9 100644 --- a/torchvision/models/quantization/resnet.py +++ b/torchvision/models/quantization/resnet.py @@ -162,8 +162,10 @@ class ResNet18_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 11689512, "unquantized": ResNet18_Weights.IMAGENET1K_V1, - "acc@1": 69.494, - "acc@5": 88.882, + "metrics": { + "acc@1": 69.494, + "acc@5": 88.882, + }, }, ) DEFAULT = IMAGENET1K_FBGEMM_V1 @@ -177,8 +179,10 @@ class ResNet50_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 25557032, "unquantized": ResNet50_Weights.IMAGENET1K_V1, - "acc@1": 75.920, - "acc@5": 92.814, + "metrics": { + "acc@1": 75.920, + "acc@5": 92.814, + }, }, ) IMAGENET1K_FBGEMM_V2 = Weights( @@ -188,8 +192,10 @@ class ResNet50_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 25557032, "unquantized": ResNet50_Weights.IMAGENET1K_V2, - "acc@1": 80.282, - "acc@5": 94.976, + "metrics": { + "acc@1": 80.282, + 
"acc@5": 94.976, + }, }, ) DEFAULT = IMAGENET1K_FBGEMM_V2 @@ -203,8 +209,10 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 88791336, "unquantized": ResNeXt101_32X8D_Weights.IMAGENET1K_V1, - "acc@1": 78.986, - "acc@5": 94.480, + "metrics": { + "acc@1": 78.986, + "acc@5": 94.480, + }, }, ) IMAGENET1K_FBGEMM_V2 = Weights( @@ -214,8 +222,10 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 88791336, "unquantized": ResNeXt101_32X8D_Weights.IMAGENET1K_V2, - "acc@1": 82.574, - "acc@5": 96.132, + "metrics": { + "acc@1": 82.574, + "acc@5": 96.132, + }, }, ) DEFAULT = IMAGENET1K_FBGEMM_V2 diff --git a/torchvision/models/quantization/shufflenetv2.py b/torchvision/models/quantization/shufflenetv2.py index f795fcad5b8..44e7772aa37 100644 --- a/torchvision/models/quantization/shufflenetv2.py +++ b/torchvision/models/quantization/shufflenetv2.py @@ -117,8 +117,10 @@ class ShuffleNet_V2_X0_5_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 1366792, "unquantized": ShuffleNet_V2_X0_5_Weights.IMAGENET1K_V1, - "acc@1": 57.972, - "acc@5": 79.780, + "metrics": { + "acc@1": 57.972, + "acc@5": 79.780, + }, }, ) DEFAULT = IMAGENET1K_FBGEMM_V1 @@ -132,8 +134,10 @@ class ShuffleNet_V2_X1_0_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 2278604, "unquantized": ShuffleNet_V2_X1_0_Weights.IMAGENET1K_V1, - "acc@1": 68.360, - "acc@5": 87.582, + "metrics": { + "acc@1": 68.360, + "acc@5": 87.582, + }, }, ) DEFAULT = IMAGENET1K_FBGEMM_V1 diff --git a/torchvision/models/regnet.py b/torchvision/models/regnet.py index 821d86f11f0..8bf81d0a051 100644 --- a/torchvision/models/regnet.py +++ b/torchvision/models/regnet.py @@ -422,8 +422,10 @@ class RegNet_Y_400MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 4344144, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "acc@1": 74.046, - "acc@5": 91.716, + "metrics": { + "acc@1": 74.046, + "acc@5": 91.716, + }, }, ) IMAGENET1K_V2 = Weights( @@ -433,8 +435,10 @@ class RegNet_Y_400MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 4344144, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 75.804, - "acc@5": 92.742, + "metrics": { + "acc@1": 75.804, + "acc@5": 92.742, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -448,8 +452,10 @@ class RegNet_Y_800MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 6432512, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "acc@1": 76.420, - "acc@5": 93.136, + "metrics": { + "acc@1": 76.420, + "acc@5": 93.136, + }, }, ) IMAGENET1K_V2 = Weights( @@ -459,8 +465,10 @@ class RegNet_Y_800MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 6432512, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 78.828, - "acc@5": 94.502, + "metrics": { + "acc@1": 78.828, + "acc@5": 94.502, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -474,8 +482,10 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 11202430, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "acc@1": 77.950, - "acc@5": 93.966, + "metrics": { + "acc@1": 77.950, + "acc@5": 93.966, + }, }, ) IMAGENET1K_V2 = Weights( @@ -485,8 +495,10 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 11202430, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 80.876, - "acc@5": 95.444, + "metrics": { + "acc@1": 80.876, + "acc@5": 95.444, + }, 
}, ) DEFAULT = IMAGENET1K_V2 @@ -500,8 +512,10 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 19436338, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "acc@1": 78.948, - "acc@5": 94.576, + "metrics": { + "acc@1": 78.948, + "acc@5": 94.576, + }, }, ) IMAGENET1K_V2 = Weights( @@ -511,8 +525,10 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 19436338, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 81.982, - "acc@5": 95.972, + "metrics": { + "acc@1": 81.982, + "acc@5": 95.972, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -526,8 +542,10 @@ class RegNet_Y_8GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 39381472, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "acc@1": 80.032, - "acc@5": 95.048, + "metrics": { + "acc@1": 80.032, + "acc@5": 95.048, + }, }, ) IMAGENET1K_V2 = Weights( @@ -537,8 +555,10 @@ class RegNet_Y_8GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 39381472, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 82.828, - "acc@5": 96.330, + "metrics": { + "acc@1": 82.828, + "acc@5": 96.330, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -552,8 +572,10 @@ class RegNet_Y_16GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 83590140, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#large-models", - "acc@1": 80.424, - "acc@5": 95.240, + "metrics": { + "acc@1": 80.424, + "acc@5": 95.240, + }, }, ) IMAGENET1K_V2 = Weights( @@ -563,8 +585,10 @@ class RegNet_Y_16GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 83590140, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 82.886, - "acc@5": 96.328, + "metrics": { + "acc@1": 82.886, + "acc@5": 96.328, + }, }, ) IMAGENET1K_SWAG_E2E_V1 = Weights( @@ -575,8 +599,10 @@ class RegNet_Y_16GF_Weights(WeightsEnum): meta={ **_COMMON_SWAG_META, "num_params": 83590140, - "acc@1": 86.012, - "acc@5": 98.054, + "metrics": { + "acc@1": 86.012, + "acc@5": 98.054, + }, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -588,8 +614,10 @@ class RegNet_Y_16GF_Weights(WeightsEnum): **_COMMON_SWAG_META, "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 83590140, - "acc@1": 83.976, - "acc@5": 97.244, + "metrics": { + "acc@1": 83.976, + "acc@5": 97.244, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -603,8 +631,10 @@ class RegNet_Y_32GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 145046770, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#large-models", - "acc@1": 80.878, - "acc@5": 95.340, + "metrics": { + "acc@1": 80.878, + "acc@5": 95.340, + }, }, ) IMAGENET1K_V2 = Weights( @@ -614,8 +644,10 @@ class RegNet_Y_32GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 145046770, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 83.368, - "acc@5": 96.498, + "metrics": { + "acc@1": 83.368, + "acc@5": 96.498, + }, }, ) IMAGENET1K_SWAG_E2E_V1 = Weights( @@ -626,8 +658,10 @@ class RegNet_Y_32GF_Weights(WeightsEnum): meta={ **_COMMON_SWAG_META, "num_params": 145046770, - "acc@1": 86.838, - "acc@5": 98.362, + "metrics": { + "acc@1": 86.838, + "acc@5": 98.362, + }, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -639,8 +673,10 @@ class RegNet_Y_32GF_Weights(WeightsEnum): **_COMMON_SWAG_META, "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 145046770, - "acc@1": 84.622, - "acc@5": 97.480, + 
"metrics": { + "acc@1": 84.622, + "acc@5": 97.480, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -655,8 +691,10 @@ class RegNet_Y_128GF_Weights(WeightsEnum): meta={ **_COMMON_SWAG_META, "num_params": 644812894, - "acc@1": 88.228, - "acc@5": 98.682, + "metrics": { + "acc@1": 88.228, + "acc@5": 98.682, + }, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -668,8 +706,10 @@ class RegNet_Y_128GF_Weights(WeightsEnum): **_COMMON_SWAG_META, "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 644812894, - "acc@1": 86.068, - "acc@5": 97.844, + "metrics": { + "acc@1": 86.068, + "acc@5": 97.844, + }, }, ) DEFAULT = IMAGENET1K_SWAG_E2E_V1 @@ -683,8 +723,10 @@ class RegNet_X_400MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 5495976, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "acc@1": 72.834, - "acc@5": 90.950, + "metrics": { + "acc@1": 72.834, + "acc@5": 90.950, + }, }, ) IMAGENET1K_V2 = Weights( @@ -694,8 +736,10 @@ class RegNet_X_400MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 5495976, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "acc@1": 74.864, - "acc@5": 92.322, + "metrics": { + "acc@1": 74.864, + "acc@5": 92.322, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -709,8 +753,10 @@ class RegNet_X_800MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 7259656, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "acc@1": 75.212, - "acc@5": 92.348, + "metrics": { + "acc@1": 75.212, + "acc@5": 92.348, + }, }, ) IMAGENET1K_V2 = Weights( @@ -720,8 +766,10 @@ class RegNet_X_800MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 7259656, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "acc@1": 77.522, - "acc@5": 93.826, + "metrics": { + "acc@1": 77.522, + "acc@5": 93.826, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -735,8 +783,10 @@ class RegNet_X_1_6GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 9190136, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "acc@1": 77.040, - "acc@5": 93.440, + "metrics": { + "acc@1": 77.040, + "acc@5": 93.440, + }, }, ) IMAGENET1K_V2 = Weights( @@ -746,8 +796,10 @@ class RegNet_X_1_6GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 9190136, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "acc@1": 79.668, - "acc@5": 94.922, + "metrics": { + "acc@1": 79.668, + "acc@5": 94.922, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -761,8 +813,10 @@ class RegNet_X_3_2GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 15296552, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "acc@1": 78.364, - "acc@5": 93.992, + "metrics": { + "acc@1": 78.364, + "acc@5": 93.992, + }, }, ) IMAGENET1K_V2 = Weights( @@ -772,8 +826,10 @@ class RegNet_X_3_2GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 15296552, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 81.196, - "acc@5": 95.430, + "metrics": { + "acc@1": 81.196, + "acc@5": 95.430, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -787,8 +843,10 @@ class RegNet_X_8GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 39572648, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "acc@1": 79.344, - "acc@5": 94.686, + "metrics": { + "acc@1": 79.344, + "acc@5": 94.686, + }, }, ) IMAGENET1K_V2 = Weights( @@ -798,8 +856,10 @@ class 
RegNet_X_8GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 39572648, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 81.682, - "acc@5": 95.678, + "metrics": { + "acc@1": 81.682, + "acc@5": 95.678, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -813,8 +873,10 @@ class RegNet_X_16GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 54278536, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "acc@1": 80.058, - "acc@5": 94.944, + "metrics": { + "acc@1": 80.058, + "acc@5": 94.944, + }, }, ) IMAGENET1K_V2 = Weights( @@ -824,8 +886,10 @@ class RegNet_X_16GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 54278536, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 82.716, - "acc@5": 96.196, + "metrics": { + "acc@1": 82.716, + "acc@5": 96.196, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -839,8 +903,10 @@ class RegNet_X_32GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 107811560, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#large-models", - "acc@1": 80.622, - "acc@5": 95.248, + "metrics": { + "acc@1": 80.622, + "acc@5": 95.248, + }, }, ) IMAGENET1K_V2 = Weights( @@ -850,8 +916,10 @@ class RegNet_X_32GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 107811560, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 83.014, - "acc@5": 96.288, + "metrics": { + "acc@1": 83.014, + "acc@5": 96.288, + }, }, ) DEFAULT = IMAGENET1K_V2 diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index 25990e0d4d4..ae8edaff3c6 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -315,8 +315,10 @@ class ResNet18_Weights(WeightsEnum): **_COMMON_META, "num_params": 11689512, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "acc@1": 69.758, - "acc@5": 89.078, + "metrics": { + "acc@1": 69.758, + "acc@5": 89.078, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -330,8 +332,10 @@ class ResNet34_Weights(WeightsEnum): **_COMMON_META, "num_params": 21797672, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "acc@1": 73.314, - "acc@5": 91.420, + "metrics": { + "acc@1": 73.314, + "acc@5": 91.420, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -345,8 +349,10 @@ class ResNet50_Weights(WeightsEnum): **_COMMON_META, "num_params": 25557032, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "acc@1": 76.130, - "acc@5": 92.862, + "metrics": { + "acc@1": 76.130, + "acc@5": 92.862, + }, }, ) IMAGENET1K_V2 = Weights( @@ -356,8 +362,10 @@ class ResNet50_Weights(WeightsEnum): **_COMMON_META, "num_params": 25557032, "recipe": "https://github.com/pytorch/vision/issues/3995#issuecomment-1013906621", - "acc@1": 80.858, - "acc@5": 95.434, + "metrics": { + "acc@1": 80.858, + "acc@5": 95.434, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -371,8 +379,10 @@ class ResNet101_Weights(WeightsEnum): **_COMMON_META, "num_params": 44549160, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "acc@1": 77.374, - "acc@5": 93.546, + "metrics": { + "acc@1": 77.374, + "acc@5": 93.546, + }, }, ) IMAGENET1K_V2 = Weights( @@ -382,8 +392,10 @@ class ResNet101_Weights(WeightsEnum): **_COMMON_META, "num_params": 44549160, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 81.886, - "acc@5": 95.780, + "metrics": { + "acc@1": 81.886, + "acc@5": 95.780, + }, }, ) DEFAULT = IMAGENET1K_V2 
@@ -397,8 +409,10 @@ class ResNet152_Weights(WeightsEnum): **_COMMON_META, "num_params": 60192808, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "acc@1": 78.312, - "acc@5": 94.046, + "metrics": { + "acc@1": 78.312, + "acc@5": 94.046, + }, }, ) IMAGENET1K_V2 = Weights( @@ -408,8 +422,10 @@ class ResNet152_Weights(WeightsEnum): **_COMMON_META, "num_params": 60192808, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 82.284, - "acc@5": 96.002, + "metrics": { + "acc@1": 82.284, + "acc@5": 96.002, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -423,8 +439,10 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): **_COMMON_META, "num_params": 25028904, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext", - "acc@1": 77.618, - "acc@5": 93.698, + "metrics": { + "acc@1": 77.618, + "acc@5": 93.698, + }, }, ) IMAGENET1K_V2 = Weights( @@ -434,8 +452,10 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): **_COMMON_META, "num_params": 25028904, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 81.198, - "acc@5": 95.340, + "metrics": { + "acc@1": 81.198, + "acc@5": 95.340, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -449,8 +469,10 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): **_COMMON_META, "num_params": 88791336, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext", - "acc@1": 79.312, - "acc@5": 94.526, + "metrics": { + "acc@1": 79.312, + "acc@5": 94.526, + }, }, ) IMAGENET1K_V2 = Weights( @@ -460,8 +482,10 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): **_COMMON_META, "num_params": 88791336, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "acc@1": 82.834, - "acc@5": 96.228, + "metrics": { + "acc@1": 82.834, + "acc@5": 96.228, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -475,8 +499,10 @@ class Wide_ResNet50_2_Weights(WeightsEnum): **_COMMON_META, "num_params": 68883240, "recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439", - "acc@1": 78.468, - "acc@5": 94.086, + "metrics": { + "acc@1": 78.468, + "acc@5": 94.086, + }, }, ) IMAGENET1K_V2 = Weights( @@ -486,8 +512,10 @@ class Wide_ResNet50_2_Weights(WeightsEnum): **_COMMON_META, "num_params": 68883240, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "acc@1": 81.602, - "acc@5": 95.758, + "metrics": { + "acc@1": 81.602, + "acc@5": 95.758, + }, }, ) DEFAULT = IMAGENET1K_V2 @@ -501,8 +529,10 @@ class Wide_ResNet101_2_Weights(WeightsEnum): **_COMMON_META, "num_params": 126886696, "recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439", - "acc@1": 78.848, - "acc@5": 94.284, + "metrics": { + "acc@1": 78.848, + "acc@5": 94.284, + }, }, ) IMAGENET1K_V2 = Weights( @@ -512,8 +542,10 @@ class Wide_ResNet101_2_Weights(WeightsEnum): **_COMMON_META, "num_params": 126886696, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "acc@1": 82.510, - "acc@5": 96.020, + "metrics": { + "acc@1": 82.510, + "acc@5": 96.020, + }, }, ) DEFAULT = IMAGENET1K_V2 diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index f4fbab128bb..e8b39165f9b 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -142,8 +142,10 @@ class DeepLabV3_ResNet50_Weights(WeightsEnum): **_COMMON_META, "num_params": 42004074, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_resnet50", - "mIoU": 
66.4, - "acc": 92.4, + "metrics": { + "miou": 66.4, + "pixel_acc": 92.4, + }, }, ) DEFAULT = COCO_WITH_VOC_LABELS_V1 @@ -157,8 +159,10 @@ class DeepLabV3_ResNet101_Weights(WeightsEnum): **_COMMON_META, "num_params": 60996202, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#fcn_resnet101", - "mIoU": 67.4, - "acc": 92.4, + "metrics": { + "miou": 67.4, + "pixel_acc": 92.4, + }, }, ) DEFAULT = COCO_WITH_VOC_LABELS_V1 @@ -172,8 +176,10 @@ class DeepLabV3_MobileNet_V3_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 11029328, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_mobilenet_v3_large", - "mIoU": 60.3, - "acc": 91.2, + "metrics": { + "miou": 60.3, + "pixel_acc": 91.2, + }, }, ) DEFAULT = COCO_WITH_VOC_LABELS_V1 diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index 09a62c97089..dddd50ae4b8 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -61,8 +61,10 @@ class FCN_ResNet50_Weights(WeightsEnum): **_COMMON_META, "num_params": 35322218, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#fcn_resnet50", - "mIoU": 60.5, - "acc": 91.4, + "metrics": { + "miou": 60.5, + "pixel_acc": 91.4, + }, }, ) DEFAULT = COCO_WITH_VOC_LABELS_V1 @@ -76,8 +78,10 @@ class FCN_ResNet101_Weights(WeightsEnum): **_COMMON_META, "num_params": 54314346, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_resnet101", - "mIoU": 63.7, - "acc": 91.9, + "metrics": { + "miou": 63.7, + "pixel_acc": 91.9, + }, }, ) DEFAULT = COCO_WITH_VOC_LABELS_V1 diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py index 48107f13466..316c91d2cea 100644 --- a/torchvision/models/segmentation/lraspp.py +++ b/torchvision/models/segmentation/lraspp.py @@ -102,8 +102,10 @@ class LRASPP_MobileNet_V3_Large_Weights(WeightsEnum): "categories": _VOC_CATEGORIES, "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#lraspp_mobilenet_v3_large", - "mIoU": 57.9, - "acc": 91.2, + "metrics": { + "miou": 57.9, + "pixel_acc": 91.2, + }, }, ) DEFAULT = COCO_WITH_VOC_LABELS_V1 diff --git a/torchvision/models/shufflenetv2.py b/torchvision/models/shufflenetv2.py index c4f6f2466b0..77f07438b20 100644 --- a/torchvision/models/shufflenetv2.py +++ b/torchvision/models/shufflenetv2.py @@ -197,8 +197,10 @@ class ShuffleNet_V2_X0_5_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 1366792, - "acc@1": 69.362, - "acc@5": 88.316, + "metrics": { + "acc@1": 69.362, + "acc@5": 88.316, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -211,8 +213,10 @@ class ShuffleNet_V2_X1_0_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 2278604, - "acc@1": 60.552, - "acc@5": 81.746, + "metrics": { + "acc@1": 60.552, + "acc@5": 81.746, + }, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/squeezenet.py b/torchvision/models/squeezenet.py index 43fe8a516e3..c01b4d576d6 100644 --- a/torchvision/models/squeezenet.py +++ b/torchvision/models/squeezenet.py @@ -128,8 +128,10 @@ class SqueezeNet1_0_Weights(WeightsEnum): **_COMMON_META, "min_size": (21, 21), "num_params": 1248424, - "acc@1": 58.092, - "acc@5": 80.420, + "metrics": { + "acc@1": 58.092, + "acc@5": 80.420, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -143,8 +145,10 @@ class SqueezeNet1_1_Weights(WeightsEnum): **_COMMON_META, "min_size": (17, 17), "num_params": 1235496, - "acc@1": 58.178, - "acc@5": 
80.624, + "metrics": { + "acc@1": 58.178, + "acc@5": 80.624, + }, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/vgg.py b/torchvision/models/vgg.py index 89e08b331f6..68cbae7957d 100644 --- a/torchvision/models/vgg.py +++ b/torchvision/models/vgg.py @@ -120,8 +120,10 @@ class VGG11_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 132863336, - "acc@1": 69.020, - "acc@5": 88.628, + "metrics": { + "acc@1": 69.020, + "acc@5": 88.628, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -134,8 +136,10 @@ class VGG11_BN_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 132868840, - "acc@1": 70.370, - "acc@5": 89.810, + "metrics": { + "acc@1": 70.370, + "acc@5": 89.810, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -148,8 +152,10 @@ class VGG13_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 133047848, - "acc@1": 69.928, - "acc@5": 89.246, + "metrics": { + "acc@1": 69.928, + "acc@5": 89.246, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -162,8 +168,10 @@ class VGG13_BN_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 133053736, - "acc@1": 71.586, - "acc@5": 90.374, + "metrics": { + "acc@1": 71.586, + "acc@5": 90.374, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -176,8 +184,10 @@ class VGG16_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 138357544, - "acc@1": 71.592, - "acc@5": 90.382, + "metrics": { + "acc@1": 71.592, + "acc@5": 90.382, + }, }, ) # We port the features of a VGG16 backbone trained by amdegroot because unlike the one on TorchVision, it uses the @@ -196,8 +206,10 @@ class VGG16_Weights(WeightsEnum): "num_params": 138357544, "categories": None, "recipe": "https://github.com/amdegroot/ssd.pytorch#training-ssd", - "acc@1": float("nan"), - "acc@5": float("nan"), + "metrics": { + "acc@1": float("nan"), + "acc@5": float("nan"), + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -210,8 +222,10 @@ class VGG16_BN_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 138365992, - "acc@1": 73.360, - "acc@5": 91.516, + "metrics": { + "acc@1": 73.360, + "acc@5": 91.516, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -224,8 +238,10 @@ class VGG19_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 143667240, - "acc@1": 72.376, - "acc@5": 90.876, + "metrics": { + "acc@1": 72.376, + "acc@5": 90.876, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -238,8 +254,10 @@ class VGG19_BN_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 143678248, - "acc@1": 74.218, - "acc@5": 91.842, + "metrics": { + "acc@1": 74.218, + "acc@5": 91.842, + }, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index dab837571bd..777057a088a 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -322,8 +322,10 @@ class R3D_18_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 33371472, - "acc@1": 52.75, - "acc@5": 75.45, + "metrics": { + "acc@1": 52.75, + "acc@5": 75.45, + }, }, ) DEFAULT = KINETICS400_V1 @@ -336,8 +338,10 @@ class MC3_18_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 11695440, - "acc@1": 53.90, - "acc@5": 76.29, + "metrics": { + "acc@1": 53.90, + "acc@5": 76.29, + }, }, ) DEFAULT = KINETICS400_V1 @@ -350,8 +354,10 @@ class R2Plus1D_18_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 31505325, - "acc@1": 57.50, - "acc@5": 78.81, + "metrics": { + "acc@1": 57.50, + "acc@5": 78.81, + }, }, ) DEFAULT = KINETICS400_V1 diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py index f85404c4bde..60ad2d4924a 100644 --- 
a/torchvision/models/vision_transformer.py +++ b/torchvision/models/vision_transformer.py @@ -328,8 +328,10 @@ class ViT_B_16_Weights(WeightsEnum): "num_params": 86567656, "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_16", - "acc@1": 81.072, - "acc@5": 95.318, + "metrics": { + "acc@1": 81.072, + "acc@5": 95.318, + }, }, ) IMAGENET1K_SWAG_E2E_V1 = Weights( @@ -344,8 +346,10 @@ class ViT_B_16_Weights(WeightsEnum): **_COMMON_SWAG_META, "num_params": 86859496, "min_size": (384, 384), - "acc@1": 85.304, - "acc@5": 97.650, + "metrics": { + "acc@1": 85.304, + "acc@5": 97.650, + }, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -361,8 +365,10 @@ class ViT_B_16_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 86567656, "min_size": (224, 224), - "acc@1": 81.886, - "acc@5": 96.180, + "metrics": { + "acc@1": 81.886, + "acc@5": 96.180, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -377,8 +383,10 @@ class ViT_B_32_Weights(WeightsEnum): "num_params": 88224232, "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_32", - "acc@1": 75.912, - "acc@5": 92.466, + "metrics": { + "acc@1": 75.912, + "acc@5": 92.466, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -393,8 +401,10 @@ class ViT_L_16_Weights(WeightsEnum): "num_params": 304326632, "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_16", - "acc@1": 79.662, - "acc@5": 94.638, + "metrics": { + "acc@1": 79.662, + "acc@5": 94.638, + }, }, ) IMAGENET1K_SWAG_E2E_V1 = Weights( @@ -409,8 +419,10 @@ class ViT_L_16_Weights(WeightsEnum): **_COMMON_SWAG_META, "num_params": 305174504, "min_size": (512, 512), - "acc@1": 88.064, - "acc@5": 98.512, + "metrics": { + "acc@1": 88.064, + "acc@5": 98.512, + }, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -426,8 +438,10 @@ class ViT_L_16_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 304326632, "min_size": (224, 224), - "acc@1": 85.146, - "acc@5": 97.422, + "metrics": { + "acc@1": 85.146, + "acc@5": 97.422, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -442,8 +456,10 @@ class ViT_L_32_Weights(WeightsEnum): "num_params": 306535400, "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_32", - "acc@1": 76.972, - "acc@5": 93.07, + "metrics": { + "acc@1": 76.972, + "acc@5": 93.07, + }, }, ) DEFAULT = IMAGENET1K_V1 @@ -462,8 +478,10 @@ class ViT_H_14_Weights(WeightsEnum): **_COMMON_SWAG_META, "num_params": 633470440, "min_size": (518, 518), - "acc@1": 88.552, - "acc@5": 98.694, + "metrics": { + "acc@1": 88.552, + "acc@5": 98.694, + }, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -479,8 +497,10 @@ class ViT_H_14_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 632045800, "min_size": (224, 224), - "acc@1": 85.708, - "acc@5": 97.730, + "metrics": { + "acc@1": 85.708, + "acc@5": 97.730, + }, }, ) DEFAULT = IMAGENET1K_SWAG_E2E_V1
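
For reviewers trying the new schema locally, here is a minimal sketch (assuming a torchvision build that already includes this patch) of how the nested `metrics` dict is consumed: the direct lookup now used by the docs table in `conf.py`, and the `("metrics", <name>)` traversal used by `test_schema_meta_validation`.

```python
# Minimal sketch of consuming the nested "metrics" meta-data introduced by this patch.
# Assumes a torchvision build that already contains these changes.
from torchvision.models import ResNet50_Weights

weights = ResNet50_Weights.IMAGENET1K_V2

# Docs-style lookup: metrics now live one level down, under meta["metrics"].
print(weights.meta["metrics"]["acc@1"], weights.meta["metrics"]["acc@5"])

# Test-style traversal: flatten ("metrics", <name>) pairs so that mandatory
# per-task fields such as ("metrics", "acc@1") can be checked with set arithmetic,
# mirroring what test_schema_meta_validation does above.
required = {"recipe", "num_params", "min_size", ("metrics", "acc@1"), ("metrics", "acc@5")}
available = set(weights.meta.keys()) | {("metrics", m) for m in weights.meta.get("metrics", {})}
assert not (required - available), f"missing fields: {required - available}"
```

Note that after this change the old flat lookups such as `weights.meta["acc@1"]` raise `KeyError`, which is why the docs table generator and the schema test are updated in the same patch as the model weight definitions.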