diff --git a/docs/source/conf.py b/docs/source/conf.py
index e4db34c3889..014eb3c3ae9 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -334,25 +334,22 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
     lines.append("")
 
     for field in obj:
-        lines += [f"**{str(field)}**:", ""]
-
-        table = []
-
-        # the `meta` dict contains another embedded `metrics` dict. To
-        # simplify the table generation below, we create the
-        # `meta_with_metrics` dict, where the metrics dict has been "flattened"
         meta = copy(field.meta)
-        metrics = meta.pop("metrics", {})
-        meta_with_metrics = dict(meta, **metrics)
 
-        lines += [meta_with_metrics.pop("_docs")]
+        lines += [f"**{str(field)}**:", ""]
+        lines += [meta.pop("_docs")]
 
         if field == obj.DEFAULT:
             lines += [f"Also available as ``{obj.__name__}.DEFAULT``."]
-
         lines += [""]
 
-        for k, v in meta_with_metrics.items():
+        table = []
+        metrics = meta.pop("_metrics")
+        for dataset, dataset_metrics in metrics.items():
+            for metric_name, metric_value in dataset_metrics.items():
+                table.append((f"{metric_name} (on {dataset})", str(metric_value)))
+
+        for k, v in meta.items():
            if k in {"recipe", "license"}:
                v = f"`link <{v}>`__"
            elif k == "min_size":
@@ -374,7 +371,7 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
     lines.append("")
 
 
-def generate_weights_table(module, table_name, metrics, include_patterns=None, exclude_patterns=None):
+def generate_weights_table(module, table_name, metrics, dataset, include_patterns=None, exclude_patterns=None):
     weights_endswith = "_QuantizedWeights" if module.__name__.split(".")[-1] == "quantization" else "_Weights"
     weight_enums = [getattr(module, name) for name in dir(module) if name.endswith(weights_endswith)]
     weights = [w for weight_enum in weight_enums for w in weight_enum]
@@ -391,7 +388,7 @@ def generate_weights_table(module, table_name, metrics, include_patterns=None, e
     content = [
         (
             f":class:`{w} <{type(w).__name__}>`",
-            *(w.meta["metrics"][metric] for metric in metrics_keys),
+            *(w.meta["_metrics"][dataset][metric] for metric in metrics_keys),
             f"{w.meta['num_params']/1e6:.1f}M",
             f"`link <{w.meta['recipe']}>`__",
         )
@@ -408,29 +405,45 @@ def generate_weights_table(module, table_name, metrics, include_patterns=None, e
         table_file.write(f"{textwrap.indent(table, ' ' * 4)}\n\n")
 
 
-generate_weights_table(module=M, table_name="classification", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")])
 generate_weights_table(
-    module=M.quantization, table_name="classification_quant", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")]
+    module=M, table_name="classification", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="ImageNet-1K"
+)
+generate_weights_table(
+    module=M.quantization,
+    table_name="classification_quant",
+    metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")],
+    dataset="ImageNet-1K",
 )
 generate_weights_table(
-    module=M.detection, table_name="detection", metrics=[("box_map", "Box MAP")], exclude_patterns=["Mask", "Keypoint"]
+    module=M.detection,
+    table_name="detection",
+    metrics=[("box_map", "Box MAP")],
+    exclude_patterns=["Mask", "Keypoint"],
+    dataset="COCO-val2017",
 )
 generate_weights_table(
     module=M.detection,
     table_name="instance_segmentation",
     metrics=[("box_map", "Box MAP"), ("mask_map", "Mask MAP")],
+    dataset="COCO-val2017",
     include_patterns=["Mask"],
 )
 generate_weights_table(
     module=M.detection,
     table_name="detection_keypoint",
     metrics=[("box_map", "Box MAP"), ("kp_map", "Keypoint MAP")],
+    dataset="COCO-val2017",
     include_patterns=["Keypoint"],
 )
 generate_weights_table(
-    module=M.segmentation, table_name="segmentation", metrics=[("miou", "Mean IoU"), ("pixel_acc", "pixelwise Acc")]
+    module=M.segmentation,
+    table_name="segmentation",
+    metrics=[("miou", "Mean IoU"), ("pixel_acc", "pixelwise Acc")],
+    dataset="COCO-val2017-VOC-labels",
+)
+generate_weights_table(
+    module=M.video, table_name="video", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="Kinetics-400"
 )
-generate_weights_table(module=M.video, table_name="video", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")])
 
 
 def setup(app):
diff --git a/test/test_extended_models.py b/test/test_extended_models.py
index 7acdd1c0ca5..408a8c0514c 100644
--- a/test/test_extended_models.py
+++ b/test/test_extended_models.py
@@ -85,7 +85,7 @@ def test_schema_meta_validation(model_fn):
         "categories",
         "keypoint_names",
         "license",
-        "metrics",
+        "_metrics",
         "min_size",
         "num_params",
         "recipe",
@@ -93,19 +93,23 @@ def test_schema_meta_validation(model_fn):
         "_docs",
     }
     # mandatory fields for each computer vision task
-    classification_fields = {"categories", ("metrics", "acc@1"), ("metrics", "acc@5")}
+    classification_fields = {"categories", ("_metrics", "ImageNet-1K", "acc@1"), ("_metrics", "ImageNet-1K", "acc@5")}
     defaults = {
-        "all": {"metrics", "min_size", "num_params", "recipe", "_docs"},
+        "all": {"_metrics", "min_size", "num_params", "recipe", "_docs"},
         "models": classification_fields,
-        "detection": {"categories", ("metrics", "box_map")},
+        "detection": {"categories", ("_metrics", "COCO-val2017", "box_map")},
         "quantization": classification_fields | {"backend", "unquantized"},
-        "segmentation": {"categories", ("metrics", "miou"), ("metrics", "pixel_acc")},
-        "video": classification_fields,
+        "segmentation": {
+            "categories",
+            ("_metrics", "COCO-val2017-VOC-labels", "miou"),
+            ("_metrics", "COCO-val2017-VOC-labels", "pixel_acc"),
+        },
+        "video": {"categories", ("_metrics", "Kinetics-400", "acc@1"), ("_metrics", "Kinetics-400", "acc@5")},
         "optical_flow": set(),
     }
     model_name = model_fn.__name__
     module_name = model_fn.__module__.split(".")[-2]
-    fields = defaults["all"] | defaults[module_name]
+    expected_fields = defaults["all"] | defaults[module_name]
 
     weights_enum = _get_model_weights(model_fn)
     if len(weights_enum) == 0:
@@ -115,7 +119,13 @@ def test_schema_meta_validation(model_fn):
     incorrect_params = []
     bad_names = []
     for w in weights_enum:
-        missing_fields = fields - (set(w.meta.keys()) | set(("metrics", x) for x in w.meta.get("metrics", {}).keys()))
+        actual_fields = set(w.meta.keys())
+        actual_fields |= set(
+            ("_metrics", dataset, metric_key)
+            for dataset in w.meta.get("_metrics", {}).keys()
+            for metric_key in w.meta.get("_metrics", {}).get(dataset, {}).keys()
+        )
+        missing_fields = expected_fields - actual_fields
         unsupported_fields = set(w.meta.keys()) - permitted_fields
         if missing_fields or unsupported_fields:
             problematic_weights[w] = {"missing": missing_fields, "unsupported": unsupported_fields}
diff --git a/torchvision/models/alexnet.py b/torchvision/models/alexnet.py
index 733928bb5e3..6c461a501c9 100644
--- a/torchvision/models/alexnet.py
+++ b/torchvision/models/alexnet.py
@@ -61,9 +61,11 @@ class AlexNet_Weights(WeightsEnum):
             "min_size": (63, 63),
             "categories": _IMAGENET_CATEGORIES,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg",
-            "metrics": {
-                "acc@1": 56.522,
-                "acc@5": 79.066,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 56.522,
+                    "acc@5": 79.066,
+                }
             },
             "_docs": """
                 These weights reproduce closely the results of the paper using a
simplified training recipe. diff --git a/torchvision/models/convnext.py b/torchvision/models/convnext.py index f9aa8d3b1c9..4cd75690df4 100644 --- a/torchvision/models/convnext.py +++ b/torchvision/models/convnext.py @@ -222,9 +222,11 @@ class ConvNeXt_Tiny_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 28589128, - "metrics": { - "acc@1": 82.520, - "acc@5": 96.146, + "_metrics": { + "ImageNet-1K": { + "acc@1": 82.520, + "acc@5": 96.146, + } }, }, ) @@ -238,9 +240,11 @@ class ConvNeXt_Small_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 50223688, - "metrics": { - "acc@1": 83.616, - "acc@5": 96.650, + "_metrics": { + "ImageNet-1K": { + "acc@1": 83.616, + "acc@5": 96.650, + } }, }, ) @@ -254,9 +258,11 @@ class ConvNeXt_Base_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 88591464, - "metrics": { - "acc@1": 84.062, - "acc@5": 96.870, + "_metrics": { + "ImageNet-1K": { + "acc@1": 84.062, + "acc@5": 96.870, + } }, }, ) @@ -270,9 +276,11 @@ class ConvNeXt_Large_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 197767336, - "metrics": { - "acc@1": 84.414, - "acc@5": 96.976, + "_metrics": { + "ImageNet-1K": { + "acc@1": 84.414, + "acc@5": 96.976, + } }, }, ) diff --git a/torchvision/models/densenet.py b/torchvision/models/densenet.py index fc354a0c437..e8a66f5771b 100644 --- a/torchvision/models/densenet.py +++ b/torchvision/models/densenet.py @@ -272,9 +272,11 @@ class DenseNet121_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 7978856, - "metrics": { - "acc@1": 74.434, - "acc@5": 91.972, + "_metrics": { + "ImageNet-1K": { + "acc@1": 74.434, + "acc@5": 91.972, + } }, }, ) @@ -288,9 +290,11 @@ class DenseNet161_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 28681000, - "metrics": { - "acc@1": 77.138, - "acc@5": 93.560, + "_metrics": { + "ImageNet-1K": { + "acc@1": 77.138, + "acc@5": 93.560, + } }, }, ) @@ -304,9 +308,11 @@ class DenseNet169_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 14149480, - "metrics": { - "acc@1": 75.600, - "acc@5": 92.806, + "_metrics": { + "ImageNet-1K": { + "acc@1": 75.600, + "acc@5": 92.806, + } }, }, ) @@ -320,9 +326,11 @@ class DenseNet201_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 20013928, - "metrics": { - "acc@1": 76.896, - "acc@5": 93.370, + "_metrics": { + "ImageNet-1K": { + "acc@1": 76.896, + "acc@5": 93.370, + } }, }, ) diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index f768089666a..191acecf69f 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -383,8 +383,10 @@ class FasterRCNN_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 41755286, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-resnet-50-fpn", - "metrics": { - "box_map": 37.0, + "_metrics": { + "COCO-val2017": { + "box_map": 37.0, + } }, "_docs": """These weights were produced by following a similar training recipe as on the paper.""", }, @@ -400,8 +402,10 @@ class FasterRCNN_ResNet50_FPN_V2_Weights(WeightsEnum): **_COMMON_META, "num_params": 43712278, "recipe": "https://github.com/pytorch/vision/pull/5763", - "metrics": { - "box_map": 46.7, + "_metrics": { + "COCO-val2017": { + "box_map": 46.7, + } }, "_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""", }, @@ -417,8 +421,10 @@ class FasterRCNN_MobileNet_V3_Large_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 19386354, "recipe": 
"https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-fpn", - "metrics": { - "box_map": 32.8, + "_metrics": { + "COCO-val2017": { + "box_map": 32.8, + } }, "_docs": """These weights were produced by following a similar training recipe as on the paper.""", }, @@ -434,8 +440,10 @@ class FasterRCNN_MobileNet_V3_Large_320_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 19386354, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-320-fpn", - "metrics": { - "box_map": 22.8, + "_metrics": { + "COCO-val2017": { + "box_map": 22.8, + } }, "_docs": """These weights were produced by following a similar training recipe as on the paper.""", }, diff --git a/torchvision/models/detection/fcos.py b/torchvision/models/detection/fcos.py index 4780a93f731..63f42259ce6 100644 --- a/torchvision/models/detection/fcos.py +++ b/torchvision/models/detection/fcos.py @@ -658,8 +658,10 @@ class FCOS_ResNet50_FPN_Weights(WeightsEnum): "categories": _COCO_CATEGORIES, "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#fcos-resnet-50-fpn", - "metrics": { - "box_map": 39.2, + "_metrics": { + "COCO-val2017": { + "box_map": 39.2, + } }, "_docs": """These weights were produced by following a similar training recipe as on the paper.""", }, diff --git a/torchvision/models/detection/keypoint_rcnn.py b/torchvision/models/detection/keypoint_rcnn.py index 4932e21b474..c14ec2c7244 100644 --- a/torchvision/models/detection/keypoint_rcnn.py +++ b/torchvision/models/detection/keypoint_rcnn.py @@ -322,9 +322,11 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 59137258, "recipe": "https://github.com/pytorch/vision/issues/1606", - "metrics": { - "box_map": 50.6, - "kp_map": 61.1, + "_metrics": { + "COCO-val2017": { + "box_map": 50.6, + "kp_map": 61.1, + } }, "_docs": """ These weights were produced by following a similar training recipe as on the paper but use a checkpoint @@ -339,9 +341,11 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 59137258, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#keypoint-r-cnn", - "metrics": { - "box_map": 54.6, - "kp_map": 65.0, + "_metrics": { + "COCO-val2017": { + "box_map": 54.6, + "kp_map": 65.0, + } }, "_docs": """These weights were produced by following a similar training recipe as on the paper.""", }, diff --git a/torchvision/models/detection/mask_rcnn.py b/torchvision/models/detection/mask_rcnn.py index 71450e287e4..3988b5b1bcc 100644 --- a/torchvision/models/detection/mask_rcnn.py +++ b/torchvision/models/detection/mask_rcnn.py @@ -364,9 +364,11 @@ class MaskRCNN_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 44401393, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#mask-r-cnn", - "metrics": { - "box_map": 37.9, - "mask_map": 34.6, + "_metrics": { + "COCO-val2017": { + "box_map": 37.9, + "mask_map": 34.6, + } }, "_docs": """These weights were produced by following a similar training recipe as on the paper.""", }, @@ -382,9 +384,11 @@ class MaskRCNN_ResNet50_FPN_V2_Weights(WeightsEnum): **_COMMON_META, "num_params": 46359409, "recipe": "https://github.com/pytorch/vision/pull/5773", - "metrics": { - "box_map": 47.4, - "mask_map": 41.8, + "_metrics": { + "COCO-val2017": { + "box_map": 47.4, + "mask_map": 41.8, + } }, "_docs": """These weights were produced using an enhanced training recipe to boost the 
model accuracy.""", }, diff --git a/torchvision/models/detection/retinanet.py b/torchvision/models/detection/retinanet.py index 0f44a482cde..ea463f4c51e 100644 --- a/torchvision/models/detection/retinanet.py +++ b/torchvision/models/detection/retinanet.py @@ -687,8 +687,10 @@ class RetinaNet_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 34014999, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#retinanet", - "metrics": { - "box_map": 36.4, + "_metrics": { + "COCO-val2017": { + "box_map": 36.4, + } }, "_docs": """These weights were produced by following a similar training recipe as on the paper.""", }, @@ -704,8 +706,10 @@ class RetinaNet_ResNet50_FPN_V2_Weights(WeightsEnum): **_COMMON_META, "num_params": 38198935, "recipe": "https://github.com/pytorch/vision/pull/5756", - "metrics": { - "box_map": 41.5, + "_metrics": { + "COCO-val2017": { + "box_map": 41.5, + } }, "_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""", }, diff --git a/torchvision/models/detection/ssd.py b/torchvision/models/detection/ssd.py index f0bd01a5879..4ec56f76366 100644 --- a/torchvision/models/detection/ssd.py +++ b/torchvision/models/detection/ssd.py @@ -34,8 +34,10 @@ class SSD300_VGG16_Weights(WeightsEnum): "categories": _COCO_CATEGORIES, "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssd300-vgg16", - "metrics": { - "box_map": 25.1, + "_metrics": { + "COCO-val2017": { + "box_map": 25.1, + } }, "_docs": """These weights were produced by following a similar training recipe as on the paper.""", }, diff --git a/torchvision/models/detection/ssdlite.py b/torchvision/models/detection/ssdlite.py index 7fdcb70b673..4d721e87208 100644 --- a/torchvision/models/detection/ssdlite.py +++ b/torchvision/models/detection/ssdlite.py @@ -193,8 +193,10 @@ class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum): "categories": _COCO_CATEGORIES, "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssdlite320-mobilenetv3-large", - "metrics": { - "box_map": 21.3, + "_metrics": { + "COCO-val2017": { + "box_map": 21.3, + } }, "_docs": """These weights were produced by following a similar training recipe as on the paper.""", }, diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index 4b911dbfaba..bfd59aee951 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -458,9 +458,11 @@ class EfficientNet_B0_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 5288548, - "metrics": { - "acc@1": 77.692, - "acc@5": 93.532, + "_metrics": { + "ImageNet-1K": { + "acc@1": 77.692, + "acc@5": 93.532, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -478,9 +480,11 @@ class EfficientNet_B1_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 7794184, - "metrics": { - "acc@1": 78.642, - "acc@5": 94.186, + "_metrics": { + "ImageNet-1K": { + "acc@1": 78.642, + "acc@5": 94.186, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -494,9 +498,11 @@ class EfficientNet_B1_Weights(WeightsEnum): **_COMMON_META_V1, "num_params": 7794184, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-lr-wd-crop-tuning", - "metrics": { - "acc@1": 79.838, - "acc@5": 94.934, + "_metrics": { + "ImageNet-1K": { + "acc@1": 79.838, + "acc@5": 94.934, + } }, "_docs": """ These weights improve upon the results of the original paper by 
using a modified version of TorchVision's @@ -518,9 +524,11 @@ class EfficientNet_B2_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 9109994, - "metrics": { - "acc@1": 80.608, - "acc@5": 95.310, + "_metrics": { + "ImageNet-1K": { + "acc@1": 80.608, + "acc@5": 95.310, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -538,9 +546,11 @@ class EfficientNet_B3_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 12233232, - "metrics": { - "acc@1": 82.008, - "acc@5": 96.054, + "_metrics": { + "ImageNet-1K": { + "acc@1": 82.008, + "acc@5": 96.054, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -558,9 +568,11 @@ class EfficientNet_B4_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 19341616, - "metrics": { - "acc@1": 83.384, - "acc@5": 96.594, + "_metrics": { + "ImageNet-1K": { + "acc@1": 83.384, + "acc@5": 96.594, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -578,9 +590,11 @@ class EfficientNet_B5_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 30389784, - "metrics": { - "acc@1": 83.444, - "acc@5": 96.628, + "_metrics": { + "ImageNet-1K": { + "acc@1": 83.444, + "acc@5": 96.628, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -598,9 +612,11 @@ class EfficientNet_B6_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 43040704, - "metrics": { - "acc@1": 84.008, - "acc@5": 96.916, + "_metrics": { + "ImageNet-1K": { + "acc@1": 84.008, + "acc@5": 96.916, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -618,9 +634,11 @@ class EfficientNet_B7_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 66347960, - "metrics": { - "acc@1": 84.122, - "acc@5": 96.908, + "_metrics": { + "ImageNet-1K": { + "acc@1": 84.122, + "acc@5": 96.908, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -640,9 +658,11 @@ class EfficientNet_V2_S_Weights(WeightsEnum): meta={ **_COMMON_META_V2, "num_params": 21458488, - "metrics": { - "acc@1": 84.228, - "acc@5": 96.878, + "_metrics": { + "ImageNet-1K": { + "acc@1": 84.228, + "acc@5": 96.878, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -666,9 +686,11 @@ class EfficientNet_V2_M_Weights(WeightsEnum): meta={ **_COMMON_META_V2, "num_params": 54139356, - "metrics": { - "acc@1": 85.112, - "acc@5": 97.156, + "_metrics": { + "ImageNet-1K": { + "acc@1": 85.112, + "acc@5": 97.156, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -695,9 +717,11 @@ class EfficientNet_V2_L_Weights(WeightsEnum): meta={ **_COMMON_META_V2, "num_params": 118515272, - "metrics": { - "acc@1": 85.808, - "acc@5": 97.788, + "_metrics": { + "ImageNet-1K": { + "acc@1": 85.808, + "acc@5": 97.788, + } }, "_docs": """These weights are ported from the original paper.""", }, diff --git a/torchvision/models/googlenet.py b/torchvision/models/googlenet.py index 94923dd2a48..5b0a91d4791 100644 --- a/torchvision/models/googlenet.py +++ b/torchvision/models/googlenet.py @@ -284,9 +284,11 @@ class GoogLeNet_Weights(WeightsEnum): "min_size": (15, 15), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#googlenet", - "metrics": { - "acc@1": 69.778, - "acc@5": 89.530, + "_metrics": { + "ImageNet-1K": { + "acc@1": 69.778, + "acc@5": 89.530, + } }, "_docs": """These 
weights are ported from the original paper.""", }, diff --git a/torchvision/models/inception.py b/torchvision/models/inception.py index b034d3aa79a..9207485085f 100644 --- a/torchvision/models/inception.py +++ b/torchvision/models/inception.py @@ -416,9 +416,11 @@ class Inception_V3_Weights(WeightsEnum): "min_size": (75, 75), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#inception-v3", - "metrics": { - "acc@1": 77.294, - "acc@5": 93.450, + "_metrics": { + "ImageNet-1K": { + "acc@1": 77.294, + "acc@5": 93.450, + } }, "_docs": """These weights are ported from the original paper.""", }, diff --git a/torchvision/models/mnasnet.py b/torchvision/models/mnasnet.py index eb26a7ea5c0..8286674d232 100644 --- a/torchvision/models/mnasnet.py +++ b/torchvision/models/mnasnet.py @@ -225,9 +225,11 @@ class MNASNet0_5_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 2218512, - "metrics": { - "acc@1": 67.734, - "acc@5": 87.490, + "_metrics": { + "ImageNet-1K": { + "acc@1": 67.734, + "acc@5": 87.490, + } }, "_docs": """These weights reproduce closely the results of the paper.""", }, @@ -243,9 +245,11 @@ class MNASNet0_75_Weights(WeightsEnum): **_COMMON_META, "recipe": "https://github.com/pytorch/vision/pull/6019", "num_params": 3170208, - "metrics": { - "acc@1": 71.180, - "acc@5": 90.496, + "_metrics": { + "ImageNet-1K": { + "acc@1": 71.180, + "acc@5": 90.496, + } }, "_docs": """ These weights were trained from scratch by using TorchVision's `new training recipe @@ -263,9 +267,11 @@ class MNASNet1_0_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 4383312, - "metrics": { - "acc@1": 73.456, - "acc@5": 91.510, + "_metrics": { + "ImageNet-1K": { + "acc@1": 73.456, + "acc@5": 91.510, + } }, "_docs": """These weights reproduce closely the results of the paper.""", }, @@ -281,9 +287,11 @@ class MNASNet1_3_Weights(WeightsEnum): **_COMMON_META, "recipe": "https://github.com/pytorch/vision/pull/6019", "num_params": 6282256, - "metrics": { - "acc@1": 76.506, - "acc@5": 93.522, + "_metrics": { + "ImageNet-1K": { + "acc@1": 76.506, + "acc@5": 93.522, + } }, "_docs": """ These weights were trained from scratch by using TorchVision's `new training recipe diff --git a/torchvision/models/mobilenetv2.py b/torchvision/models/mobilenetv2.py index b27d305dc82..4c4a7d1e293 100644 --- a/torchvision/models/mobilenetv2.py +++ b/torchvision/models/mobilenetv2.py @@ -208,9 +208,11 @@ class MobileNet_V2_Weights(WeightsEnum): meta={ **_COMMON_META, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv2", - "metrics": { - "acc@1": 71.878, - "acc@5": 90.286, + "_metrics": { + "ImageNet-1K": { + "acc@1": 71.878, + "acc@5": 90.286, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -221,9 +223,11 @@ class MobileNet_V2_Weights(WeightsEnum): meta={ **_COMMON_META, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-reg-tuning", - "metrics": { - "acc@1": 72.154, - "acc@5": 90.822, + "_metrics": { + "ImageNet-1K": { + "acc@1": 72.154, + "acc@5": 90.822, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py index 465d4e58ac8..dfdd529bfc2 100644 --- a/torchvision/models/mobilenetv3.py +++ b/torchvision/models/mobilenetv3.py @@ -317,9 +317,11 @@ class 
MobileNet_V3_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5483032, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv3-large--small", - "metrics": { - "acc@1": 74.042, - "acc@5": 91.340, + "_metrics": { + "ImageNet-1K": { + "acc@1": 74.042, + "acc@5": 91.340, + } }, "_docs": """These weights were trained from scratch by using a simple training recipe.""", }, @@ -331,9 +333,11 @@ class MobileNet_V3_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5483032, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-reg-tuning", - "metrics": { - "acc@1": 75.274, - "acc@5": 92.566, + "_metrics": { + "ImageNet-1K": { + "acc@1": 75.274, + "acc@5": 92.566, + } }, "_docs": """ These weights improve marginally upon the results of the original paper by using a modified version of @@ -353,9 +357,11 @@ class MobileNet_V3_Small_Weights(WeightsEnum): **_COMMON_META, "num_params": 2542856, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv3-large--small", - "metrics": { - "acc@1": 67.668, - "acc@5": 87.402, + "_metrics": { + "ImageNet-1K": { + "acc@1": 67.668, + "acc@5": 87.402, + } }, "_docs": """ These weights improve upon the results of the original paper by using a simple training recipe. diff --git a/torchvision/models/optical_flow/raft.py b/torchvision/models/optical_flow/raft.py index 6c194d6e6ab..b382906517d 100644 --- a/torchvision/models/optical_flow/raft.py +++ b/torchvision/models/optical_flow/raft.py @@ -525,11 +525,10 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", - "metrics": { - "sintel_train_cleanpass_epe": 1.4411, - "sintel_train_finalpass_epe": 2.7894, - "kitti_train_per_image_epe": 5.0172, - "kitti_train_fl_all": 17.4506, + "_metrics": { + "Sintel-Train-Cleanpass": {"epe": 1.4411}, + "Sintel-Train-Finalpass": {"epe": 2.7894}, + "Kitti-Train": {"per_image_epe": 5.0172, "fl_all": 17.4506}, }, "_docs": """These weights were ported from the original paper. They are trained on Chairs + Things.""", }, @@ -542,11 +541,10 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", - "metrics": { - "sintel_train_cleanpass_epe": 1.3822, - "sintel_train_finalpass_epe": 2.7161, - "kitti_train_per_image_epe": 4.5118, - "kitti_train_fl_all": 16.0679, + "_metrics": { + "Sintel-Train-Cleanpass": {"epe": 1.3822}, + "Sintel-Train-Finalpass": {"epe": 2.7161}, + "Kitti-Train": {"per_image_epe": 4.5118, "fl_all": 16.0679}, }, "_docs": """These weights were trained from scratch on Chairs + Things.""", }, @@ -560,9 +558,9 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", - "metrics": { - "sintel_test_cleanpass_epe": 1.94, - "sintel_test_finalpass_epe": 3.18, + "_metrics": { + "Sintel-Test-Cleanpass": {"epe": 1.94}, + "Sintel-Test-Finalpass": {"epe": 3.18}, }, "_docs": """ These weights were ported from the original paper. 
They are trained on Chairs + Things and fine-tuned on @@ -578,9 +576,9 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", - "metrics": { - "sintel_test_cleanpass_epe": 1.819, - "sintel_test_finalpass_epe": 3.067, + "_metrics": { + "Sintel-Test-Cleanpass": {"epe": 1.819}, + "Sintel-Test-Finalpass": {"epe": 3.067}, }, "_docs": """ These weights were trained from scratch on Chairs + Things and fine-tuned on Sintel (C+T+S+K+H). @@ -596,8 +594,8 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", - "metrics": { - "kitti_test_fl_all": 5.10, + "_metrics": { + "Kitti-Test": {"fl_all": 5.10}, }, "_docs": """ These weights were ported from the original paper. They are trained on Chairs + Things, fine-tuned on @@ -613,8 +611,8 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", - "metrics": { - "kitti_test_fl_all": 5.19, + "_metrics": { + "Kitti-Test": {"fl_all": 5.19}, }, "_docs": """ These weights were trained from scratch on Chairs + Things, fine-tuned on Sintel and then on Kitti. @@ -634,11 +632,10 @@ class Raft_Small_Weights(WeightsEnum): **_COMMON_META, "num_params": 990162, "recipe": "https://github.com/princeton-vl/RAFT", - "metrics": { - "sintel_train_cleanpass_epe": 2.1231, - "sintel_train_finalpass_epe": 3.2790, - "kitti_train_per_image_epe": 7.6557, - "kitti_train_fl_all": 25.2801, + "_metrics": { + "Sintel-Train-Cleanpass": {"epe": 2.1231}, + "Sintel-Train-Finalpass": {"epe": 3.2790}, + "Kitti-Train": {"per_image_epe": 7.6557, "fl_all": 25.2801}, }, "_docs": """These weights were ported from the original paper. 
They are trained on Chairs + Things.""", }, @@ -650,11 +647,10 @@ class Raft_Small_Weights(WeightsEnum): **_COMMON_META, "num_params": 990162, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", - "metrics": { - "sintel_train_cleanpass_epe": 1.9901, - "sintel_train_finalpass_epe": 3.2831, - "kitti_train_per_image_epe": 7.5978, - "kitti_train_fl_all": 25.2369, + "_metrics": { + "Sintel-Train-Cleanpass": {"epe": 1.9901}, + "Sintel-Train-Finalpass": {"epe": 3.2831}, + "Kitti-Train": {"per_image_epe": 7.5978, "fl_all": 25.2369}, }, "_docs": """These weights were trained from scratch on Chairs + Things.""", }, diff --git a/torchvision/models/quantization/googlenet.py b/torchvision/models/quantization/googlenet.py index c95e5ec0a9b..644df8ae496 100644 --- a/torchvision/models/quantization/googlenet.py +++ b/torchvision/models/quantization/googlenet.py @@ -117,9 +117,11 @@ class GoogLeNet_QuantizedWeights(WeightsEnum): "backend": "fbgemm", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models", "unquantized": GoogLeNet_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 69.826, - "acc@5": 89.404, + "_metrics": { + "ImageNet-1K": { + "acc@1": 69.826, + "acc@5": 89.404, + } }, "_docs": """ These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized diff --git a/torchvision/models/quantization/inception.py b/torchvision/models/quantization/inception.py index e535c32e3d8..ba4b21d4112 100644 --- a/torchvision/models/quantization/inception.py +++ b/torchvision/models/quantization/inception.py @@ -183,9 +183,11 @@ class Inception_V3_QuantizedWeights(WeightsEnum): "backend": "fbgemm", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models", "unquantized": Inception_V3_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 77.176, - "acc@5": 93.354, + "_metrics": { + "ImageNet-1K": { + "acc@1": 77.176, + "acc@5": 93.354, + } }, "_docs": """ These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized diff --git a/torchvision/models/quantization/mobilenetv2.py b/torchvision/models/quantization/mobilenetv2.py index 5169609aeba..936e9bcc1b1 100644 --- a/torchvision/models/quantization/mobilenetv2.py +++ b/torchvision/models/quantization/mobilenetv2.py @@ -75,9 +75,11 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum): "backend": "qnnpack", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#qat-mobilenetv2", "unquantized": MobileNet_V2_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 71.658, - "acc@5": 90.150, + "_metrics": { + "ImageNet-1K": { + "acc@1": 71.658, + "acc@5": 90.150, + } }, "_docs": """ These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized diff --git a/torchvision/models/quantization/mobilenetv3.py b/torchvision/models/quantization/mobilenetv3.py index 1f3edb05f91..94036143138 100644 --- a/torchvision/models/quantization/mobilenetv3.py +++ b/torchvision/models/quantization/mobilenetv3.py @@ -169,9 +169,11 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum): "backend": "qnnpack", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#qat-mobilenetv3", "unquantized": MobileNet_V3_Large_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 73.004, - "acc@5": 90.858, + "_metrics": { + "ImageNet-1K": { + "acc@1": 73.004, + "acc@5": 90.858, + } }, "_docs": """ These weights were 
produced by doing Quantization Aware Training (eager mode) on top of the unquantized diff --git a/torchvision/models/quantization/resnet.py b/torchvision/models/quantization/resnet.py index 5af87a1e586..891b608ce01 100644 --- a/torchvision/models/quantization/resnet.py +++ b/torchvision/models/quantization/resnet.py @@ -169,9 +169,11 @@ class ResNet18_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 11689512, "unquantized": ResNet18_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 69.494, - "acc@5": 88.882, + "_metrics": { + "ImageNet-1K": { + "acc@1": 69.494, + "acc@5": 88.882, + } }, }, ) @@ -186,9 +188,11 @@ class ResNet50_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 25557032, "unquantized": ResNet50_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 75.920, - "acc@5": 92.814, + "_metrics": { + "ImageNet-1K": { + "acc@1": 75.920, + "acc@5": 92.814, + } }, }, ) @@ -199,9 +203,11 @@ class ResNet50_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 25557032, "unquantized": ResNet50_Weights.IMAGENET1K_V2, - "metrics": { - "acc@1": 80.282, - "acc@5": 94.976, + "_metrics": { + "ImageNet-1K": { + "acc@1": 80.282, + "acc@5": 94.976, + } }, }, ) @@ -216,9 +222,11 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 88791336, "unquantized": ResNeXt101_32X8D_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 78.986, - "acc@5": 94.480, + "_metrics": { + "ImageNet-1K": { + "acc@1": 78.986, + "acc@5": 94.480, + } }, }, ) @@ -229,9 +237,11 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 88791336, "unquantized": ResNeXt101_32X8D_Weights.IMAGENET1K_V2, - "metrics": { - "acc@1": 82.574, - "acc@5": 96.132, + "_metrics": { + "ImageNet-1K": { + "acc@1": 82.574, + "acc@5": 96.132, + } }, }, ) @@ -247,9 +257,11 @@ class ResNeXt101_64X4D_QuantizedWeights(WeightsEnum): "num_params": 83455272, "recipe": "https://github.com/pytorch/vision/pull/5935", "unquantized": ResNeXt101_64X4D_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 82.898, - "acc@5": 96.326, + "_metrics": { + "ImageNet-1K": { + "acc@1": 82.898, + "acc@5": 96.326, + } }, }, ) diff --git a/torchvision/models/quantization/shufflenetv2.py b/torchvision/models/quantization/shufflenetv2.py index 523f8739b2e..781591ae118 100644 --- a/torchvision/models/quantization/shufflenetv2.py +++ b/torchvision/models/quantization/shufflenetv2.py @@ -133,9 +133,11 @@ class ShuffleNet_V2_X0_5_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 1366792, "unquantized": ShuffleNet_V2_X0_5_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 57.972, - "acc@5": 79.780, + "_metrics": { + "ImageNet-1K": { + "acc@1": 57.972, + "acc@5": 79.780, + } }, }, ) @@ -150,9 +152,11 @@ class ShuffleNet_V2_X1_0_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 2278604, "unquantized": ShuffleNet_V2_X1_0_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 68.360, - "acc@5": 87.582, + "_metrics": { + "ImageNet-1K": { + "acc@1": 68.360, + "acc@5": 87.582, + } }, }, ) @@ -168,9 +172,11 @@ class ShuffleNet_V2_X1_5_QuantizedWeights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5906", "num_params": 3503624, "unquantized": ShuffleNet_V2_X1_5_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 72.052, - "acc@5": 90.700, + "_metrics": { + "ImageNet-1K": { + "acc@1": 72.052, + "acc@5": 90.700, + } }, }, ) @@ -186,9 +192,11 @@ class ShuffleNet_V2_X2_0_QuantizedWeights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5906", "num_params": 7393996, 
"unquantized": ShuffleNet_V2_X2_0_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 75.354, - "acc@5": 92.488, + "_metrics": { + "ImageNet-1K": { + "acc@1": 75.354, + "acc@5": 92.488, + } }, }, ) diff --git a/torchvision/models/regnet.py b/torchvision/models/regnet.py index be80c9acf35..d2958e8686c 100644 --- a/torchvision/models/regnet.py +++ b/torchvision/models/regnet.py @@ -422,9 +422,11 @@ class RegNet_Y_400MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 4344144, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "metrics": { - "acc@1": 74.046, - "acc@5": 91.716, + "_metrics": { + "ImageNet-1K": { + "acc@1": 74.046, + "acc@5": 91.716, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -436,9 +438,11 @@ class RegNet_Y_400MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 4344144, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 75.804, - "acc@5": 92.742, + "_metrics": { + "ImageNet-1K": { + "acc@1": 75.804, + "acc@5": 92.742, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -458,9 +462,11 @@ class RegNet_Y_800MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 6432512, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "metrics": { - "acc@1": 76.420, - "acc@5": 93.136, + "_metrics": { + "ImageNet-1K": { + "acc@1": 76.420, + "acc@5": 93.136, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -472,9 +478,11 @@ class RegNet_Y_800MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 6432512, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 78.828, - "acc@5": 94.502, + "_metrics": { + "ImageNet-1K": { + "acc@1": 78.828, + "acc@5": 94.502, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -494,9 +502,11 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 11202430, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "metrics": { - "acc@1": 77.950, - "acc@5": 93.966, + "_metrics": { + "ImageNet-1K": { + "acc@1": 77.950, + "acc@5": 93.966, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -508,9 +518,11 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 11202430, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 80.876, - "acc@5": 95.444, + "_metrics": { + "ImageNet-1K": { + "acc@1": 80.876, + "acc@5": 95.444, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -530,9 +542,11 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 19436338, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "metrics": { - "acc@1": 78.948, - "acc@5": 94.576, + "_metrics": { + "ImageNet-1K": { + "acc@1": 78.948, + "acc@5": 94.576, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -544,9 +558,11 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 19436338, "recipe": 
"https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 81.982, - "acc@5": 95.972, + "_metrics": { + "ImageNet-1K": { + "acc@1": 81.982, + "acc@5": 95.972, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -566,9 +582,11 @@ class RegNet_Y_8GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 39381472, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "metrics": { - "acc@1": 80.032, - "acc@5": 95.048, + "_metrics": { + "ImageNet-1K": { + "acc@1": 80.032, + "acc@5": 95.048, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -580,9 +598,11 @@ class RegNet_Y_8GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 39381472, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 82.828, - "acc@5": 96.330, + "_metrics": { + "ImageNet-1K": { + "acc@1": 82.828, + "acc@5": 96.330, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -602,9 +622,11 @@ class RegNet_Y_16GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 83590140, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#large-models", - "metrics": { - "acc@1": 80.424, - "acc@5": 95.240, + "_metrics": { + "ImageNet-1K": { + "acc@1": 80.424, + "acc@5": 95.240, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -616,9 +638,11 @@ class RegNet_Y_16GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 83590140, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 82.886, - "acc@5": 96.328, + "_metrics": { + "ImageNet-1K": { + "acc@1": 82.886, + "acc@5": 96.328, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -635,9 +659,11 @@ class RegNet_Y_16GF_Weights(WeightsEnum): meta={ **_COMMON_SWAG_META, "num_params": 83590140, - "metrics": { - "acc@1": 86.012, - "acc@5": 98.054, + "_metrics": { + "ImageNet-1K": { + "acc@1": 86.012, + "acc@5": 98.054, + } }, "_docs": """ These weights are learnt via transfer learning by end-to-end fine-tuning the original @@ -654,9 +680,11 @@ class RegNet_Y_16GF_Weights(WeightsEnum): **_COMMON_SWAG_META, "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 83590140, - "metrics": { - "acc@1": 83.976, - "acc@5": 97.244, + "_metrics": { + "ImageNet-1K": { + "acc@1": 83.976, + "acc@5": 97.244, + } }, "_docs": """ These weights are composed of the original frozen `SWAG `_ trunk @@ -675,9 +703,11 @@ class RegNet_Y_32GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 145046770, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#large-models", - "metrics": { - "acc@1": 80.878, - "acc@5": 95.340, + "_metrics": { + "ImageNet-1K": { + "acc@1": 80.878, + "acc@5": 95.340, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -689,9 +719,11 @@ class RegNet_Y_32GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 145046770, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 83.368, - "acc@5": 96.498, + "_metrics": { + "ImageNet-1K": { + "acc@1": 83.368, + "acc@5": 96.498, + } }, "_docs": """ These weights improve upon 
the results of the original paper by using a modified version of TorchVision's @@ -708,9 +740,11 @@ class RegNet_Y_32GF_Weights(WeightsEnum): meta={ **_COMMON_SWAG_META, "num_params": 145046770, - "metrics": { - "acc@1": 86.838, - "acc@5": 98.362, + "_metrics": { + "ImageNet-1K": { + "acc@1": 86.838, + "acc@5": 98.362, + } }, "_docs": """ These weights are learnt via transfer learning by end-to-end fine-tuning the original @@ -727,9 +761,11 @@ class RegNet_Y_32GF_Weights(WeightsEnum): **_COMMON_SWAG_META, "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 145046770, - "metrics": { - "acc@1": 84.622, - "acc@5": 97.480, + "_metrics": { + "ImageNet-1K": { + "acc@1": 84.622, + "acc@5": 97.480, + } }, "_docs": """ These weights are composed of the original frozen `SWAG `_ trunk @@ -749,9 +785,11 @@ class RegNet_Y_128GF_Weights(WeightsEnum): meta={ **_COMMON_SWAG_META, "num_params": 644812894, - "metrics": { - "acc@1": 88.228, - "acc@5": 98.682, + "_metrics": { + "ImageNet-1K": { + "acc@1": 88.228, + "acc@5": 98.682, + } }, "_docs": """ These weights are learnt via transfer learning by end-to-end fine-tuning the original @@ -768,9 +806,11 @@ class RegNet_Y_128GF_Weights(WeightsEnum): **_COMMON_SWAG_META, "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 644812894, - "metrics": { - "acc@1": 86.068, - "acc@5": 97.844, + "_metrics": { + "ImageNet-1K": { + "acc@1": 86.068, + "acc@5": 97.844, + } }, "_docs": """ These weights are composed of the original frozen `SWAG `_ trunk @@ -789,9 +829,11 @@ class RegNet_X_400MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 5495976, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "metrics": { - "acc@1": 72.834, - "acc@5": 90.950, + "_metrics": { + "ImageNet-1K": { + "acc@1": 72.834, + "acc@5": 90.950, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -803,9 +845,11 @@ class RegNet_X_400MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 5495976, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "metrics": { - "acc@1": 74.864, - "acc@5": 92.322, + "_metrics": { + "ImageNet-1K": { + "acc@1": 74.864, + "acc@5": 92.322, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -825,9 +869,11 @@ class RegNet_X_800MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 7259656, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "metrics": { - "acc@1": 75.212, - "acc@5": 92.348, + "_metrics": { + "ImageNet-1K": { + "acc@1": 75.212, + "acc@5": 92.348, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -839,9 +885,11 @@ class RegNet_X_800MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 7259656, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "metrics": { - "acc@1": 77.522, - "acc@5": 93.826, + "_metrics": { + "ImageNet-1K": { + "acc@1": 77.522, + "acc@5": 93.826, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -861,9 +909,11 @@ class RegNet_X_1_6GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 9190136, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "metrics": { - "acc@1": 77.040, - "acc@5": 93.440, + "_metrics": { + 
"ImageNet-1K": { + "acc@1": 77.040, + "acc@5": 93.440, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -875,9 +925,11 @@ class RegNet_X_1_6GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 9190136, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "metrics": { - "acc@1": 79.668, - "acc@5": 94.922, + "_metrics": { + "ImageNet-1K": { + "acc@1": 79.668, + "acc@5": 94.922, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -897,9 +949,11 @@ class RegNet_X_3_2GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 15296552, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "metrics": { - "acc@1": 78.364, - "acc@5": 93.992, + "_metrics": { + "ImageNet-1K": { + "acc@1": 78.364, + "acc@5": 93.992, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -911,9 +965,11 @@ class RegNet_X_3_2GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 15296552, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 81.196, - "acc@5": 95.430, + "_metrics": { + "ImageNet-1K": { + "acc@1": 81.196, + "acc@5": 95.430, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -933,9 +989,11 @@ class RegNet_X_8GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 39572648, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "metrics": { - "acc@1": 79.344, - "acc@5": 94.686, + "_metrics": { + "ImageNet-1K": { + "acc@1": 79.344, + "acc@5": 94.686, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -947,9 +1005,11 @@ class RegNet_X_8GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 39572648, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 81.682, - "acc@5": 95.678, + "_metrics": { + "ImageNet-1K": { + "acc@1": 81.682, + "acc@5": 95.678, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -969,9 +1029,11 @@ class RegNet_X_16GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 54278536, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "metrics": { - "acc@1": 80.058, - "acc@5": 94.944, + "_metrics": { + "ImageNet-1K": { + "acc@1": 80.058, + "acc@5": 94.944, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -983,9 +1045,11 @@ class RegNet_X_16GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 54278536, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 82.716, - "acc@5": 96.196, + "_metrics": { + "ImageNet-1K": { + "acc@1": 82.716, + "acc@5": 96.196, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -1005,9 +1069,11 @@ class RegNet_X_32GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 107811560, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#large-models", - "metrics": { - "acc@1": 80.622, - "acc@5": 95.248, + "_metrics": { + "ImageNet-1K": { + "acc@1": 80.622, + "acc@5": 95.248, + } }, "_docs": 
"""These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -1019,9 +1085,11 @@ class RegNet_X_32GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 107811560, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 83.014, - "acc@5": 96.288, + "_metrics": { + "ImageNet-1K": { + "acc@1": 83.014, + "acc@5": 96.288, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index b46c07ef600..39662b1cc0a 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -317,9 +317,11 @@ class ResNet18_Weights(WeightsEnum): **_COMMON_META, "num_params": 11689512, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "metrics": { - "acc@1": 69.758, - "acc@5": 89.078, + "_metrics": { + "ImageNet-1K": { + "acc@1": 69.758, + "acc@5": 89.078, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -335,9 +337,11 @@ class ResNet34_Weights(WeightsEnum): **_COMMON_META, "num_params": 21797672, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "metrics": { - "acc@1": 73.314, - "acc@5": 91.420, + "_metrics": { + "ImageNet-1K": { + "acc@1": 73.314, + "acc@5": 91.420, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -353,9 +357,11 @@ class ResNet50_Weights(WeightsEnum): **_COMMON_META, "num_params": 25557032, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "metrics": { - "acc@1": 76.130, - "acc@5": 92.862, + "_metrics": { + "ImageNet-1K": { + "acc@1": 76.130, + "acc@5": 92.862, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -367,9 +373,11 @@ class ResNet50_Weights(WeightsEnum): **_COMMON_META, "num_params": 25557032, "recipe": "https://github.com/pytorch/vision/issues/3995#issuecomment-1013906621", - "metrics": { - "acc@1": 80.858, - "acc@5": 95.434, + "_metrics": { + "ImageNet-1K": { + "acc@1": 80.858, + "acc@5": 95.434, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training recipe @@ -388,9 +396,11 @@ class ResNet101_Weights(WeightsEnum): **_COMMON_META, "num_params": 44549160, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "metrics": { - "acc@1": 77.374, - "acc@5": 93.546, + "_metrics": { + "ImageNet-1K": { + "acc@1": 77.374, + "acc@5": 93.546, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -402,9 +412,11 @@ class ResNet101_Weights(WeightsEnum): **_COMMON_META, "num_params": 44549160, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 81.886, - "acc@5": 95.780, + "_metrics": { + "ImageNet-1K": { + "acc@1": 81.886, + "acc@5": 95.780, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training recipe @@ -423,9 +435,11 @@ class ResNet152_Weights(WeightsEnum): **_COMMON_META, "num_params": 60192808, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "metrics": { - "acc@1": 78.312, - "acc@5": 94.046, + "_metrics": { + "ImageNet-1K": { 
+ "acc@1": 78.312, + "acc@5": 94.046, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -437,9 +451,11 @@ class ResNet152_Weights(WeightsEnum): **_COMMON_META, "num_params": 60192808, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 82.284, - "acc@5": 96.002, + "_metrics": { + "ImageNet-1K": { + "acc@1": 82.284, + "acc@5": 96.002, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training recipe @@ -458,9 +474,11 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): **_COMMON_META, "num_params": 25028904, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext", - "metrics": { - "acc@1": 77.618, - "acc@5": 93.698, + "_metrics": { + "ImageNet-1K": { + "acc@1": 77.618, + "acc@5": 93.698, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -472,9 +490,11 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): **_COMMON_META, "num_params": 25028904, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 81.198, - "acc@5": 95.340, + "_metrics": { + "ImageNet-1K": { + "acc@1": 81.198, + "acc@5": 95.340, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training recipe @@ -493,9 +513,11 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): **_COMMON_META, "num_params": 88791336, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext", - "metrics": { - "acc@1": 79.312, - "acc@5": 94.526, + "_metrics": { + "ImageNet-1K": { + "acc@1": 79.312, + "acc@5": 94.526, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -507,9 +529,11 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): **_COMMON_META, "num_params": 88791336, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "metrics": { - "acc@1": 82.834, - "acc@5": 96.228, + "_metrics": { + "ImageNet-1K": { + "acc@1": 82.834, + "acc@5": 96.228, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training recipe @@ -528,9 +552,11 @@ class ResNeXt101_64X4D_Weights(WeightsEnum): **_COMMON_META, "num_params": 83455272, "recipe": "https://github.com/pytorch/vision/pull/5935", - "metrics": { - "acc@1": 83.246, - "acc@5": 96.454, + "_metrics": { + "ImageNet-1K": { + "acc@1": 83.246, + "acc@5": 96.454, + } }, "_docs": """ These weights were trained from scratch by using TorchVision's `new training recipe @@ -549,9 +575,11 @@ class Wide_ResNet50_2_Weights(WeightsEnum): **_COMMON_META, "num_params": 68883240, "recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439", - "metrics": { - "acc@1": 78.468, - "acc@5": 94.086, + "_metrics": { + "ImageNet-1K": { + "acc@1": 78.468, + "acc@5": 94.086, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -563,9 +591,11 @@ class Wide_ResNet50_2_Weights(WeightsEnum): **_COMMON_META, "num_params": 68883240, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "metrics": { - "acc@1": 81.602, - "acc@5": 95.758, + "_metrics": { + "ImageNet-1K": { + "acc@1": 81.602, + "acc@5": 95.758, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training 
@@ -584,9 +614,11 @@ class Wide_ResNet101_2_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 126886696,
             "recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439",
-            "metrics": {
-                "acc@1": 78.848,
-                "acc@5": 94.284,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 78.848,
+                    "acc@5": 94.284,
+                }
             },
             "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
         },
@@ -598,9 +630,11 @@ class Wide_ResNet101_2_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 126886696,
             "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe",
-            "metrics": {
-                "acc@1": 82.510,
-                "acc@5": 96.020,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 82.510,
+                    "acc@5": 96.020,
+                }
             },
             "_docs": """
                 These weights improve upon the results of the original paper by using TorchVision's `new training recipe
diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py
index fc7cdb36cb0..78f54cdc6d7 100644
--- a/torchvision/models/segmentation/deeplabv3.py
+++ b/torchvision/models/segmentation/deeplabv3.py
@@ -146,9 +146,11 @@ class DeepLabV3_ResNet50_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 42004074,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_resnet50",
-            "metrics": {
-                "miou": 66.4,
-                "pixel_acc": 92.4,
+            "_metrics": {
+                "COCO-val2017-VOC-labels": {
+                    "miou": 66.4,
+                    "pixel_acc": 92.4,
+                }
             },
         },
     )
@@ -163,9 +165,11 @@ class DeepLabV3_ResNet101_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 60996202,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#fcn_resnet101",
-            "metrics": {
-                "miou": 67.4,
-                "pixel_acc": 92.4,
+            "_metrics": {
+                "COCO-val2017-VOC-labels": {
+                    "miou": 67.4,
+                    "pixel_acc": 92.4,
+                }
             },
         },
     )
@@ -180,9 +184,11 @@ class DeepLabV3_MobileNet_V3_Large_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 11029328,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_mobilenet_v3_large",
-            "metrics": {
-                "miou": 60.3,
-                "pixel_acc": 91.2,
+            "_metrics": {
+                "COCO-val2017-VOC-labels": {
+                    "miou": 60.3,
+                    "pixel_acc": 91.2,
+                }
             },
         },
     )
diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py
index 27c931bfc18..a1dd48c2f58 100644
--- a/torchvision/models/segmentation/fcn.py
+++ b/torchvision/models/segmentation/fcn.py
@@ -65,9 +65,11 @@ class FCN_ResNet50_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 35322218,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#fcn_resnet50",
-            "metrics": {
-                "miou": 60.5,
-                "pixel_acc": 91.4,
+            "_metrics": {
+                "COCO-val2017-VOC-labels": {
+                    "miou": 60.5,
+                    "pixel_acc": 91.4,
+                }
             },
         },
     )
@@ -82,9 +84,11 @@ class FCN_ResNet101_Weights(WeightsEnum):
             **_COMMON_META,
             "num_params": 54314346,
             "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_resnet101",
-            "metrics": {
-                "miou": 63.7,
-                "pixel_acc": 91.9,
+            "_metrics": {
+                "COCO-val2017-VOC-labels": {
+                    "miou": 63.7,
+                    "pixel_acc": 91.9,
+                }
             },
         },
     )
diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py
index ed36d881ee9..ec4eba6eefc 100644
--- a/torchvision/models/segmentation/lraspp.py
+++ b/torchvision/models/segmentation/lraspp.py
@@ -102,9 +102,11 @@ class LRASPP_MobileNet_V3_Large_Weights(WeightsEnum):
             "categories": _VOC_CATEGORIES,
             "min_size": (1, 1),
             "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#lraspp_mobilenet_v3_large",
"https://github.com/pytorch/vision/tree/main/references/segmentation#lraspp_mobilenet_v3_large", - "metrics": { - "miou": 57.9, - "pixel_acc": 91.2, + "_metrics": { + "COCO-val2017-VOC-labels": { + "miou": 57.9, + "pixel_acc": 91.2, + } }, "_docs": """ These weights were trained on a subset of COCO, using only the 20 categories that are present in the diff --git a/torchvision/models/shufflenetv2.py b/torchvision/models/shufflenetv2.py index 4e163573655..48695c70193 100644 --- a/torchvision/models/shufflenetv2.py +++ b/torchvision/models/shufflenetv2.py @@ -198,9 +198,11 @@ class ShuffleNet_V2_X0_5_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 1366792, - "metrics": { - "acc@1": 60.552, - "acc@5": 81.746, + "_metrics": { + "ImageNet-1K": { + "acc@1": 60.552, + "acc@5": 81.746, + } }, "_docs": """These weights were trained from scratch to reproduce closely the results of the paper.""", }, @@ -216,9 +218,11 @@ class ShuffleNet_V2_X1_0_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 2278604, - "metrics": { - "acc@1": 69.362, - "acc@5": 88.316, + "_metrics": { + "ImageNet-1K": { + "acc@1": 69.362, + "acc@5": 88.316, + } }, "_docs": """These weights were trained from scratch to reproduce closely the results of the paper.""", }, @@ -234,9 +238,11 @@ class ShuffleNet_V2_X1_5_Weights(WeightsEnum): **_COMMON_META, "recipe": "https://github.com/pytorch/vision/pull/5906", "num_params": 3503624, - "metrics": { - "acc@1": 72.996, - "acc@5": 91.086, + "_metrics": { + "ImageNet-1K": { + "acc@1": 72.996, + "acc@5": 91.086, + } }, "_docs": """ These weights were trained from scratch by using TorchVision's `new training recipe @@ -255,9 +261,11 @@ class ShuffleNet_V2_X2_0_Weights(WeightsEnum): **_COMMON_META, "recipe": "https://github.com/pytorch/vision/pull/5906", "num_params": 7393996, - "metrics": { - "acc@1": 76.230, - "acc@5": 93.006, + "_metrics": { + "ImageNet-1K": { + "acc@1": 76.230, + "acc@5": 93.006, + } }, "_docs": """ These weights were trained from scratch by using TorchVision's `new training recipe diff --git a/torchvision/models/squeezenet.py b/torchvision/models/squeezenet.py index a93a06fc80d..dbc0f54fb77 100644 --- a/torchvision/models/squeezenet.py +++ b/torchvision/models/squeezenet.py @@ -129,9 +129,11 @@ class SqueezeNet1_0_Weights(WeightsEnum): **_COMMON_META, "min_size": (21, 21), "num_params": 1248424, - "metrics": { - "acc@1": 58.092, - "acc@5": 80.420, + "_metrics": { + "ImageNet-1K": { + "acc@1": 58.092, + "acc@5": 80.420, + } }, }, ) @@ -146,9 +148,11 @@ class SqueezeNet1_1_Weights(WeightsEnum): **_COMMON_META, "min_size": (17, 17), "num_params": 1235496, - "metrics": { - "acc@1": 58.178, - "acc@5": 80.624, + "_metrics": { + "ImageNet-1K": { + "acc@1": 58.178, + "acc@5": 80.624, + } }, }, ) diff --git a/torchvision/models/swin_transformer.py b/torchvision/models/swin_transformer.py index f60db5096a7..69a0d5fd2fd 100644 --- a/torchvision/models/swin_transformer.py +++ b/torchvision/models/swin_transformer.py @@ -417,9 +417,11 @@ class Swin_T_Weights(WeightsEnum): "num_params": 28288354, "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer", - "metrics": { - "acc@1": 81.358, - "acc@5": 95.526, + "_metrics": { + "ImageNet-1K": { + "acc@1": 81.358, + "acc@5": 95.526, + } }, "_docs": """These weights reproduce closely the results of the paper using its training recipe.""", }, diff --git a/torchvision/models/vgg.py b/torchvision/models/vgg.py index 9fb77d9bf97..937458b48cd 100644 --- 
+++ b/torchvision/models/vgg.py
@@ -121,9 +121,11 @@ class VGG11_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 132863336,
-            "metrics": {
-                "acc@1": 69.020,
-                "acc@5": 88.628,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 69.020,
+                    "acc@5": 88.628,
+                }
             },
         },
     )
@@ -137,9 +139,11 @@ class VGG11_BN_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 132868840,
-            "metrics": {
-                "acc@1": 70.370,
-                "acc@5": 89.810,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 70.370,
+                    "acc@5": 89.810,
+                }
             },
         },
     )
@@ -153,9 +157,11 @@ class VGG13_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 133047848,
-            "metrics": {
-                "acc@1": 69.928,
-                "acc@5": 89.246,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 69.928,
+                    "acc@5": 89.246,
+                }
             },
         },
     )
@@ -169,9 +175,11 @@ class VGG13_BN_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 133053736,
-            "metrics": {
-                "acc@1": 71.586,
-                "acc@5": 90.374,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 71.586,
+                    "acc@5": 90.374,
+                }
             },
         },
     )
@@ -185,9 +193,11 @@ class VGG16_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 138357544,
-            "metrics": {
-                "acc@1": 71.592,
-                "acc@5": 90.382,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 71.592,
+                    "acc@5": 90.382,
+                }
             },
         },
     )
@@ -205,9 +215,11 @@ class VGG16_Weights(WeightsEnum):
             "num_params": 138357544,
             "categories": None,
             "recipe": "https://github.com/amdegroot/ssd.pytorch#training-ssd",
-            "metrics": {
-                "acc@1": float("nan"),
-                "acc@5": float("nan"),
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": float("nan"),
+                    "acc@5": float("nan"),
+                }
             },
             "_docs": """
                 These weights can't be used for classification because they are missing values in the `classifier`
@@ -226,9 +238,11 @@ class VGG16_BN_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 138365992,
-            "metrics": {
-                "acc@1": 73.360,
-                "acc@5": 91.516,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 73.360,
+                    "acc@5": 91.516,
+                }
             },
         },
     )
@@ -242,9 +256,11 @@ class VGG19_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 143667240,
-            "metrics": {
-                "acc@1": 72.376,
-                "acc@5": 90.876,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 72.376,
+                    "acc@5": 90.876,
+                }
             },
         },
     )
@@ -258,9 +274,11 @@ class VGG19_BN_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 143678248,
-            "metrics": {
-                "acc@1": 74.218,
-                "acc@5": 91.842,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 74.218,
+                    "acc@5": 91.842,
+                }
             },
         },
     )
diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py
index ec45092f532..8eb47418365 100644
--- a/torchvision/models/video/resnet.py
+++ b/torchvision/models/video/resnet.py
@@ -323,9 +323,11 @@ class R3D_18_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 33371472,
-            "metrics": {
-                "acc@1": 52.75,
-                "acc@5": 75.45,
+            "_metrics": {
+                "Kinetics-400": {
+                    "acc@1": 52.75,
+                    "acc@5": 75.45,
+                }
             },
         },
     )
@@ -339,9 +341,11 @@ class MC3_18_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 11695440,
-            "metrics": {
-                "acc@1": 53.90,
-                "acc@5": 76.29,
+            "_metrics": {
+                "Kinetics-400": {
+                    "acc@1": 53.90,
+                    "acc@5": 76.29,
+                }
             },
         },
     )
@@ -355,9 +359,11 @@ class R2Plus1D_18_Weights(WeightsEnum):
         meta={
             **_COMMON_META,
             "num_params": 31505325,
-            "metrics": {
-                "acc@1": 57.50,
-                "acc@5": 78.81,
+            "_metrics": {
+                "Kinetics-400": {
+                    "acc@1": 57.50,
+                    "acc@5": 78.81,
+                }
             },
         },
     )
diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py
index dc54b8735f0..dad2804e626 100644
--- a/torchvision/models/vision_transformer.py
+++ b/torchvision/models/vision_transformer.py
@@ -328,9 +328,11 @@ class ViT_B_16_Weights(WeightsEnum):
             "num_params": 86567656,
             "min_size": (224, 224),
             "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_16",
-            "metrics": {
-                "acc@1": 81.072,
-                "acc@5": 95.318,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 81.072,
+                    "acc@5": 95.318,
+                }
             },
             "_docs": """
                 These weights were trained from scratch by using a modified version of `DeIT
@@ -350,9 +352,11 @@ class ViT_B_16_Weights(WeightsEnum):
             **_COMMON_SWAG_META,
             "num_params": 86859496,
             "min_size": (384, 384),
-            "metrics": {
-                "acc@1": 85.304,
-                "acc@5": 97.650,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 85.304,
+                    "acc@5": 97.650,
+                }
             },
             "_docs": """
                 These weights are learnt via transfer learning by end-to-end fine-tuning the original
@@ -373,9 +377,11 @@ class ViT_B_16_Weights(WeightsEnum):
             "recipe": "https://github.com/pytorch/vision/pull/5793",
             "num_params": 86567656,
             "min_size": (224, 224),
-            "metrics": {
-                "acc@1": 81.886,
-                "acc@5": 96.180,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 81.886,
+                    "acc@5": 96.180,
+                }
             },
             "_docs": """
                 These weights are composed of the original frozen `SWAG `_ trunk
@@ -395,9 +401,11 @@ class ViT_B_32_Weights(WeightsEnum):
             "num_params": 88224232,
             "min_size": (224, 224),
             "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_32",
-            "metrics": {
-                "acc@1": 75.912,
-                "acc@5": 92.466,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 75.912,
+                    "acc@5": 92.466,
+                }
             },
             "_docs": """
                 These weights were trained from scratch by using a modified version of `DeIT
@@ -417,9 +425,11 @@ class ViT_L_16_Weights(WeightsEnum):
             "num_params": 304326632,
             "min_size": (224, 224),
             "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_16",
-            "metrics": {
-                "acc@1": 79.662,
-                "acc@5": 94.638,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 79.662,
+                    "acc@5": 94.638,
+                }
             },
             "_docs": """
                 These weights were trained from scratch by using a modified version of TorchVision's
@@ -440,9 +450,11 @@ class ViT_L_16_Weights(WeightsEnum):
             **_COMMON_SWAG_META,
             "num_params": 305174504,
             "min_size": (512, 512),
-            "metrics": {
-                "acc@1": 88.064,
-                "acc@5": 98.512,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 88.064,
+                    "acc@5": 98.512,
+                }
             },
             "_docs": """
                 These weights are learnt via transfer learning by end-to-end fine-tuning the original
@@ -463,9 +475,11 @@ class ViT_L_16_Weights(WeightsEnum):
             "recipe": "https://github.com/pytorch/vision/pull/5793",
             "num_params": 304326632,
             "min_size": (224, 224),
-            "metrics": {
-                "acc@1": 85.146,
-                "acc@5": 97.422,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 85.146,
+                    "acc@5": 97.422,
+                }
             },
             "_docs": """
                 These weights are composed of the original frozen `SWAG `_ trunk
@@ -485,9 +499,11 @@ class ViT_L_32_Weights(WeightsEnum):
             "num_params": 306535400,
             "min_size": (224, 224),
             "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_32",
-            "metrics": {
-                "acc@1": 76.972,
-                "acc@5": 93.07,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 76.972,
+                    "acc@5": 93.07,
+                }
             },
             "_docs": """
                 These weights were trained from scratch by using a modified version of `DeIT
@@ -511,9 +527,11 @@ class ViT_H_14_Weights(WeightsEnum):
             **_COMMON_SWAG_META,
             "num_params": 633470440,
             "min_size": (518, 518),
-            "metrics": {
-                "acc@1": 88.552,
-                "acc@5": 98.694,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 88.552,
+                    "acc@5": 98.694,
+                }
             },
             "_docs": """
                 These weights are learnt via transfer learning by end-to-end fine-tuning the original
@@ -534,9 +552,11 @@ class ViT_H_14_Weights(WeightsEnum):
             "recipe": "https://github.com/pytorch/vision/pull/5793",
             "num_params": 632045800,
             "min_size": (224, 224),
-            "metrics": {
-                "acc@1": 85.708,
-                "acc@5": 97.730,
+            "_metrics": {
+                "ImageNet-1K": {
+                    "acc@1": 85.708,
+                    "acc@5": 97.730,
+                }
             },
             "_docs": """
                 These weights are composed of the original frozen `SWAG `_ trunk