From be6b81238c9d428d5014082cb76d3dbf4de2ab53 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 12:01:43 +0100 Subject: [PATCH 01/13] Classif models --- torchvision/models/alexnet.py | 8 +- torchvision/models/convnext.py | 32 ++- torchvision/models/densenet.py | 32 ++- torchvision/models/efficientnet.py | 96 +++++--- torchvision/models/googlenet.py | 8 +- torchvision/models/inception.py | 8 +- torchvision/models/mnasnet.py | 32 ++- torchvision/models/mobilenetv2.py | 16 +- torchvision/models/mobilenetv3.py | 24 +- torchvision/models/regnet.py | 272 ++++++++++++++--------- torchvision/models/resnet.py | 136 +++++++----- torchvision/models/shufflenetv2.py | 32 ++- torchvision/models/squeezenet.py | 16 +- torchvision/models/swin_transformer.py | 8 +- torchvision/models/vgg.py | 72 +++--- torchvision/models/vision_transformer.py | 80 ++++--- 16 files changed, 545 insertions(+), 327 deletions(-) diff --git a/torchvision/models/alexnet.py b/torchvision/models/alexnet.py index 733928bb5e3..fd7c57b0509 100644 --- a/torchvision/models/alexnet.py +++ b/torchvision/models/alexnet.py @@ -61,9 +61,11 @@ class AlexNet_Weights(WeightsEnum): "min_size": (63, 63), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg", - "metrics": { - "acc@1": 56.522, - "acc@5": 79.066, + "_metrics": { + "ImageNetV1": { + "acc@1": 56.522, + "acc@5": 79.066, + } }, "_docs": """ These weights reproduce closely the results of the paper using a simplified training recipe. diff --git a/torchvision/models/convnext.py b/torchvision/models/convnext.py index f9aa8d3b1c9..958c8e54262 100644 --- a/torchvision/models/convnext.py +++ b/torchvision/models/convnext.py @@ -222,9 +222,11 @@ class ConvNeXt_Tiny_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 28589128, - "metrics": { - "acc@1": 82.520, - "acc@5": 96.146, + "_metrics": { + "ImageNetV1": { + "acc@1": 82.520, + "acc@5": 96.146, + } }, }, ) @@ -238,9 +240,11 @@ class ConvNeXt_Small_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 50223688, - "metrics": { - "acc@1": 83.616, - "acc@5": 96.650, + "_metrics": { + "ImageNetV1": { + "acc@1": 83.616, + "acc@5": 96.650, + } }, }, ) @@ -254,9 +258,11 @@ class ConvNeXt_Base_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 88591464, - "metrics": { - "acc@1": 84.062, - "acc@5": 96.870, + "_metrics": { + "ImageNetV1": { + "acc@1": 84.062, + "acc@5": 96.870, + } }, }, ) @@ -270,9 +276,11 @@ class ConvNeXt_Large_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 197767336, - "metrics": { - "acc@1": 84.414, - "acc@5": 96.976, + "_metrics": { + "ImageNetV1": { + "acc@1": 84.414, + "acc@5": 96.976, + } }, }, ) diff --git a/torchvision/models/densenet.py b/torchvision/models/densenet.py index fc354a0c437..add2a82ee3b 100644 --- a/torchvision/models/densenet.py +++ b/torchvision/models/densenet.py @@ -272,9 +272,11 @@ class DenseNet121_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 7978856, - "metrics": { - "acc@1": 74.434, - "acc@5": 91.972, + "_metrics": { + "ImageNetV1": { + "acc@1": 74.434, + "acc@5": 91.972, + } }, }, ) @@ -288,9 +290,11 @@ class DenseNet161_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 28681000, - "metrics": { - "acc@1": 77.138, - "acc@5": 93.560, + "_metrics": { + "ImageNetV1": { + "acc@1": 77.138, + "acc@5": 93.560, + } }, }, ) @@ -304,9 +308,11 @@ class DenseNet169_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 14149480, - "metrics": { - "acc@1": 75.600, 
- "acc@5": 92.806, + "_metrics": { + "ImageNetV1": { + "acc@1": 75.600, + "acc@5": 92.806, + } }, }, ) @@ -320,9 +326,11 @@ class DenseNet201_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 20013928, - "metrics": { - "acc@1": 76.896, - "acc@5": 93.370, + "_metrics": { + "ImageNetV1": { + "acc@1": 76.896, + "acc@5": 93.370, + } }, }, ) diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index 4b911dbfaba..8ffcc5e444f 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -458,9 +458,11 @@ class EfficientNet_B0_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 5288548, - "metrics": { - "acc@1": 77.692, - "acc@5": 93.532, + "_metrics": { + "ImageNetV1": { + "acc@1": 77.692, + "acc@5": 93.532, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -478,9 +480,11 @@ class EfficientNet_B1_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 7794184, - "metrics": { - "acc@1": 78.642, - "acc@5": 94.186, + "_metrics": { + "ImageNetV1": { + "acc@1": 78.642, + "acc@5": 94.186, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -494,9 +498,11 @@ class EfficientNet_B1_Weights(WeightsEnum): **_COMMON_META_V1, "num_params": 7794184, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-lr-wd-crop-tuning", - "metrics": { - "acc@1": 79.838, - "acc@5": 94.934, + "_metrics": { + "ImageNetV1": { + "acc@1": 79.838, + "acc@5": 94.934, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -518,9 +524,11 @@ class EfficientNet_B2_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 9109994, - "metrics": { - "acc@1": 80.608, - "acc@5": 95.310, + "_metrics": { + "ImageNetV1": { + "acc@1": 80.608, + "acc@5": 95.310, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -538,9 +546,11 @@ class EfficientNet_B3_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 12233232, - "metrics": { - "acc@1": 82.008, - "acc@5": 96.054, + "_metrics": { + "ImageNetV1": { + "acc@1": 82.008, + "acc@5": 96.054, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -558,9 +568,11 @@ class EfficientNet_B4_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 19341616, - "metrics": { - "acc@1": 83.384, - "acc@5": 96.594, + "_metrics": { + "ImageNetV1": { + "acc@1": 83.384, + "acc@5": 96.594, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -578,9 +590,11 @@ class EfficientNet_B5_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 30389784, - "metrics": { - "acc@1": 83.444, - "acc@5": 96.628, + "_metrics": { + "ImageNetV1": { + "acc@1": 83.444, + "acc@5": 96.628, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -598,9 +612,11 @@ class EfficientNet_B6_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 43040704, - "metrics": { - "acc@1": 84.008, - "acc@5": 96.916, + "_metrics": { + "ImageNetV1": { + "acc@1": 84.008, + "acc@5": 96.916, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -618,9 +634,11 @@ class EfficientNet_B7_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 66347960, - "metrics": { - "acc@1": 84.122, - "acc@5": 96.908, + "_metrics": { + "ImageNetV1": { + "acc@1": 84.122, + "acc@5": 96.908, + } }, "_docs": """These weights are ported from the original paper.""", }, @@ -640,9 +658,11 @@ 
class EfficientNet_V2_S_Weights(WeightsEnum): meta={ **_COMMON_META_V2, "num_params": 21458488, - "metrics": { - "acc@1": 84.228, - "acc@5": 96.878, + "_metrics": { + "ImageNetV1": { + "acc@1": 84.228, + "acc@5": 96.878, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -666,9 +686,11 @@ class EfficientNet_V2_M_Weights(WeightsEnum): meta={ **_COMMON_META_V2, "num_params": 54139356, - "metrics": { - "acc@1": 85.112, - "acc@5": 97.156, + "_metrics": { + "ImageNetV1": { + "acc@1": 85.112, + "acc@5": 97.156, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -695,9 +717,11 @@ class EfficientNet_V2_L_Weights(WeightsEnum): meta={ **_COMMON_META_V2, "num_params": 118515272, - "metrics": { - "acc@1": 85.808, - "acc@5": 97.788, + "_metrics": { + "ImageNetV1": { + "acc@1": 85.808, + "acc@5": 97.788, + } }, "_docs": """These weights are ported from the original paper.""", }, diff --git a/torchvision/models/googlenet.py b/torchvision/models/googlenet.py index 94923dd2a48..809c178ae65 100644 --- a/torchvision/models/googlenet.py +++ b/torchvision/models/googlenet.py @@ -284,9 +284,11 @@ class GoogLeNet_Weights(WeightsEnum): "min_size": (15, 15), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#googlenet", - "metrics": { - "acc@1": 69.778, - "acc@5": 89.530, + "_metrics": { + "ImageNetV1": { + "acc@1": 69.778, + "acc@5": 89.530, + } }, "_docs": """These weights are ported from the original paper.""", }, diff --git a/torchvision/models/inception.py b/torchvision/models/inception.py index b034d3aa79a..6c8596a159d 100644 --- a/torchvision/models/inception.py +++ b/torchvision/models/inception.py @@ -416,9 +416,11 @@ class Inception_V3_Weights(WeightsEnum): "min_size": (75, 75), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#inception-v3", - "metrics": { - "acc@1": 77.294, - "acc@5": 93.450, + "_metrics": { + "ImageNetV1": { + "acc@1": 77.294, + "acc@5": 93.450, + } }, "_docs": """These weights are ported from the original paper.""", }, diff --git a/torchvision/models/mnasnet.py b/torchvision/models/mnasnet.py index eb26a7ea5c0..c36f18eea00 100644 --- a/torchvision/models/mnasnet.py +++ b/torchvision/models/mnasnet.py @@ -225,9 +225,11 @@ class MNASNet0_5_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 2218512, - "metrics": { - "acc@1": 67.734, - "acc@5": 87.490, + "_metrics": { + "ImageNetV1": { + "acc@1": 67.734, + "acc@5": 87.490, + } }, "_docs": """These weights reproduce closely the results of the paper.""", }, @@ -243,9 +245,11 @@ class MNASNet0_75_Weights(WeightsEnum): **_COMMON_META, "recipe": "https://github.com/pytorch/vision/pull/6019", "num_params": 3170208, - "metrics": { - "acc@1": 71.180, - "acc@5": 90.496, + "_metrics": { + "ImageNetV1": { + "acc@1": 71.180, + "acc@5": 90.496, + } }, "_docs": """ These weights were trained from scratch by using TorchVision's `new training recipe @@ -263,9 +267,11 @@ class MNASNet1_0_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 4383312, - "metrics": { - "acc@1": 73.456, - "acc@5": 91.510, + "_metrics": { + "ImageNetV1": { + "acc@1": 73.456, + "acc@5": 91.510, + } }, "_docs": """These weights reproduce closely the results of the paper.""", }, @@ -281,9 +287,11 @@ class MNASNet1_3_Weights(WeightsEnum): **_COMMON_META, "recipe": 
"https://github.com/pytorch/vision/pull/6019", "num_params": 6282256, - "metrics": { - "acc@1": 76.506, - "acc@5": 93.522, + "_metrics": { + "ImageNetV1": { + "acc@1": 76.506, + "acc@5": 93.522, + } }, "_docs": """ These weights were trained from scratch by using TorchVision's `new training recipe diff --git a/torchvision/models/mobilenetv2.py b/torchvision/models/mobilenetv2.py index b27d305dc82..69c22ba4201 100644 --- a/torchvision/models/mobilenetv2.py +++ b/torchvision/models/mobilenetv2.py @@ -208,9 +208,11 @@ class MobileNet_V2_Weights(WeightsEnum): meta={ **_COMMON_META, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv2", - "metrics": { - "acc@1": 71.878, - "acc@5": 90.286, + "_metrics": { + "ImageNetV1": { + "acc@1": 71.878, + "acc@5": 90.286, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -221,9 +223,11 @@ class MobileNet_V2_Weights(WeightsEnum): meta={ **_COMMON_META, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-reg-tuning", - "metrics": { - "acc@1": 72.154, - "acc@5": 90.822, + "_metrics": { + "ImageNetV1": { + "acc@1": 72.154, + "acc@5": 90.822, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py index 465d4e58ac8..74b91d69508 100644 --- a/torchvision/models/mobilenetv3.py +++ b/torchvision/models/mobilenetv3.py @@ -317,9 +317,11 @@ class MobileNet_V3_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5483032, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv3-large--small", - "metrics": { - "acc@1": 74.042, - "acc@5": 91.340, + "_metrics": { + "ImageNetV1": { + "acc@1": 74.042, + "acc@5": 91.340, + } }, "_docs": """These weights were trained from scratch by using a simple training recipe.""", }, @@ -331,9 +333,11 @@ class MobileNet_V3_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5483032, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-reg-tuning", - "metrics": { - "acc@1": 75.274, - "acc@5": 92.566, + "_metrics": { + "ImageNetV1": { + "acc@1": 75.274, + "acc@5": 92.566, + } }, "_docs": """ These weights improve marginally upon the results of the original paper by using a modified version of @@ -353,9 +357,11 @@ class MobileNet_V3_Small_Weights(WeightsEnum): **_COMMON_META, "num_params": 2542856, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv3-large--small", - "metrics": { - "acc@1": 67.668, - "acc@5": 87.402, + "_metrics": { + "ImageNetV1": { + "acc@1": 67.668, + "acc@5": 87.402, + } }, "_docs": """ These weights improve upon the results of the original paper by using a simple training recipe. 
diff --git a/torchvision/models/regnet.py b/torchvision/models/regnet.py index be80c9acf35..1bf4572103e 100644 --- a/torchvision/models/regnet.py +++ b/torchvision/models/regnet.py @@ -422,9 +422,11 @@ class RegNet_Y_400MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 4344144, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "metrics": { - "acc@1": 74.046, - "acc@5": 91.716, + "_metrics": { + "ImageNetV1": { + "acc@1": 74.046, + "acc@5": 91.716, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -436,9 +438,11 @@ class RegNet_Y_400MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 4344144, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 75.804, - "acc@5": 92.742, + "_metrics": { + "ImageNetV1": { + "acc@1": 75.804, + "acc@5": 92.742, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -458,9 +462,11 @@ class RegNet_Y_800MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 6432512, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "metrics": { - "acc@1": 76.420, - "acc@5": 93.136, + "_metrics": { + "ImageNetV1": { + "acc@1": 76.420, + "acc@5": 93.136, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -472,9 +478,11 @@ class RegNet_Y_800MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 6432512, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 78.828, - "acc@5": 94.502, + "_metrics": { + "ImageNetV1": { + "acc@1": 78.828, + "acc@5": 94.502, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -494,9 +502,11 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 11202430, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "metrics": { - "acc@1": 77.950, - "acc@5": 93.966, + "_metrics": { + "ImageNetV1": { + "acc@1": 77.950, + "acc@5": 93.966, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -508,9 +518,11 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 11202430, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 80.876, - "acc@5": 95.444, + "_metrics": { + "ImageNetV1": { + "acc@1": 80.876, + "acc@5": 95.444, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -530,9 +542,11 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 19436338, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "metrics": { - "acc@1": 78.948, - "acc@5": 94.576, + "_metrics": { + "ImageNetV1": { + "acc@1": 78.948, + "acc@5": 94.576, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -544,9 +558,11 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 19436338, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 81.982, - "acc@5": 95.972, + "_metrics": { + "ImageNetV1": { + "acc@1": 81.982, + "acc@5": 95.972, + } }, "_docs": """ These weights improve 
upon the results of the original paper by using a modified version of TorchVision's @@ -566,9 +582,11 @@ class RegNet_Y_8GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 39381472, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "metrics": { - "acc@1": 80.032, - "acc@5": 95.048, + "_metrics": { + "ImageNetV1": { + "acc@1": 80.032, + "acc@5": 95.048, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -580,9 +598,11 @@ class RegNet_Y_8GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 39381472, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 82.828, - "acc@5": 96.330, + "_metrics": { + "ImageNetV1": { + "acc@1": 82.828, + "acc@5": 96.330, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -602,9 +622,11 @@ class RegNet_Y_16GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 83590140, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#large-models", - "metrics": { - "acc@1": 80.424, - "acc@5": 95.240, + "_metrics": { + "ImageNetV1": { + "acc@1": 80.424, + "acc@5": 95.240, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -616,9 +638,11 @@ class RegNet_Y_16GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 83590140, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 82.886, - "acc@5": 96.328, + "_metrics": { + "ImageNetV1": { + "acc@1": 82.886, + "acc@5": 96.328, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -635,9 +659,11 @@ class RegNet_Y_16GF_Weights(WeightsEnum): meta={ **_COMMON_SWAG_META, "num_params": 83590140, - "metrics": { - "acc@1": 86.012, - "acc@5": 98.054, + "_metrics": { + "ImageNetV1": { + "acc@1": 86.012, + "acc@5": 98.054, + } }, "_docs": """ These weights are learnt via transfer learning by end-to-end fine-tuning the original @@ -654,9 +680,11 @@ class RegNet_Y_16GF_Weights(WeightsEnum): **_COMMON_SWAG_META, "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 83590140, - "metrics": { - "acc@1": 83.976, - "acc@5": 97.244, + "_metrics": { + "ImageNetV1": { + "acc@1": 83.976, + "acc@5": 97.244, + } }, "_docs": """ These weights are composed of the original frozen `SWAG `_ trunk @@ -675,9 +703,11 @@ class RegNet_Y_32GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 145046770, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#large-models", - "metrics": { - "acc@1": 80.878, - "acc@5": 95.340, + "_metrics": { + "ImageNetV1": { + "acc@1": 80.878, + "acc@5": 95.340, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -689,9 +719,11 @@ class RegNet_Y_32GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 145046770, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 83.368, - "acc@5": 96.498, + "_metrics": { + "ImageNetV1": { + "acc@1": 83.368, + "acc@5": 96.498, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -708,9 +740,11 @@ class RegNet_Y_32GF_Weights(WeightsEnum): meta={ **_COMMON_SWAG_META, "num_params": 145046770, - "metrics": { - "acc@1": 86.838, - 
"acc@5": 98.362, + "_metrics": { + "ImageNetV1": { + "acc@1": 86.838, + "acc@5": 98.362, + } }, "_docs": """ These weights are learnt via transfer learning by end-to-end fine-tuning the original @@ -727,9 +761,11 @@ class RegNet_Y_32GF_Weights(WeightsEnum): **_COMMON_SWAG_META, "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 145046770, - "metrics": { - "acc@1": 84.622, - "acc@5": 97.480, + "_metrics": { + "ImageNetV1": { + "acc@1": 84.622, + "acc@5": 97.480, + } }, "_docs": """ These weights are composed of the original frozen `SWAG `_ trunk @@ -749,9 +785,11 @@ class RegNet_Y_128GF_Weights(WeightsEnum): meta={ **_COMMON_SWAG_META, "num_params": 644812894, - "metrics": { - "acc@1": 88.228, - "acc@5": 98.682, + "_metrics": { + "ImageNetV1": { + "acc@1": 88.228, + "acc@5": 98.682, + } }, "_docs": """ These weights are learnt via transfer learning by end-to-end fine-tuning the original @@ -768,9 +806,11 @@ class RegNet_Y_128GF_Weights(WeightsEnum): **_COMMON_SWAG_META, "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 644812894, - "metrics": { - "acc@1": 86.068, - "acc@5": 97.844, + "_metrics": { + "ImageNetV1": { + "acc@1": 86.068, + "acc@5": 97.844, + } }, "_docs": """ These weights are composed of the original frozen `SWAG `_ trunk @@ -789,9 +829,11 @@ class RegNet_X_400MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 5495976, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "metrics": { - "acc@1": 72.834, - "acc@5": 90.950, + "_metrics": { + "ImageNetV1": { + "acc@1": 72.834, + "acc@5": 90.950, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -803,9 +845,11 @@ class RegNet_X_400MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 5495976, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "metrics": { - "acc@1": 74.864, - "acc@5": 92.322, + "_metrics": { + "ImageNetV1": { + "acc@1": 74.864, + "acc@5": 92.322, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -825,9 +869,11 @@ class RegNet_X_800MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 7259656, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "metrics": { - "acc@1": 75.212, - "acc@5": 92.348, + "_metrics": { + "ImageNetV1": { + "acc@1": 75.212, + "acc@5": 92.348, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -839,9 +885,11 @@ class RegNet_X_800MF_Weights(WeightsEnum): **_COMMON_META, "num_params": 7259656, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "metrics": { - "acc@1": 77.522, - "acc@5": 93.826, + "_metrics": { + "ImageNetV1": { + "acc@1": 77.522, + "acc@5": 93.826, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -861,9 +909,11 @@ class RegNet_X_1_6GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 9190136, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", - "metrics": { - "acc@1": 77.040, - "acc@5": 93.440, + "_metrics": { + "ImageNetV1": { + "acc@1": 77.040, + "acc@5": 93.440, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -875,9 +925,11 @@ class RegNet_X_1_6GF_Weights(WeightsEnum): 
**_COMMON_META, "num_params": 9190136, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "metrics": { - "acc@1": 79.668, - "acc@5": 94.922, + "_metrics": { + "ImageNetV1": { + "acc@1": 79.668, + "acc@5": 94.922, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -897,9 +949,11 @@ class RegNet_X_3_2GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 15296552, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "metrics": { - "acc@1": 78.364, - "acc@5": 93.992, + "_metrics": { + "ImageNetV1": { + "acc@1": 78.364, + "acc@5": 93.992, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -911,9 +965,11 @@ class RegNet_X_3_2GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 15296552, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 81.196, - "acc@5": 95.430, + "_metrics": { + "ImageNetV1": { + "acc@1": 81.196, + "acc@5": 95.430, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -933,9 +989,11 @@ class RegNet_X_8GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 39572648, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "metrics": { - "acc@1": 79.344, - "acc@5": 94.686, + "_metrics": { + "ImageNetV1": { + "acc@1": 79.344, + "acc@5": 94.686, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -947,9 +1005,11 @@ class RegNet_X_8GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 39572648, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 81.682, - "acc@5": 95.678, + "_metrics": { + "ImageNetV1": { + "acc@1": 81.682, + "acc@5": 95.678, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -969,9 +1029,11 @@ class RegNet_X_16GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 54278536, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", - "metrics": { - "acc@1": 80.058, - "acc@5": 94.944, + "_metrics": { + "ImageNetV1": { + "acc@1": 80.058, + "acc@5": 94.944, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -983,9 +1045,11 @@ class RegNet_X_16GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 54278536, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 82.716, - "acc@5": 96.196, + "_metrics": { + "ImageNetV1": { + "acc@1": 82.716, + "acc@5": 96.196, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's @@ -1005,9 +1069,11 @@ class RegNet_X_32GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 107811560, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#large-models", - "metrics": { - "acc@1": 80.622, - "acc@5": 95.248, + "_metrics": { + "ImageNetV1": { + "acc@1": 80.622, + "acc@5": 95.248, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -1019,9 +1085,11 @@ class RegNet_X_32GF_Weights(WeightsEnum): **_COMMON_META, "num_params": 107811560, "recipe": 
"https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 83.014, - "acc@5": 96.288, + "_metrics": { + "ImageNetV1": { + "acc@1": 83.014, + "acc@5": 96.288, + } }, "_docs": """ These weights improve upon the results of the original paper by using a modified version of TorchVision's diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index b46c07ef600..3c9b007cc70 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -317,9 +317,11 @@ class ResNet18_Weights(WeightsEnum): **_COMMON_META, "num_params": 11689512, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "metrics": { - "acc@1": 69.758, - "acc@5": 89.078, + "_metrics": { + "ImageNetV1": { + "acc@1": 69.758, + "acc@5": 89.078, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -335,9 +337,11 @@ class ResNet34_Weights(WeightsEnum): **_COMMON_META, "num_params": 21797672, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "metrics": { - "acc@1": 73.314, - "acc@5": 91.420, + "_metrics": { + "ImageNetV1": { + "acc@1": 73.314, + "acc@5": 91.420, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -353,9 +357,11 @@ class ResNet50_Weights(WeightsEnum): **_COMMON_META, "num_params": 25557032, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "metrics": { - "acc@1": 76.130, - "acc@5": 92.862, + "_metrics": { + "ImageNetV1": { + "acc@1": 76.130, + "acc@5": 92.862, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -367,9 +373,11 @@ class ResNet50_Weights(WeightsEnum): **_COMMON_META, "num_params": 25557032, "recipe": "https://github.com/pytorch/vision/issues/3995#issuecomment-1013906621", - "metrics": { - "acc@1": 80.858, - "acc@5": 95.434, + "_metrics": { + "ImageNetV1": { + "acc@1": 80.858, + "acc@5": 95.434, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training recipe @@ -388,9 +396,11 @@ class ResNet101_Weights(WeightsEnum): **_COMMON_META, "num_params": 44549160, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "metrics": { - "acc@1": 77.374, - "acc@5": 93.546, + "_metrics": { + "ImageNetV1": { + "acc@1": 77.374, + "acc@5": 93.546, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -402,9 +412,11 @@ class ResNet101_Weights(WeightsEnum): **_COMMON_META, "num_params": 44549160, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 81.886, - "acc@5": 95.780, + "_metrics": { + "ImageNetV1": { + "acc@1": 81.886, + "acc@5": 95.780, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training recipe @@ -423,9 +435,11 @@ class ResNet152_Weights(WeightsEnum): **_COMMON_META, "num_params": 60192808, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", - "metrics": { - "acc@1": 78.312, - "acc@5": 94.046, + "_metrics": { + "ImageNetV1": { + "acc@1": 78.312, + "acc@5": 94.046, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -437,9 +451,11 @@ class ResNet152_Weights(WeightsEnum): 
**_COMMON_META, "num_params": 60192808, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 82.284, - "acc@5": 96.002, + "_metrics": { + "ImageNetV1": { + "acc@1": 82.284, + "acc@5": 96.002, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training recipe @@ -458,9 +474,11 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): **_COMMON_META, "num_params": 25028904, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext", - "metrics": { - "acc@1": 77.618, - "acc@5": 93.698, + "_metrics": { + "ImageNetV1": { + "acc@1": 77.618, + "acc@5": 93.698, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -472,9 +490,11 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): **_COMMON_META, "num_params": 25028904, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 81.198, - "acc@5": 95.340, + "_metrics": { + "ImageNetV1": { + "acc@1": 81.198, + "acc@5": 95.340, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training recipe @@ -493,9 +513,11 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): **_COMMON_META, "num_params": 88791336, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext", - "metrics": { - "acc@1": 79.312, - "acc@5": 94.526, + "_metrics": { + "ImageNetV1": { + "acc@1": 79.312, + "acc@5": 94.526, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -507,9 +529,11 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): **_COMMON_META, "num_params": 88791336, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "metrics": { - "acc@1": 82.834, - "acc@5": 96.228, + "_metrics": { + "ImageNetV1": { + "acc@1": 82.834, + "acc@5": 96.228, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training recipe @@ -528,9 +552,11 @@ class ResNeXt101_64X4D_Weights(WeightsEnum): **_COMMON_META, "num_params": 83455272, "recipe": "https://github.com/pytorch/vision/pull/5935", - "metrics": { - "acc@1": 83.246, - "acc@5": 96.454, + "_metrics": { + "ImageNetV1": { + "acc@1": 83.246, + "acc@5": 96.454, + } }, "_docs": """ These weights were trained from scratch by using TorchVision's `new training recipe @@ -549,9 +575,11 @@ class Wide_ResNet50_2_Weights(WeightsEnum): **_COMMON_META, "num_params": 68883240, "recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439", - "metrics": { - "acc@1": 78.468, - "acc@5": 94.086, + "_metrics": { + "ImageNetV1": { + "acc@1": 78.468, + "acc@5": 94.086, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -563,9 +591,11 @@ class Wide_ResNet50_2_Weights(WeightsEnum): **_COMMON_META, "num_params": 68883240, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", - "metrics": { - "acc@1": 81.602, - "acc@5": 95.758, + "_metrics": { + "ImageNetV1": { + "acc@1": 81.602, + "acc@5": 95.758, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training recipe @@ -584,9 +614,11 @@ class Wide_ResNet101_2_Weights(WeightsEnum): **_COMMON_META, "num_params": 126886696, "recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439", - "metrics": { - "acc@1": 78.848, 
- "acc@5": 94.284, + "_metrics": { + "ImageNetV1": { + "acc@1": 78.848, + "acc@5": 94.284, + } }, "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, @@ -598,9 +630,11 @@ class Wide_ResNet101_2_Weights(WeightsEnum): **_COMMON_META, "num_params": 126886696, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", - "metrics": { - "acc@1": 82.510, - "acc@5": 96.020, + "_metrics": { + "ImageNetV1": { + "acc@1": 82.510, + "acc@5": 96.020, + } }, "_docs": """ These weights improve upon the results of the original paper by using TorchVision's `new training recipe diff --git a/torchvision/models/shufflenetv2.py b/torchvision/models/shufflenetv2.py index 4e163573655..6a32df71eea 100644 --- a/torchvision/models/shufflenetv2.py +++ b/torchvision/models/shufflenetv2.py @@ -198,9 +198,11 @@ class ShuffleNet_V2_X0_5_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 1366792, - "metrics": { - "acc@1": 60.552, - "acc@5": 81.746, + "_metrics": { + "ImageNetV1": { + "acc@1": 60.552, + "acc@5": 81.746, + } }, "_docs": """These weights were trained from scratch to reproduce closely the results of the paper.""", }, @@ -216,9 +218,11 @@ class ShuffleNet_V2_X1_0_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 2278604, - "metrics": { - "acc@1": 69.362, - "acc@5": 88.316, + "_metrics": { + "ImageNetV1": { + "acc@1": 69.362, + "acc@5": 88.316, + } }, "_docs": """These weights were trained from scratch to reproduce closely the results of the paper.""", }, @@ -234,9 +238,11 @@ class ShuffleNet_V2_X1_5_Weights(WeightsEnum): **_COMMON_META, "recipe": "https://github.com/pytorch/vision/pull/5906", "num_params": 3503624, - "metrics": { - "acc@1": 72.996, - "acc@5": 91.086, + "_metrics": { + "ImageNetV1": { + "acc@1": 72.996, + "acc@5": 91.086, + } }, "_docs": """ These weights were trained from scratch by using TorchVision's `new training recipe @@ -255,9 +261,11 @@ class ShuffleNet_V2_X2_0_Weights(WeightsEnum): **_COMMON_META, "recipe": "https://github.com/pytorch/vision/pull/5906", "num_params": 7393996, - "metrics": { - "acc@1": 76.230, - "acc@5": 93.006, + "_metrics": { + "ImageNetV1": { + "acc@1": 76.230, + "acc@5": 93.006, + } }, "_docs": """ These weights were trained from scratch by using TorchVision's `new training recipe diff --git a/torchvision/models/squeezenet.py b/torchvision/models/squeezenet.py index a93a06fc80d..78fa4547e8e 100644 --- a/torchvision/models/squeezenet.py +++ b/torchvision/models/squeezenet.py @@ -129,9 +129,11 @@ class SqueezeNet1_0_Weights(WeightsEnum): **_COMMON_META, "min_size": (21, 21), "num_params": 1248424, - "metrics": { - "acc@1": 58.092, - "acc@5": 80.420, + "_metrics": { + "ImageNetV1": { + "acc@1": 58.092, + "acc@5": 80.420, + } }, }, ) @@ -146,9 +148,11 @@ class SqueezeNet1_1_Weights(WeightsEnum): **_COMMON_META, "min_size": (17, 17), "num_params": 1235496, - "metrics": { - "acc@1": 58.178, - "acc@5": 80.624, + "_metrics": { + "ImageNetV1": { + "acc@1": 58.178, + "acc@5": 80.624, + } }, }, ) diff --git a/torchvision/models/swin_transformer.py b/torchvision/models/swin_transformer.py index f60db5096a7..fa64e400408 100644 --- a/torchvision/models/swin_transformer.py +++ b/torchvision/models/swin_transformer.py @@ -417,9 +417,11 @@ class Swin_T_Weights(WeightsEnum): "num_params": 28288354, "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer", - "metrics": { - "acc@1": 81.358, - "acc@5": 95.526, + "_metrics": { + 
"ImageNetV1": { + "acc@1": 81.358, + "acc@5": 95.526, + } }, "_docs": """These weights reproduce closely the results of the paper using its training recipe.""", }, diff --git a/torchvision/models/vgg.py b/torchvision/models/vgg.py index 9fb77d9bf97..06dc751d3f9 100644 --- a/torchvision/models/vgg.py +++ b/torchvision/models/vgg.py @@ -121,9 +121,11 @@ class VGG11_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 132863336, - "metrics": { - "acc@1": 69.020, - "acc@5": 88.628, + "_metrics": { + "ImageNetV1": { + "acc@1": 69.020, + "acc@5": 88.628, + } }, }, ) @@ -137,9 +139,11 @@ class VGG11_BN_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 132868840, - "metrics": { - "acc@1": 70.370, - "acc@5": 89.810, + "_metrics": { + "ImageNetV1": { + "acc@1": 70.370, + "acc@5": 89.810, + } }, }, ) @@ -153,9 +157,11 @@ class VGG13_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 133047848, - "metrics": { - "acc@1": 69.928, - "acc@5": 89.246, + "_metrics": { + "ImageNetV1": { + "acc@1": 69.928, + "acc@5": 89.246, + } }, }, ) @@ -169,9 +175,11 @@ class VGG13_BN_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 133053736, - "metrics": { - "acc@1": 71.586, - "acc@5": 90.374, + "_metrics": { + "ImageNetV1": { + "acc@1": 71.586, + "acc@5": 90.374, + } }, }, ) @@ -185,9 +193,11 @@ class VGG16_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 138357544, - "metrics": { - "acc@1": 71.592, - "acc@5": 90.382, + "_metrics": { + "ImageNetV1": { + "acc@1": 71.592, + "acc@5": 90.382, + } }, }, ) @@ -205,9 +215,11 @@ class VGG16_Weights(WeightsEnum): "num_params": 138357544, "categories": None, "recipe": "https://github.com/amdegroot/ssd.pytorch#training-ssd", - "metrics": { - "acc@1": float("nan"), - "acc@5": float("nan"), + "_metrics": { + "ImageNetV1": { + "acc@1": float("nan"), + "acc@5": float("nan"), + } }, "_docs": """ These weights can't be used for classification because they are missing values in the `classifier` @@ -226,9 +238,11 @@ class VGG16_BN_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 138365992, - "metrics": { - "acc@1": 73.360, - "acc@5": 91.516, + "_metrics": { + "ImageNetV1": { + "acc@1": 73.360, + "acc@5": 91.516, + } }, }, ) @@ -242,9 +256,11 @@ class VGG19_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 143667240, - "metrics": { - "acc@1": 72.376, - "acc@5": 90.876, + "_metrics": { + "ImageNetV1": { + "acc@1": 72.376, + "acc@5": 90.876, + } }, }, ) @@ -258,9 +274,11 @@ class VGG19_BN_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 143678248, - "metrics": { - "acc@1": 74.218, - "acc@5": 91.842, + "_metrics": { + "ImageNetV1": { + "acc@1": 74.218, + "acc@5": 91.842, + } }, }, ) diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py index dc54b8735f0..cb213261f92 100644 --- a/torchvision/models/vision_transformer.py +++ b/torchvision/models/vision_transformer.py @@ -328,9 +328,11 @@ class ViT_B_16_Weights(WeightsEnum): "num_params": 86567656, "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_16", - "metrics": { - "acc@1": 81.072, - "acc@5": 95.318, + "_metrics": { + "ImageNetV1": { + "acc@1": 81.072, + "acc@5": 95.318, + } }, "_docs": """ These weights were trained from scratch by using a modified version of `DeIT @@ -350,9 +352,11 @@ class ViT_B_16_Weights(WeightsEnum): **_COMMON_SWAG_META, "num_params": 86859496, "min_size": (384, 384), - "metrics": { - "acc@1": 85.304, - "acc@5": 97.650, + "_metrics": { + 
"ImageNetV1": { + "acc@1": 85.304, + "acc@5": 97.650, + } }, "_docs": """ These weights are learnt via transfer learning by end-to-end fine-tuning the original @@ -373,9 +377,11 @@ class ViT_B_16_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 86567656, "min_size": (224, 224), - "metrics": { - "acc@1": 81.886, - "acc@5": 96.180, + "_metrics": { + "ImageNetV1": { + "acc@1": 81.886, + "acc@5": 96.180, + } }, "_docs": """ These weights are composed of the original frozen `SWAG `_ trunk @@ -395,9 +401,11 @@ class ViT_B_32_Weights(WeightsEnum): "num_params": 88224232, "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_32", - "metrics": { - "acc@1": 75.912, - "acc@5": 92.466, + "_metrics": { + "ImageNetV1": { + "acc@1": 75.912, + "acc@5": 92.466, + } }, "_docs": """ These weights were trained from scratch by using a modified version of `DeIT @@ -417,9 +425,11 @@ class ViT_L_16_Weights(WeightsEnum): "num_params": 304326632, "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_16", - "metrics": { - "acc@1": 79.662, - "acc@5": 94.638, + "_metrics": { + "ImageNetV1": { + "acc@1": 79.662, + "acc@5": 94.638, + } }, "_docs": """ These weights were trained from scratch by using a modified version of TorchVision's @@ -440,9 +450,11 @@ class ViT_L_16_Weights(WeightsEnum): **_COMMON_SWAG_META, "num_params": 305174504, "min_size": (512, 512), - "metrics": { - "acc@1": 88.064, - "acc@5": 98.512, + "_metrics": { + "ImageNetV1": { + "acc@1": 88.064, + "acc@5": 98.512, + } }, "_docs": """ These weights are learnt via transfer learning by end-to-end fine-tuning the original @@ -463,9 +475,11 @@ class ViT_L_16_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 304326632, "min_size": (224, 224), - "metrics": { - "acc@1": 85.146, - "acc@5": 97.422, + "_metrics": { + "ImageNetV1": { + "acc@1": 85.146, + "acc@5": 97.422, + } }, "_docs": """ These weights are composed of the original frozen `SWAG `_ trunk @@ -485,9 +499,11 @@ class ViT_L_32_Weights(WeightsEnum): "num_params": 306535400, "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_32", - "metrics": { - "acc@1": 76.972, - "acc@5": 93.07, + "_metrics": { + "ImageNetV1": { + "acc@1": 76.972, + "acc@5": 93.07, + } }, "_docs": """ These weights were trained from scratch by using a modified version of `DeIT @@ -511,9 +527,11 @@ class ViT_H_14_Weights(WeightsEnum): **_COMMON_SWAG_META, "num_params": 633470440, "min_size": (518, 518), - "metrics": { - "acc@1": 88.552, - "acc@5": 98.694, + "_metrics": { + "ImageNetV1": { + "acc@1": 88.552, + "acc@5": 98.694, + } }, "_docs": """ These weights are learnt via transfer learning by end-to-end fine-tuning the original @@ -534,9 +552,11 @@ class ViT_H_14_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 632045800, "min_size": (224, 224), - "metrics": { - "acc@1": 85.708, - "acc@5": 97.730, + "_metrics": { + "ImageNetV1": { + "acc@1": 85.708, + "acc@5": 97.730, + } }, "_docs": """ These weights are composed of the original frozen `SWAG `_ trunk From 31a9aa70101e1a771922306b55c3d908469e8eb7 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 12:06:17 +0100 Subject: [PATCH 02/13] Detection --- torchvision/models/detection/faster_rcnn.py | 24 ++++++++++++------- torchvision/models/detection/fcos.py | 6 +++-- 
torchvision/models/detection/keypoint_rcnn.py | 16 ++++++++----- torchvision/models/detection/mask_rcnn.py | 16 ++++++++----- torchvision/models/detection/retinanet.py | 12 ++++++---- torchvision/models/detection/ssd.py | 6 +++-- torchvision/models/detection/ssdlite.py | 6 +++-- 7 files changed, 56 insertions(+), 30 deletions(-) diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index afe66ead646..92934d71b92 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -383,8 +383,10 @@ class FasterRCNN_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 41755286, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-resnet-50-fpn", - "metrics": { - "box_map": 37.0, + "_metrics": { + "COCO": { + "box_map": 37.0, + } }, }, ) @@ -399,8 +401,10 @@ class FasterRCNN_ResNet50_FPN_V2_Weights(WeightsEnum): **_COMMON_META, "num_params": 43712278, "recipe": "https://github.com/pytorch/vision/pull/5763", - "metrics": { - "box_map": 46.7, + "_metrics": { + "COCO": { + "box_map": 46.7, + } }, }, ) @@ -415,8 +419,10 @@ class FasterRCNN_MobileNet_V3_Large_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 19386354, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-fpn", - "metrics": { - "box_map": 32.8, + "_metrics": { + "COCO": { + "box_map": 32.8, + } }, }, ) @@ -431,8 +437,10 @@ class FasterRCNN_MobileNet_V3_Large_320_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 19386354, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-320-fpn", - "metrics": { - "box_map": 22.8, + "_metrics": { + "COCO": { + "box_map": 22.8, + } }, }, ) diff --git a/torchvision/models/detection/fcos.py b/torchvision/models/detection/fcos.py index 3544ea3117e..04d1ac14396 100644 --- a/torchvision/models/detection/fcos.py +++ b/torchvision/models/detection/fcos.py @@ -658,8 +658,10 @@ class FCOS_ResNet50_FPN_Weights(WeightsEnum): "categories": _COCO_CATEGORIES, "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#fcos-resnet-50-fpn", - "metrics": { - "box_map": 39.2, + "_metrics": { + "COCO": { + "box_map": 39.2, + } }, }, ) diff --git a/torchvision/models/detection/keypoint_rcnn.py b/torchvision/models/detection/keypoint_rcnn.py index 410c53d60b7..40a8e72492f 100644 --- a/torchvision/models/detection/keypoint_rcnn.py +++ b/torchvision/models/detection/keypoint_rcnn.py @@ -322,9 +322,11 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 59137258, "recipe": "https://github.com/pytorch/vision/issues/1606", - "metrics": { - "box_map": 50.6, - "kp_map": 61.1, + "_metrics": { + "COCO": { + "box_map": 50.6, + "kp_map": 61.1, + } }, }, ) @@ -335,9 +337,11 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 59137258, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#keypoint-r-cnn", - "metrics": { - "box_map": 54.6, - "kp_map": 65.0, + "_metrics": { + "COCO": { + "box_map": 54.6, + "kp_map": 65.0, + } }, }, ) diff --git a/torchvision/models/detection/mask_rcnn.py b/torchvision/models/detection/mask_rcnn.py index 6b1ba04a195..33f6c34a2ed 100644 --- a/torchvision/models/detection/mask_rcnn.py +++ b/torchvision/models/detection/mask_rcnn.py @@ -364,9 +364,11 @@ class MaskRCNN_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, 
"num_params": 44401393, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#mask-r-cnn", - "metrics": { - "box_map": 37.9, - "mask_map": 34.6, + "_metrics": { + "COCO": { + "box_map": 37.9, + "mask_map": 34.6, + } }, }, ) @@ -381,9 +383,11 @@ class MaskRCNN_ResNet50_FPN_V2_Weights(WeightsEnum): **_COMMON_META, "num_params": 46359409, "recipe": "https://github.com/pytorch/vision/pull/5773", - "metrics": { - "box_map": 47.4, - "mask_map": 41.8, + "_metrics": { + "COCO": { + "box_map": 47.4, + "mask_map": 41.8, + } }, }, ) diff --git a/torchvision/models/detection/retinanet.py b/torchvision/models/detection/retinanet.py index 0cb4979d332..37fb1bda9ef 100644 --- a/torchvision/models/detection/retinanet.py +++ b/torchvision/models/detection/retinanet.py @@ -687,8 +687,10 @@ class RetinaNet_ResNet50_FPN_Weights(WeightsEnum): **_COMMON_META, "num_params": 34014999, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#retinanet", - "metrics": { - "box_map": 36.4, + "_metrics": { + "COCO": { + "box_map": 36.4, + } }, }, ) @@ -703,8 +705,10 @@ class RetinaNet_ResNet50_FPN_V2_Weights(WeightsEnum): **_COMMON_META, "num_params": 38198935, "recipe": "https://github.com/pytorch/vision/pull/5756", - "metrics": { - "box_map": 41.5, + "_metrics": { + "COCO": { + "box_map": 41.5, + } }, }, ) diff --git a/torchvision/models/detection/ssd.py b/torchvision/models/detection/ssd.py index 7e5625329be..61661afbba3 100644 --- a/torchvision/models/detection/ssd.py +++ b/torchvision/models/detection/ssd.py @@ -34,8 +34,10 @@ class SSD300_VGG16_Weights(WeightsEnum): "categories": _COCO_CATEGORIES, "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssd300-vgg16", - "metrics": { - "box_map": 25.1, + "_metrics": { + "COCO": { + "box_map": 25.1, + } }, }, ) diff --git a/torchvision/models/detection/ssdlite.py b/torchvision/models/detection/ssdlite.py index f94758cb166..03944284672 100644 --- a/torchvision/models/detection/ssdlite.py +++ b/torchvision/models/detection/ssdlite.py @@ -193,8 +193,10 @@ class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum): "categories": _COCO_CATEGORIES, "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssdlite320-mobilenetv3-large", - "metrics": { - "box_map": 21.3, + "_metrics": { + "COCO": { + "box_map": 21.3, + } }, }, ) From 8b03134f3d2767c124cc15e7a22e228186b41119 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 12:11:00 +0100 Subject: [PATCH 03/13] Segmentation --- torchvision/models/segmentation/deeplabv3.py | 24 ++++++++++++-------- torchvision/models/segmentation/fcn.py | 16 ++++++++----- torchvision/models/segmentation/lraspp.py | 8 ++++--- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index 57ccc377c2d..2dc2a188e33 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -142,9 +142,11 @@ class DeepLabV3_ResNet50_Weights(WeightsEnum): **_COMMON_META, "num_params": 42004074, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_resnet50", - "metrics": { - "miou": 66.4, - "pixel_acc": 92.4, + "_metrics": { + "COCO": { + "miou": 66.4, + "pixel_acc": 92.4, + } }, }, ) @@ -159,9 +161,11 @@ class DeepLabV3_ResNet101_Weights(WeightsEnum): **_COMMON_META, "num_params": 60996202, "recipe": 
"https://github.com/pytorch/vision/tree/main/references/segmentation#fcn_resnet101", - "metrics": { - "miou": 67.4, - "pixel_acc": 92.4, + "_metrics": { + "COCO": { + "miou": 67.4, + "pixel_acc": 92.4, + } }, }, ) @@ -176,9 +180,11 @@ class DeepLabV3_MobileNet_V3_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 11029328, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_mobilenet_v3_large", - "metrics": { - "miou": 60.3, - "pixel_acc": 91.2, + "_metrics": { + "COCO": { + "miou": 60.3, + "pixel_acc": 91.2, + } }, }, ) diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index fe8c82d8d4d..d4995e6b0eb 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -61,9 +61,11 @@ class FCN_ResNet50_Weights(WeightsEnum): **_COMMON_META, "num_params": 35322218, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#fcn_resnet50", - "metrics": { - "miou": 60.5, - "pixel_acc": 91.4, + "_metrics": { + "COCO": { + "miou": 60.5, + "pixel_acc": 91.4, + } }, }, ) @@ -78,9 +80,11 @@ class FCN_ResNet101_Weights(WeightsEnum): **_COMMON_META, "num_params": 54314346, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_resnet101", - "metrics": { - "miou": 63.7, - "pixel_acc": 91.9, + "_metrics": { + "COCO": { + "miou": 63.7, + "pixel_acc": 91.9, + } }, }, ) diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py index bcbba7f14fe..b590ca8ba0d 100644 --- a/torchvision/models/segmentation/lraspp.py +++ b/torchvision/models/segmentation/lraspp.py @@ -102,9 +102,11 @@ class LRASPP_MobileNet_V3_Large_Weights(WeightsEnum): "categories": _VOC_CATEGORIES, "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#lraspp_mobilenet_v3_large", - "metrics": { - "miou": 57.9, - "pixel_acc": 91.2, + "_metrics": { + "COCO": { + "miou": 57.9, + "pixel_acc": 91.2, + } }, }, ) From 79dfc67be42792f79f6738dd6232d1effe78e4ae Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 14:07:30 +0100 Subject: [PATCH 04/13] quantization --- torchvision/models/quantization/googlenet.py | 8 ++-- torchvision/models/quantization/inception.py | 8 ++-- .../models/quantization/mobilenetv2.py | 8 ++-- .../models/quantization/mobilenetv3.py | 8 ++-- torchvision/models/quantization/resnet.py | 48 ++++++++++++------- .../models/quantization/shufflenetv2.py | 32 ++++++++----- 6 files changed, 70 insertions(+), 42 deletions(-) diff --git a/torchvision/models/quantization/googlenet.py b/torchvision/models/quantization/googlenet.py index 5df391044ff..856b4257d6b 100644 --- a/torchvision/models/quantization/googlenet.py +++ b/torchvision/models/quantization/googlenet.py @@ -117,9 +117,11 @@ class GoogLeNet_QuantizedWeights(WeightsEnum): "backend": "fbgemm", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models", "unquantized": GoogLeNet_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 69.826, - "acc@5": 89.404, + "_metrics": { + "ImageNetV1": { + "acc@1": 69.826, + "acc@5": 89.404, + } }, }, ) diff --git a/torchvision/models/quantization/inception.py b/torchvision/models/quantization/inception.py index 1fbfb00fe75..b19c1f49bf8 100644 --- a/torchvision/models/quantization/inception.py +++ b/torchvision/models/quantization/inception.py @@ -183,9 +183,11 @@ class Inception_V3_QuantizedWeights(WeightsEnum): "backend": 
"fbgemm", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models", "unquantized": Inception_V3_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 77.176, - "acc@5": 93.354, + "_metrics": { + "ImageNetV1": { + "acc@1": 77.176, + "acc@5": 93.354, + } }, }, ) diff --git a/torchvision/models/quantization/mobilenetv2.py b/torchvision/models/quantization/mobilenetv2.py index 0d2e35c8566..863b740f40e 100644 --- a/torchvision/models/quantization/mobilenetv2.py +++ b/torchvision/models/quantization/mobilenetv2.py @@ -75,9 +75,11 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum): "backend": "qnnpack", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#qat-mobilenetv2", "unquantized": MobileNet_V2_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 71.658, - "acc@5": 90.150, + "_metrics": { + "ImageNetV1": { + "acc@1": 71.658, + "acc@5": 90.150, + } }, }, ) diff --git a/torchvision/models/quantization/mobilenetv3.py b/torchvision/models/quantization/mobilenetv3.py index 804e0c77bc9..313fb72339a 100644 --- a/torchvision/models/quantization/mobilenetv3.py +++ b/torchvision/models/quantization/mobilenetv3.py @@ -169,9 +169,11 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum): "backend": "qnnpack", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#qat-mobilenetv3", "unquantized": MobileNet_V3_Large_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 73.004, - "acc@5": 90.858, + "_metrics": { + "ImageNetV1": { + "acc@1": 73.004, + "acc@5": 90.858, + } }, }, ) diff --git a/torchvision/models/quantization/resnet.py b/torchvision/models/quantization/resnet.py index dc3ee4c35c5..880986d2f3f 100644 --- a/torchvision/models/quantization/resnet.py +++ b/torchvision/models/quantization/resnet.py @@ -165,9 +165,11 @@ class ResNet18_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 11689512, "unquantized": ResNet18_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 69.494, - "acc@5": 88.882, + "_metrics": { + "ImageNetV1": { + "acc@1": 69.494, + "acc@5": 88.882, + } }, }, ) @@ -182,9 +184,11 @@ class ResNet50_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 25557032, "unquantized": ResNet50_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 75.920, - "acc@5": 92.814, + "_metrics": { + "ImageNetV1": { + "acc@1": 75.920, + "acc@5": 92.814, + } }, }, ) @@ -195,9 +199,11 @@ class ResNet50_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 25557032, "unquantized": ResNet50_Weights.IMAGENET1K_V2, - "metrics": { - "acc@1": 80.282, - "acc@5": 94.976, + "_metrics": { + "ImageNetV1": { + "acc@1": 80.282, + "acc@5": 94.976, + } }, }, ) @@ -212,9 +218,11 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 88791336, "unquantized": ResNeXt101_32X8D_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 78.986, - "acc@5": 94.480, + "_metrics": { + "ImageNetV1": { + "acc@1": 78.986, + "acc@5": 94.480, + } }, }, ) @@ -225,9 +233,11 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 88791336, "unquantized": ResNeXt101_32X8D_Weights.IMAGENET1K_V2, - "metrics": { - "acc@1": 82.574, - "acc@5": 96.132, + "_metrics": { + "ImageNetV1": { + "acc@1": 82.574, + "acc@5": 96.132, + } }, }, ) @@ -243,9 +253,11 @@ class ResNeXt101_64X4D_QuantizedWeights(WeightsEnum): "num_params": 83455272, "recipe": "https://github.com/pytorch/vision/pull/5935", "unquantized": ResNeXt101_64X4D_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 82.898, - 
"acc@5": 96.326, + "_metrics": { + "ImageNetV1": { + "acc@1": 82.898, + "acc@5": 96.326, + } }, }, ) diff --git a/torchvision/models/quantization/shufflenetv2.py b/torchvision/models/quantization/shufflenetv2.py index 5672d850cf2..0d37421cdf5 100644 --- a/torchvision/models/quantization/shufflenetv2.py +++ b/torchvision/models/quantization/shufflenetv2.py @@ -129,9 +129,11 @@ class ShuffleNet_V2_X0_5_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 1366792, "unquantized": ShuffleNet_V2_X0_5_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 57.972, - "acc@5": 79.780, + "_metrics": { + "ImageNetV1": { + "acc@1": 57.972, + "acc@5": 79.780, + } }, }, ) @@ -146,9 +148,11 @@ class ShuffleNet_V2_X1_0_QuantizedWeights(WeightsEnum): **_COMMON_META, "num_params": 2278604, "unquantized": ShuffleNet_V2_X1_0_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 68.360, - "acc@5": 87.582, + "_metrics": { + "ImageNetV1": { + "acc@1": 68.360, + "acc@5": 87.582, + } }, }, ) @@ -164,9 +168,11 @@ class ShuffleNet_V2_X1_5_QuantizedWeights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5906", "num_params": 3503624, "unquantized": ShuffleNet_V2_X1_5_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 72.052, - "acc@5": 90.700, + "_metrics": { + "ImageNetV1": { + "acc@1": 72.052, + "acc@5": 90.700, + } }, }, ) @@ -182,9 +188,11 @@ class ShuffleNet_V2_X2_0_QuantizedWeights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5906", "num_params": 7393996, "unquantized": ShuffleNet_V2_X2_0_Weights.IMAGENET1K_V1, - "metrics": { - "acc@1": 75.354, - "acc@5": 92.488, + "_metrics": { + "ImageNetV1": { + "acc@1": 75.354, + "acc@5": 92.488, + } }, }, ) From a4cf1a93ee43882b54de062d6a7ecfcee932e4ee Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 14:08:42 +0100 Subject: [PATCH 05/13] Video --- torchvision/models/video/resnet.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index 320df6576ac..42e09e084ab 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -322,9 +322,11 @@ class R3D_18_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 33371472, - "metrics": { - "acc@1": 52.75, - "acc@5": 75.45, + "_metrics": { + "ImageNetV1": { + "acc@1": 52.75, + "acc@5": 75.45, + } }, }, ) @@ -338,9 +340,11 @@ class MC3_18_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 11695440, - "metrics": { - "acc@1": 53.90, - "acc@5": 76.29, + "_metrics": { + "ImageNetV1": { + "acc@1": 53.90, + "acc@5": 76.29, + } }, }, ) @@ -354,9 +358,11 @@ class R2Plus1D_18_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 31505325, - "metrics": { - "acc@1": 57.50, - "acc@5": 78.81, + "_metrics": { + "ImageNetV1": { + "acc@1": 57.50, + "acc@5": 78.81, + } }, }, ) From cccac1753466d1d0c666e5f1e48aa7aea0b11015 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 14:14:47 +0100 Subject: [PATCH 06/13] optical flow --- torchvision/models/optical_flow/raft.py | 46 +++++++++++-------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/torchvision/models/optical_flow/raft.py b/torchvision/models/optical_flow/raft.py index 869477f0d81..9945d55280d 100644 --- a/torchvision/models/optical_flow/raft.py +++ b/torchvision/models/optical_flow/raft.py @@ -525,11 +525,10 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", - "metrics": { - 
"sintel_train_cleanpass_epe": 1.4411, - "sintel_train_finalpass_epe": 2.7894, - "kitti_train_per_image_epe": 5.0172, - "kitti_train_fl_all": 17.4506, + "_metrics": { + "sintel_train_cleanpass": {"epe": 1.4411}, + "sintel_train_finalpass": {"epe": 2.7894}, + "kitti_train": {"per_image_epe": 5.0172, "fl_all": 17.4506}, }, }, ) @@ -542,11 +541,10 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", - "metrics": { - "sintel_train_cleanpass_epe": 1.3822, - "sintel_train_finalpass_epe": 2.7161, - "kitti_train_per_image_epe": 4.5118, - "kitti_train_fl_all": 16.0679, + "_metrics": { + "sintel_train_cleanpass": {"epe": 1.3822}, + "sintel_train_finalpass": {"epe": 2.7161}, + "kitti_train": {"per_image_epe": 4.5118, "fl_all": 16.0679}, }, }, ) @@ -560,8 +558,8 @@ class Raft_Large_Weights(WeightsEnum): "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", "metrics": { - "sintel_test_cleanpass_epe": 1.94, - "sintel_test_finalpass_epe": 3.18, + "sintel_test_cleanpass": {"epe": 1.94}, + "sintel_test_finalpass": {"epe": 3.18}, }, }, ) @@ -577,8 +575,8 @@ class Raft_Large_Weights(WeightsEnum): "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "metrics": { - "sintel_test_cleanpass_epe": 1.819, - "sintel_test_finalpass_epe": 3.067, + "sintel_test_cleanpass": {"epe": 1.819}, + "sintel_test_finalpass": {"epe": 3.067}, }, }, ) @@ -592,7 +590,7 @@ class Raft_Large_Weights(WeightsEnum): "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", "metrics": { - "kitti_test_fl_all": 5.10, + "kitti_test": {"fl_all": 5.10}, }, }, ) @@ -609,7 +607,7 @@ class Raft_Large_Weights(WeightsEnum): "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "metrics": { - "kitti_test_fl_all": 5.19, + "kitti_test": {"fl_all": 5.19}, }, }, ) @@ -627,10 +625,9 @@ class Raft_Small_Weights(WeightsEnum): "num_params": 990162, "recipe": "https://github.com/princeton-vl/RAFT", "metrics": { - "sintel_train_cleanpass_epe": 2.1231, - "sintel_train_finalpass_epe": 3.2790, - "kitti_train_per_image_epe": 7.6557, - "kitti_train_fl_all": 25.2801, + "sintel_train_cleanpass": {"epe": 2.1231}, + "sintel_train_finalpass": {"epe": 3.2790}, + "kitti_train": {"per_image_epe": 7.6557, "fl_all": 25.2801}, }, }, ) @@ -642,11 +639,10 @@ class Raft_Small_Weights(WeightsEnum): **_COMMON_META, "num_params": 990162, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", - "metrics": { - "sintel_train_cleanpass_epe": 1.9901, - "sintel_train_finalpass_epe": 3.2831, - "kitti_train_per_image_epe": 7.5978, - "kitti_train_fl_all": 25.2369, + "_metrics": { + "sintel_train_cleanpass": {"epe": 1.9901}, + "sintel_train_finalpass": {"epe": 3.2831}, + "kitti_train": {"per_image_epe": 7.5978, "fl_all": 25.2369}, }, }, ) From c54c2cf352612494cbcb7b97477abdaf572fccf4 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 14:16:46 +0100 Subject: [PATCH 07/13] tests --- test/test_extended_models.py | 12 ++++++------ torchvision/models/optical_flow/raft.py | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/test/test_extended_models.py b/test/test_extended_models.py index e3f79e28af4..987fe3ac0bd 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -85,7 +85,7 @@ def test_schema_meta_validation(model_fn): "categories", "keypoint_names", "license", - 
"metrics", + "_metrics", "min_size", "num_params", "recipe", @@ -93,13 +93,13 @@ def test_schema_meta_validation(model_fn): "_docs", } # mandatory fields for each computer vision task - classification_fields = {"categories", ("metrics", "acc@1"), ("metrics", "acc@5")} + classification_fields = {"categories", ("_metrics", "acc@1"), ("_metrics", "acc@5")} defaults = { - "all": {"metrics", "min_size", "num_params", "recipe"}, + "all": {"_metrics", "min_size", "num_params", "recipe"}, "models": classification_fields | {"_docs"}, - "detection": {"categories", ("metrics", "box_map")}, + "detection": {"categories", ("_metrics", "box_map")}, "quantization": classification_fields | {"backend", "unquantized"}, - "segmentation": {"categories", ("metrics", "miou"), ("metrics", "pixel_acc")}, + "segmentation": {"categories", ("_metrics", "miou"), ("_metrics", "pixel_acc")}, "video": classification_fields, "optical_flow": set(), } @@ -115,7 +115,7 @@ def test_schema_meta_validation(model_fn): incorrect_params = [] bad_names = [] for w in weights_enum: - missing_fields = fields - (set(w.meta.keys()) | set(("metrics", x) for x in w.meta.get("metrics", {}).keys())) + missing_fields = fields - (set(w.meta.keys()) | set(("_metrics", x) for x in w.meta.get("_metrics", {}).keys())) unsupported_fields = set(w.meta.keys()) - permitted_fields if missing_fields or unsupported_fields: problematic_weights[w] = {"missing": missing_fields, "unsupported": unsupported_fields} diff --git a/torchvision/models/optical_flow/raft.py b/torchvision/models/optical_flow/raft.py index 9945d55280d..095ac480611 100644 --- a/torchvision/models/optical_flow/raft.py +++ b/torchvision/models/optical_flow/raft.py @@ -557,7 +557,7 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", - "metrics": { + "_metrics": { "sintel_test_cleanpass": {"epe": 1.94}, "sintel_test_finalpass": {"epe": 3.18}, }, @@ -574,7 +574,7 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", - "metrics": { + "_metrics": { "sintel_test_cleanpass": {"epe": 1.819}, "sintel_test_finalpass": {"epe": 3.067}, }, @@ -589,7 +589,7 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", - "metrics": { + "_metrics": { "kitti_test": {"fl_all": 5.10}, }, }, @@ -606,7 +606,7 @@ class Raft_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", - "metrics": { + "_metrics": { "kitti_test": {"fl_all": 5.19}, }, }, @@ -624,7 +624,7 @@ class Raft_Small_Weights(WeightsEnum): **_COMMON_META, "num_params": 990162, "recipe": "https://github.com/princeton-vl/RAFT", - "metrics": { + "_metrics": { "sintel_train_cleanpass": {"epe": 2.1231}, "sintel_train_finalpass": {"epe": 3.2790}, "kitti_train": {"per_image_epe": 7.6557, "fl_all": 25.2801}, From 244e46d4401aba87292b803a01be048f8fd544a2 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 15:35:51 +0100 Subject: [PATCH 08/13] Fix docs --- docs/source/conf.py | 51 ++++++++++++++++++++++-------------- test/test_extended_models.py | 6 ++--- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index e4db34c3889..713e2d017ed 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -334,25 +334,22 @@ def 
inject_weight_metadata(app, what, name, obj, options, lines): lines.append("") for field in obj: - lines += [f"**{str(field)}**:", ""] - - table = [] - - # the `meta` dict contains another embedded `metrics` dict. To - # simplify the table generation below, we create the - # `meta_with_metrics` dict, where the metrics dict has been "flattened" meta = copy(field.meta) - metrics = meta.pop("metrics", {}) - meta_with_metrics = dict(meta, **metrics) - lines += [meta_with_metrics.pop("_docs")] + lines += [f"**{str(field)}**:", ""] + lines += [meta.pop("_docs")] if field == obj.DEFAULT: lines += [f"Also available as ``{obj.__name__}.DEFAULT``."] - lines += [""] - for k, v in meta_with_metrics.items(): + table = [] + metrics = meta.pop("_metrics") + for dataset, dataset_metrics in metrics.items(): + for metric_name, metric_value in dataset_metrics.items(): + table.append((f"{metric_name} (on {dataset})", str(metric_value))) + + for k, v in meta.items(): if k in {"recipe", "license"}: v = f"`link <{v}>`__" elif k == "min_size": @@ -374,7 +371,7 @@ def inject_weight_metadata(app, what, name, obj, options, lines): lines.append("") -def generate_weights_table(module, table_name, metrics, include_patterns=None, exclude_patterns=None): +def generate_weights_table(module, table_name, metrics, dataset, include_patterns=None, exclude_patterns=None): weights_endswith = "_QuantizedWeights" if module.__name__.split(".")[-1] == "quantization" else "_Weights" weight_enums = [getattr(module, name) for name in dir(module) if name.endswith(weights_endswith)] weights = [w for weight_enum in weight_enums for w in weight_enum] @@ -391,7 +388,7 @@ def generate_weights_table(module, table_name, metrics, include_patterns=None, e content = [ ( f":class:`{w} <{type(w).__name__}>`", - *(w.meta["metrics"][metric] for metric in metrics_keys), + *(w.meta["_metrics"][dataset][metric] for metric in metrics_keys), f"{w.meta['num_params']/1e6:.1f}M", f"`link <{w.meta['recipe']}>`__", ) @@ -408,29 +405,45 @@ def generate_weights_table(module, table_name, metrics, include_patterns=None, e table_file.write(f"{textwrap.indent(table, ' ' * 4)}\n\n") -generate_weights_table(module=M, table_name="classification", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")]) generate_weights_table( - module=M.quantization, table_name="classification_quant", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")] + module=M, table_name="classification", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="ImageNet1K" +) +generate_weights_table( + module=M.quantization, + table_name="classification_quant", + metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], + dataset="ImageNet1K", ) generate_weights_table( - module=M.detection, table_name="detection", metrics=[("box_map", "Box MAP")], exclude_patterns=["Mask", "Keypoint"] + module=M.detection, + table_name="detection", + metrics=[("box_map", "Box MAP")], + exclude_patterns=["Mask", "Keypoint"], + dataset="COCO-val2017", ) generate_weights_table( module=M.detection, table_name="instance_segmentation", metrics=[("box_map", "Box MAP"), ("mask_map", "Mask MAP")], + dataset="COCO-val2017", include_patterns=["Mask"], ) generate_weights_table( module=M.detection, table_name="detection_keypoint", metrics=[("box_map", "Box MAP"), ("kp_map", "Keypoint MAP")], + dataset="COCO-val2017", include_patterns=["Keypoint"], ) generate_weights_table( - module=M.segmentation, table_name="segmentation", metrics=[("miou", "Mean IoU"), ("pixel_acc", "pixelwise Acc")] + module=M.segmentation, + table_name="segmentation", + 
metrics=[("miou", "Mean IoU"), ("pixel_acc", "pixelwise Acc")], + dataset="COCO-val2017", +) +generate_weights_table( + module=M.video, table_name="video", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="ImageNet1K" ) -generate_weights_table(module=M.video, table_name="video", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")]) def setup(app): diff --git a/test/test_extended_models.py b/test/test_extended_models.py index db7bff3d090..5fc9ad80f40 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -117,9 +117,9 @@ def test_schema_meta_validation(model_fn): for w in weights_enum: actual_fields = set(w.meta.keys()) actual_fields |= set( - ("_metrics", ds, metric_key) - for ds in w.meta.get("_metrics", {}).keys() - for metric_key in w.meta.get("_metrics", {}).get(ds, {}).keys() + ("_metrics", dataset, metric_key) + for dataset in w.meta.get("_metrics", {}).keys() + for metric_key in w.meta.get("_metrics", {}).get(dataset, {}).keys() ) missing_fields = expected_fields - actual_fields unsupported_fields = set(w.meta.keys()) - permitted_fields From 405e38a0a164b81ef7d41020e74d5d6c904559cd Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 15:41:24 +0100 Subject: [PATCH 09/13] Fix Video dataset --- docs/source/conf.py | 2 +- test/test_extended_models.py | 2 +- torchvision/models/video/resnet.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 713e2d017ed..225affe6224 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -442,7 +442,7 @@ def generate_weights_table(module, table_name, metrics, dataset, include_pattern dataset="COCO-val2017", ) generate_weights_table( - module=M.video, table_name="video", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="ImageNet1K" + module=M.video, table_name="video", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="Kinetics-400" ) diff --git a/test/test_extended_models.py b/test/test_extended_models.py index 5fc9ad80f40..1d762e687c7 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -100,7 +100,7 @@ def test_schema_meta_validation(model_fn): "detection": {"categories", ("_metrics", "COCO-val2017", "box_map")}, "quantization": classification_fields | {"backend", "unquantized"}, "segmentation": {"categories", ("_metrics", "COCO-val2017", "miou"), ("_metrics", "COCO-val2017", "pixel_acc")}, - "video": classification_fields, + "video": {"categories", ("_metrics", "Kinetics-400", "acc@1"), ("_metrics", "Kinetics-400", "acc@5")}, "optical_flow": set(), } model_name = model_fn.__name__ diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index a5580199341..8eb47418365 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -324,7 +324,7 @@ class R3D_18_Weights(WeightsEnum): **_COMMON_META, "num_params": 33371472, "_metrics": { - "ImageNet1K": { + "Kinetics-400": { "acc@1": 52.75, "acc@5": 75.45, } @@ -342,7 +342,7 @@ class MC3_18_Weights(WeightsEnum): **_COMMON_META, "num_params": 11695440, "_metrics": { - "ImageNet1K": { + "Kinetics-400": { "acc@1": 53.90, "acc@5": 76.29, } @@ -360,7 +360,7 @@ class R2Plus1D_18_Weights(WeightsEnum): **_COMMON_META, "num_params": 31505325, "_metrics": { - "ImageNet1K": { + "Kinetics-400": { "acc@1": 57.50, "acc@5": 78.81, } From 3571cec5b7d2cd14c3b3000334faff503b95bd6e Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 15:58:36 +0100 Subject: [PATCH 10/13] Consistency for RAFT 
dataset names --- torchvision/models/optical_flow/raft.py | 36 ++++++++++++------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/torchvision/models/optical_flow/raft.py b/torchvision/models/optical_flow/raft.py index 345a26a78c2..b382906517d 100644 --- a/torchvision/models/optical_flow/raft.py +++ b/torchvision/models/optical_flow/raft.py @@ -526,9 +526,9 @@ class Raft_Large_Weights(WeightsEnum): "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", "_metrics": { - "sintel_train_cleanpass": {"epe": 1.4411}, - "sintel_train_finalpass": {"epe": 2.7894}, - "kitti_train": {"per_image_epe": 5.0172, "fl_all": 17.4506}, + "Sintel-Train-Cleanpass": {"epe": 1.4411}, + "Sintel-Train-Finalpass": {"epe": 2.7894}, + "Kitti-Train": {"per_image_epe": 5.0172, "fl_all": 17.4506}, }, "_docs": """These weights were ported from the original paper. They are trained on Chairs + Things.""", }, @@ -542,9 +542,9 @@ class Raft_Large_Weights(WeightsEnum): "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "_metrics": { - "sintel_train_cleanpass": {"epe": 1.3822}, - "sintel_train_finalpass": {"epe": 2.7161}, - "kitti_train": {"per_image_epe": 4.5118, "fl_all": 16.0679}, + "Sintel-Train-Cleanpass": {"epe": 1.3822}, + "Sintel-Train-Finalpass": {"epe": 2.7161}, + "Kitti-Train": {"per_image_epe": 4.5118, "fl_all": 16.0679}, }, "_docs": """These weights were trained from scratch on Chairs + Things.""", }, @@ -559,8 +559,8 @@ class Raft_Large_Weights(WeightsEnum): "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", "_metrics": { - "sintel_test_cleanpass": {"epe": 1.94}, - "sintel_test_finalpass": {"epe": 3.18}, + "Sintel-Test-Cleanpass": {"epe": 1.94}, + "Sintel-Test-Finalpass": {"epe": 3.18}, }, "_docs": """ These weights were ported from the original paper. They are trained on Chairs + Things and fine-tuned on @@ -577,8 +577,8 @@ class Raft_Large_Weights(WeightsEnum): "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "_metrics": { - "sintel_test_cleanpass": {"epe": 1.819}, - "sintel_test_finalpass": {"epe": 3.067}, + "Sintel-Test-Cleanpass": {"epe": 1.819}, + "Sintel-Test-Finalpass": {"epe": 3.067}, }, "_docs": """ These weights were trained from scratch on Chairs + Things and fine-tuned on Sintel (C+T+S+K+H). @@ -595,7 +595,7 @@ class Raft_Large_Weights(WeightsEnum): "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", "_metrics": { - "kitti_test": {"fl_all": 5.10}, + "Kitti-Test": {"fl_all": 5.10}, }, "_docs": """ These weights were ported from the original paper. They are trained on Chairs + Things, fine-tuned on @@ -612,7 +612,7 @@ class Raft_Large_Weights(WeightsEnum): "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "_metrics": { - "kitti_test": {"fl_all": 5.19}, + "Kitti-Test": {"fl_all": 5.19}, }, "_docs": """ These weights were trained from scratch on Chairs + Things, fine-tuned on Sintel and then on Kitti. 
@@ -633,9 +633,9 @@ class Raft_Small_Weights(WeightsEnum): "num_params": 990162, "recipe": "https://github.com/princeton-vl/RAFT", "_metrics": { - "sintel_train_cleanpass": {"epe": 2.1231}, - "sintel_train_finalpass": {"epe": 3.2790}, - "kitti_train": {"per_image_epe": 7.6557, "fl_all": 25.2801}, + "Sintel-Train-Cleanpass": {"epe": 2.1231}, + "Sintel-Train-Finalpass": {"epe": 3.2790}, + "Kitti-Train": {"per_image_epe": 7.6557, "fl_all": 25.2801}, }, "_docs": """These weights were ported from the original paper. They are trained on Chairs + Things.""", }, @@ -648,9 +648,9 @@ class Raft_Small_Weights(WeightsEnum): "num_params": 990162, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "_metrics": { - "sintel_train_cleanpass": {"epe": 1.9901}, - "sintel_train_finalpass": {"epe": 3.2831}, - "kitti_train": {"per_image_epe": 7.5978, "fl_all": 25.2369}, + "Sintel-Train-Cleanpass": {"epe": 1.9901}, + "Sintel-Train-Finalpass": {"epe": 3.2831}, + "Kitti-Train": {"per_image_epe": 7.5978, "fl_all": 25.2369}, }, "_docs": """These weights were trained from scratch on Chairs + Things.""", }, From bb2339d58f9305f010809030e16dc0fe46f2c35e Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 15:59:49 +0100 Subject: [PATCH 11/13] use ImageNet-1K --- docs/source/conf.py | 4 +- test/test_extended_models.py | 2 +- torchvision/models/alexnet.py | 2 +- torchvision/models/convnext.py | 8 +-- torchvision/models/densenet.py | 8 +-- torchvision/models/efficientnet.py | 24 +++---- torchvision/models/googlenet.py | 2 +- torchvision/models/inception.py | 2 +- torchvision/models/mnasnet.py | 8 +-- torchvision/models/mobilenetv2.py | 4 +- torchvision/models/mobilenetv3.py | 6 +- torchvision/models/quantization/googlenet.py | 2 +- torchvision/models/quantization/inception.py | 2 +- .../models/quantization/mobilenetv2.py | 2 +- .../models/quantization/mobilenetv3.py | 2 +- torchvision/models/quantization/resnet.py | 12 ++-- .../models/quantization/shufflenetv2.py | 8 +-- torchvision/models/regnet.py | 68 +++++++++---------- torchvision/models/resnet.py | 34 +++++----- torchvision/models/shufflenetv2.py | 8 +-- torchvision/models/squeezenet.py | 4 +- torchvision/models/swin_transformer.py | 2 +- torchvision/models/vgg.py | 18 ++--- torchvision/models/vision_transformer.py | 20 +++--- 24 files changed, 126 insertions(+), 126 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 225affe6224..e4453f76c03 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -406,13 +406,13 @@ def generate_weights_table(module, table_name, metrics, dataset, include_pattern generate_weights_table( - module=M, table_name="classification", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="ImageNet1K" + module=M, table_name="classification", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="ImageNet-1K" ) generate_weights_table( module=M.quantization, table_name="classification_quant", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], - dataset="ImageNet1K", + dataset="ImageNet-1K", ) generate_weights_table( module=M.detection, diff --git a/test/test_extended_models.py b/test/test_extended_models.py index 1d762e687c7..c4e201b764b 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -93,7 +93,7 @@ def test_schema_meta_validation(model_fn): "_docs", } # mandatory fields for each computer vision task - classification_fields = {"categories", ("_metrics", "ImageNet1K", "acc@1"), ("_metrics", "ImageNet1K", "acc@5")} + classification_fields 
= {"categories", ("_metrics", "ImageNet-1K", "acc@1"), ("_metrics", "ImageNet-1K", "acc@5")} defaults = { "all": {"_metrics", "min_size", "num_params", "recipe", "_docs"}, "models": classification_fields, diff --git a/torchvision/models/alexnet.py b/torchvision/models/alexnet.py index 808eb3cd3ec..6c461a501c9 100644 --- a/torchvision/models/alexnet.py +++ b/torchvision/models/alexnet.py @@ -62,7 +62,7 @@ class AlexNet_Weights(WeightsEnum): "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 56.522, "acc@5": 79.066, } diff --git a/torchvision/models/convnext.py b/torchvision/models/convnext.py index 9ad0efe07f6..4cd75690df4 100644 --- a/torchvision/models/convnext.py +++ b/torchvision/models/convnext.py @@ -223,7 +223,7 @@ class ConvNeXt_Tiny_Weights(WeightsEnum): **_COMMON_META, "num_params": 28589128, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 82.520, "acc@5": 96.146, } @@ -241,7 +241,7 @@ class ConvNeXt_Small_Weights(WeightsEnum): **_COMMON_META, "num_params": 50223688, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 83.616, "acc@5": 96.650, } @@ -259,7 +259,7 @@ class ConvNeXt_Base_Weights(WeightsEnum): **_COMMON_META, "num_params": 88591464, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 84.062, "acc@5": 96.870, } @@ -277,7 +277,7 @@ class ConvNeXt_Large_Weights(WeightsEnum): **_COMMON_META, "num_params": 197767336, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 84.414, "acc@5": 96.976, } diff --git a/torchvision/models/densenet.py b/torchvision/models/densenet.py index 276747791a9..e8a66f5771b 100644 --- a/torchvision/models/densenet.py +++ b/torchvision/models/densenet.py @@ -273,7 +273,7 @@ class DenseNet121_Weights(WeightsEnum): **_COMMON_META, "num_params": 7978856, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 74.434, "acc@5": 91.972, } @@ -291,7 +291,7 @@ class DenseNet161_Weights(WeightsEnum): **_COMMON_META, "num_params": 28681000, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 77.138, "acc@5": 93.560, } @@ -309,7 +309,7 @@ class DenseNet169_Weights(WeightsEnum): **_COMMON_META, "num_params": 14149480, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 75.600, "acc@5": 92.806, } @@ -327,7 +327,7 @@ class DenseNet201_Weights(WeightsEnum): **_COMMON_META, "num_params": 20013928, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 76.896, "acc@5": 93.370, } diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index 6574e2ef2bf..bfd59aee951 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -459,7 +459,7 @@ class EfficientNet_B0_Weights(WeightsEnum): **_COMMON_META_V1, "num_params": 5288548, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 77.692, "acc@5": 93.532, } @@ -481,7 +481,7 @@ class EfficientNet_B1_Weights(WeightsEnum): **_COMMON_META_V1, "num_params": 7794184, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 78.642, "acc@5": 94.186, } @@ -499,7 +499,7 @@ class EfficientNet_B1_Weights(WeightsEnum): "num_params": 7794184, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-lr-wd-crop-tuning", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 79.838, "acc@5": 94.934, } @@ -525,7 +525,7 @@ class EfficientNet_B2_Weights(WeightsEnum): **_COMMON_META_V1, "num_params": 9109994, "_metrics": { - "ImageNet1K": { + 
"ImageNet-1K": { "acc@1": 80.608, "acc@5": 95.310, } @@ -547,7 +547,7 @@ class EfficientNet_B3_Weights(WeightsEnum): **_COMMON_META_V1, "num_params": 12233232, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 82.008, "acc@5": 96.054, } @@ -569,7 +569,7 @@ class EfficientNet_B4_Weights(WeightsEnum): **_COMMON_META_V1, "num_params": 19341616, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 83.384, "acc@5": 96.594, } @@ -591,7 +591,7 @@ class EfficientNet_B5_Weights(WeightsEnum): **_COMMON_META_V1, "num_params": 30389784, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 83.444, "acc@5": 96.628, } @@ -613,7 +613,7 @@ class EfficientNet_B6_Weights(WeightsEnum): **_COMMON_META_V1, "num_params": 43040704, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 84.008, "acc@5": 96.916, } @@ -635,7 +635,7 @@ class EfficientNet_B7_Weights(WeightsEnum): **_COMMON_META_V1, "num_params": 66347960, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 84.122, "acc@5": 96.908, } @@ -659,7 +659,7 @@ class EfficientNet_V2_S_Weights(WeightsEnum): **_COMMON_META_V2, "num_params": 21458488, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 84.228, "acc@5": 96.878, } @@ -687,7 +687,7 @@ class EfficientNet_V2_M_Weights(WeightsEnum): **_COMMON_META_V2, "num_params": 54139356, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 85.112, "acc@5": 97.156, } @@ -718,7 +718,7 @@ class EfficientNet_V2_L_Weights(WeightsEnum): **_COMMON_META_V2, "num_params": 118515272, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 85.808, "acc@5": 97.788, } diff --git a/torchvision/models/googlenet.py b/torchvision/models/googlenet.py index decea7316fc..5b0a91d4791 100644 --- a/torchvision/models/googlenet.py +++ b/torchvision/models/googlenet.py @@ -285,7 +285,7 @@ class GoogLeNet_Weights(WeightsEnum): "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#googlenet", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 69.778, "acc@5": 89.530, } diff --git a/torchvision/models/inception.py b/torchvision/models/inception.py index 64ba7b79d36..9207485085f 100644 --- a/torchvision/models/inception.py +++ b/torchvision/models/inception.py @@ -417,7 +417,7 @@ class Inception_V3_Weights(WeightsEnum): "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#inception-v3", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 77.294, "acc@5": 93.450, } diff --git a/torchvision/models/mnasnet.py b/torchvision/models/mnasnet.py index 8f64474ff84..8286674d232 100644 --- a/torchvision/models/mnasnet.py +++ b/torchvision/models/mnasnet.py @@ -226,7 +226,7 @@ class MNASNet0_5_Weights(WeightsEnum): **_COMMON_META, "num_params": 2218512, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 67.734, "acc@5": 87.490, } @@ -246,7 +246,7 @@ class MNASNet0_75_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/6019", "num_params": 3170208, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 71.180, "acc@5": 90.496, } @@ -268,7 +268,7 @@ class MNASNet1_0_Weights(WeightsEnum): **_COMMON_META, "num_params": 4383312, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 73.456, "acc@5": 91.510, } @@ -288,7 +288,7 @@ class MNASNet1_3_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/6019", "num_params": 6282256, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 76.506, "acc@5": 
93.522, } diff --git a/torchvision/models/mobilenetv2.py b/torchvision/models/mobilenetv2.py index eef904b83fe..4c4a7d1e293 100644 --- a/torchvision/models/mobilenetv2.py +++ b/torchvision/models/mobilenetv2.py @@ -209,7 +209,7 @@ class MobileNet_V2_Weights(WeightsEnum): **_COMMON_META, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv2", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 71.878, "acc@5": 90.286, } @@ -224,7 +224,7 @@ class MobileNet_V2_Weights(WeightsEnum): **_COMMON_META, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-reg-tuning", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 72.154, "acc@5": 90.822, } diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py index c8c7a6b3830..dfdd529bfc2 100644 --- a/torchvision/models/mobilenetv3.py +++ b/torchvision/models/mobilenetv3.py @@ -318,7 +318,7 @@ class MobileNet_V3_Large_Weights(WeightsEnum): "num_params": 5483032, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv3-large--small", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 74.042, "acc@5": 91.340, } @@ -334,7 +334,7 @@ class MobileNet_V3_Large_Weights(WeightsEnum): "num_params": 5483032, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-reg-tuning", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 75.274, "acc@5": 92.566, } @@ -358,7 +358,7 @@ class MobileNet_V3_Small_Weights(WeightsEnum): "num_params": 2542856, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv3-large--small", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 67.668, "acc@5": 87.402, } diff --git a/torchvision/models/quantization/googlenet.py b/torchvision/models/quantization/googlenet.py index fc2acb55cb3..644df8ae496 100644 --- a/torchvision/models/quantization/googlenet.py +++ b/torchvision/models/quantization/googlenet.py @@ -118,7 +118,7 @@ class GoogLeNet_QuantizedWeights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models", "unquantized": GoogLeNet_Weights.IMAGENET1K_V1, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 69.826, "acc@5": 89.404, } diff --git a/torchvision/models/quantization/inception.py b/torchvision/models/quantization/inception.py index 5f6540c658a..ba4b21d4112 100644 --- a/torchvision/models/quantization/inception.py +++ b/torchvision/models/quantization/inception.py @@ -184,7 +184,7 @@ class Inception_V3_QuantizedWeights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models", "unquantized": Inception_V3_Weights.IMAGENET1K_V1, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 77.176, "acc@5": 93.354, } diff --git a/torchvision/models/quantization/mobilenetv2.py b/torchvision/models/quantization/mobilenetv2.py index 73d45a46b0e..936e9bcc1b1 100644 --- a/torchvision/models/quantization/mobilenetv2.py +++ b/torchvision/models/quantization/mobilenetv2.py @@ -76,7 +76,7 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#qat-mobilenetv2", "unquantized": MobileNet_V2_Weights.IMAGENET1K_V1, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 71.658, "acc@5": 90.150, } diff --git a/torchvision/models/quantization/mobilenetv3.py b/torchvision/models/quantization/mobilenetv3.py index 
ebf572e4300..94036143138 100644 --- a/torchvision/models/quantization/mobilenetv3.py +++ b/torchvision/models/quantization/mobilenetv3.py @@ -170,7 +170,7 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#qat-mobilenetv3", "unquantized": MobileNet_V3_Large_Weights.IMAGENET1K_V1, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 73.004, "acc@5": 90.858, } diff --git a/torchvision/models/quantization/resnet.py b/torchvision/models/quantization/resnet.py index 8a2d712730a..891b608ce01 100644 --- a/torchvision/models/quantization/resnet.py +++ b/torchvision/models/quantization/resnet.py @@ -170,7 +170,7 @@ class ResNet18_QuantizedWeights(WeightsEnum): "num_params": 11689512, "unquantized": ResNet18_Weights.IMAGENET1K_V1, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 69.494, "acc@5": 88.882, } @@ -189,7 +189,7 @@ class ResNet50_QuantizedWeights(WeightsEnum): "num_params": 25557032, "unquantized": ResNet50_Weights.IMAGENET1K_V1, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 75.920, "acc@5": 92.814, } @@ -204,7 +204,7 @@ class ResNet50_QuantizedWeights(WeightsEnum): "num_params": 25557032, "unquantized": ResNet50_Weights.IMAGENET1K_V2, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 80.282, "acc@5": 94.976, } @@ -223,7 +223,7 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum): "num_params": 88791336, "unquantized": ResNeXt101_32X8D_Weights.IMAGENET1K_V1, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 78.986, "acc@5": 94.480, } @@ -238,7 +238,7 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum): "num_params": 88791336, "unquantized": ResNeXt101_32X8D_Weights.IMAGENET1K_V2, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 82.574, "acc@5": 96.132, } @@ -258,7 +258,7 @@ class ResNeXt101_64X4D_QuantizedWeights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5935", "unquantized": ResNeXt101_64X4D_Weights.IMAGENET1K_V1, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 82.898, "acc@5": 96.326, } diff --git a/torchvision/models/quantization/shufflenetv2.py b/torchvision/models/quantization/shufflenetv2.py index 55f70ba4d87..781591ae118 100644 --- a/torchvision/models/quantization/shufflenetv2.py +++ b/torchvision/models/quantization/shufflenetv2.py @@ -134,7 +134,7 @@ class ShuffleNet_V2_X0_5_QuantizedWeights(WeightsEnum): "num_params": 1366792, "unquantized": ShuffleNet_V2_X0_5_Weights.IMAGENET1K_V1, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 57.972, "acc@5": 79.780, } @@ -153,7 +153,7 @@ class ShuffleNet_V2_X1_0_QuantizedWeights(WeightsEnum): "num_params": 2278604, "unquantized": ShuffleNet_V2_X1_0_Weights.IMAGENET1K_V1, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 68.360, "acc@5": 87.582, } @@ -173,7 +173,7 @@ class ShuffleNet_V2_X1_5_QuantizedWeights(WeightsEnum): "num_params": 3503624, "unquantized": ShuffleNet_V2_X1_5_Weights.IMAGENET1K_V1, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 72.052, "acc@5": 90.700, } @@ -193,7 +193,7 @@ class ShuffleNet_V2_X2_0_QuantizedWeights(WeightsEnum): "num_params": 7393996, "unquantized": ShuffleNet_V2_X2_0_Weights.IMAGENET1K_V1, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 75.354, "acc@5": 92.488, } diff --git a/torchvision/models/regnet.py b/torchvision/models/regnet.py index 88a19cbf4fe..d2958e8686c 100644 --- a/torchvision/models/regnet.py +++ b/torchvision/models/regnet.py @@ -423,7 +423,7 @@ class 
RegNet_Y_400MF_Weights(WeightsEnum): "num_params": 4344144, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 74.046, "acc@5": 91.716, } @@ -439,7 +439,7 @@ class RegNet_Y_400MF_Weights(WeightsEnum): "num_params": 4344144, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 75.804, "acc@5": 92.742, } @@ -463,7 +463,7 @@ class RegNet_Y_800MF_Weights(WeightsEnum): "num_params": 6432512, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 76.420, "acc@5": 93.136, } @@ -479,7 +479,7 @@ class RegNet_Y_800MF_Weights(WeightsEnum): "num_params": 6432512, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 78.828, "acc@5": 94.502, } @@ -503,7 +503,7 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum): "num_params": 11202430, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 77.950, "acc@5": 93.966, } @@ -519,7 +519,7 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum): "num_params": 11202430, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 80.876, "acc@5": 95.444, } @@ -543,7 +543,7 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum): "num_params": 19436338, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 78.948, "acc@5": 94.576, } @@ -559,7 +559,7 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum): "num_params": 19436338, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 81.982, "acc@5": 95.972, } @@ -583,7 +583,7 @@ class RegNet_Y_8GF_Weights(WeightsEnum): "num_params": 39381472, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 80.032, "acc@5": 95.048, } @@ -599,7 +599,7 @@ class RegNet_Y_8GF_Weights(WeightsEnum): "num_params": 39381472, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 82.828, "acc@5": 96.330, } @@ -623,7 +623,7 @@ class RegNet_Y_16GF_Weights(WeightsEnum): "num_params": 83590140, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#large-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 80.424, "acc@5": 95.240, } @@ -639,7 +639,7 @@ class RegNet_Y_16GF_Weights(WeightsEnum): "num_params": 83590140, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 82.886, "acc@5": 96.328, } @@ -660,7 +660,7 @@ class RegNet_Y_16GF_Weights(WeightsEnum): **_COMMON_SWAG_META, "num_params": 83590140, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 86.012, "acc@5": 98.054, } @@ -681,7 +681,7 @@ class RegNet_Y_16GF_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 83590140, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 83.976, "acc@5": 97.244, } @@ -704,7 +704,7 @@ class RegNet_Y_32GF_Weights(WeightsEnum): "num_params": 
145046770, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#large-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 80.878, "acc@5": 95.340, } @@ -720,7 +720,7 @@ class RegNet_Y_32GF_Weights(WeightsEnum): "num_params": 145046770, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 83.368, "acc@5": 96.498, } @@ -741,7 +741,7 @@ class RegNet_Y_32GF_Weights(WeightsEnum): **_COMMON_SWAG_META, "num_params": 145046770, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 86.838, "acc@5": 98.362, } @@ -762,7 +762,7 @@ class RegNet_Y_32GF_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 145046770, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 84.622, "acc@5": 97.480, } @@ -786,7 +786,7 @@ class RegNet_Y_128GF_Weights(WeightsEnum): **_COMMON_SWAG_META, "num_params": 644812894, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 88.228, "acc@5": 98.682, } @@ -807,7 +807,7 @@ class RegNet_Y_128GF_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 644812894, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 86.068, "acc@5": 97.844, } @@ -830,7 +830,7 @@ class RegNet_X_400MF_Weights(WeightsEnum): "num_params": 5495976, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 72.834, "acc@5": 90.950, } @@ -846,7 +846,7 @@ class RegNet_X_400MF_Weights(WeightsEnum): "num_params": 5495976, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 74.864, "acc@5": 92.322, } @@ -870,7 +870,7 @@ class RegNet_X_800MF_Weights(WeightsEnum): "num_params": 7259656, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 75.212, "acc@5": 92.348, } @@ -886,7 +886,7 @@ class RegNet_X_800MF_Weights(WeightsEnum): "num_params": 7259656, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 77.522, "acc@5": 93.826, } @@ -910,7 +910,7 @@ class RegNet_X_1_6GF_Weights(WeightsEnum): "num_params": 9190136, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#small-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 77.040, "acc@5": 93.440, } @@ -926,7 +926,7 @@ class RegNet_X_1_6GF_Weights(WeightsEnum): "num_params": 9190136, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 79.668, "acc@5": 94.922, } @@ -950,7 +950,7 @@ class RegNet_X_3_2GF_Weights(WeightsEnum): "num_params": 15296552, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 78.364, "acc@5": 93.992, } @@ -966,7 +966,7 @@ class RegNet_X_3_2GF_Weights(WeightsEnum): "num_params": 15296552, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 81.196, "acc@5": 95.430, } @@ -990,7 +990,7 @@ class RegNet_X_8GF_Weights(WeightsEnum): "num_params": 39572648, "recipe": 
"https://github.com/pytorch/vision/tree/main/references/classification#medium-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 79.344, "acc@5": 94.686, } @@ -1006,7 +1006,7 @@ class RegNet_X_8GF_Weights(WeightsEnum): "num_params": 39572648, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 81.682, "acc@5": 95.678, } @@ -1030,7 +1030,7 @@ class RegNet_X_16GF_Weights(WeightsEnum): "num_params": 54278536, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#medium-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 80.058, "acc@5": 94.944, } @@ -1046,7 +1046,7 @@ class RegNet_X_16GF_Weights(WeightsEnum): "num_params": 54278536, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 82.716, "acc@5": 96.196, } @@ -1070,7 +1070,7 @@ class RegNet_X_32GF_Weights(WeightsEnum): "num_params": 107811560, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#large-models", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 80.622, "acc@5": 95.248, } @@ -1086,7 +1086,7 @@ class RegNet_X_32GF_Weights(WeightsEnum): "num_params": 107811560, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 83.014, "acc@5": 96.288, } diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index 9ff97cbdc79..39662b1cc0a 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -318,7 +318,7 @@ class ResNet18_Weights(WeightsEnum): "num_params": 11689512, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 69.758, "acc@5": 89.078, } @@ -338,7 +338,7 @@ class ResNet34_Weights(WeightsEnum): "num_params": 21797672, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 73.314, "acc@5": 91.420, } @@ -358,7 +358,7 @@ class ResNet50_Weights(WeightsEnum): "num_params": 25557032, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 76.130, "acc@5": 92.862, } @@ -374,7 +374,7 @@ class ResNet50_Weights(WeightsEnum): "num_params": 25557032, "recipe": "https://github.com/pytorch/vision/issues/3995#issuecomment-1013906621", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 80.858, "acc@5": 95.434, } @@ -397,7 +397,7 @@ class ResNet101_Weights(WeightsEnum): "num_params": 44549160, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 77.374, "acc@5": 93.546, } @@ -413,7 +413,7 @@ class ResNet101_Weights(WeightsEnum): "num_params": 44549160, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 81.886, "acc@5": 95.780, } @@ -436,7 +436,7 @@ class ResNet152_Weights(WeightsEnum): "num_params": 60192808, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 78.312, "acc@5": 94.046, } @@ -452,7 +452,7 @@ class ResNet152_Weights(WeightsEnum): "num_params": 60192808, "recipe": 
"https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 82.284, "acc@5": 96.002, } @@ -475,7 +475,7 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): "num_params": 25028904, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 77.618, "acc@5": 93.698, } @@ -491,7 +491,7 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): "num_params": 25028904, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 81.198, "acc@5": 95.340, } @@ -514,7 +514,7 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): "num_params": 88791336, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 79.312, "acc@5": 94.526, } @@ -530,7 +530,7 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): "num_params": 88791336, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 82.834, "acc@5": 96.228, } @@ -553,7 +553,7 @@ class ResNeXt101_64X4D_Weights(WeightsEnum): "num_params": 83455272, "recipe": "https://github.com/pytorch/vision/pull/5935", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 83.246, "acc@5": 96.454, } @@ -576,7 +576,7 @@ class Wide_ResNet50_2_Weights(WeightsEnum): "num_params": 68883240, "recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 78.468, "acc@5": 94.086, } @@ -592,7 +592,7 @@ class Wide_ResNet50_2_Weights(WeightsEnum): "num_params": 68883240, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 81.602, "acc@5": 95.758, } @@ -615,7 +615,7 @@ class Wide_ResNet101_2_Weights(WeightsEnum): "num_params": 126886696, "recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 78.848, "acc@5": 94.284, } @@ -631,7 +631,7 @@ class Wide_ResNet101_2_Weights(WeightsEnum): "num_params": 126886696, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 82.510, "acc@5": 96.020, } diff --git a/torchvision/models/shufflenetv2.py b/torchvision/models/shufflenetv2.py index 6c4a1a1b555..48695c70193 100644 --- a/torchvision/models/shufflenetv2.py +++ b/torchvision/models/shufflenetv2.py @@ -199,7 +199,7 @@ class ShuffleNet_V2_X0_5_Weights(WeightsEnum): **_COMMON_META, "num_params": 1366792, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 60.552, "acc@5": 81.746, } @@ -219,7 +219,7 @@ class ShuffleNet_V2_X1_0_Weights(WeightsEnum): **_COMMON_META, "num_params": 2278604, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 69.362, "acc@5": 88.316, } @@ -239,7 +239,7 @@ class ShuffleNet_V2_X1_5_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5906", "num_params": 3503624, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 72.996, "acc@5": 91.086, } @@ -262,7 +262,7 @@ class ShuffleNet_V2_X2_0_Weights(WeightsEnum): "recipe": "https://github.com/pytorch/vision/pull/5906", "num_params": 7393996, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 76.230, "acc@5": 93.006, } diff --git a/torchvision/models/squeezenet.py 
b/torchvision/models/squeezenet.py index 52411c6d1dc..dbc0f54fb77 100644 --- a/torchvision/models/squeezenet.py +++ b/torchvision/models/squeezenet.py @@ -130,7 +130,7 @@ class SqueezeNet1_0_Weights(WeightsEnum): "min_size": (21, 21), "num_params": 1248424, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 58.092, "acc@5": 80.420, } @@ -149,7 +149,7 @@ class SqueezeNet1_1_Weights(WeightsEnum): "min_size": (17, 17), "num_params": 1235496, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 58.178, "acc@5": 80.624, } diff --git a/torchvision/models/swin_transformer.py b/torchvision/models/swin_transformer.py index a9d8660ff60..69a0d5fd2fd 100644 --- a/torchvision/models/swin_transformer.py +++ b/torchvision/models/swin_transformer.py @@ -418,7 +418,7 @@ class Swin_T_Weights(WeightsEnum): "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 81.358, "acc@5": 95.526, } diff --git a/torchvision/models/vgg.py b/torchvision/models/vgg.py index 1c3abaeb238..937458b48cd 100644 --- a/torchvision/models/vgg.py +++ b/torchvision/models/vgg.py @@ -122,7 +122,7 @@ class VGG11_Weights(WeightsEnum): **_COMMON_META, "num_params": 132863336, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 69.020, "acc@5": 88.628, } @@ -140,7 +140,7 @@ class VGG11_BN_Weights(WeightsEnum): **_COMMON_META, "num_params": 132868840, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 70.370, "acc@5": 89.810, } @@ -158,7 +158,7 @@ class VGG13_Weights(WeightsEnum): **_COMMON_META, "num_params": 133047848, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 69.928, "acc@5": 89.246, } @@ -176,7 +176,7 @@ class VGG13_BN_Weights(WeightsEnum): **_COMMON_META, "num_params": 133053736, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 71.586, "acc@5": 90.374, } @@ -194,7 +194,7 @@ class VGG16_Weights(WeightsEnum): **_COMMON_META, "num_params": 138357544, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 71.592, "acc@5": 90.382, } @@ -216,7 +216,7 @@ class VGG16_Weights(WeightsEnum): "categories": None, "recipe": "https://github.com/amdegroot/ssd.pytorch#training-ssd", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": float("nan"), "acc@5": float("nan"), } @@ -239,7 +239,7 @@ class VGG16_BN_Weights(WeightsEnum): **_COMMON_META, "num_params": 138365992, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 73.360, "acc@5": 91.516, } @@ -257,7 +257,7 @@ class VGG19_Weights(WeightsEnum): **_COMMON_META, "num_params": 143667240, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 72.376, "acc@5": 90.876, } @@ -275,7 +275,7 @@ class VGG19_BN_Weights(WeightsEnum): **_COMMON_META, "num_params": 143678248, "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 74.218, "acc@5": 91.842, } diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py index be79994cda8..dad2804e626 100644 --- a/torchvision/models/vision_transformer.py +++ b/torchvision/models/vision_transformer.py @@ -329,7 +329,7 @@ class ViT_B_16_Weights(WeightsEnum): "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_16", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 81.072, "acc@5": 95.318, } @@ -353,7 +353,7 @@ class ViT_B_16_Weights(WeightsEnum): "num_params": 86859496, "min_size": (384, 384), "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { 
"acc@1": 85.304, "acc@5": 97.650, } @@ -378,7 +378,7 @@ class ViT_B_16_Weights(WeightsEnum): "num_params": 86567656, "min_size": (224, 224), "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 81.886, "acc@5": 96.180, } @@ -402,7 +402,7 @@ class ViT_B_32_Weights(WeightsEnum): "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_32", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 75.912, "acc@5": 92.466, } @@ -426,7 +426,7 @@ class ViT_L_16_Weights(WeightsEnum): "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_16", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 79.662, "acc@5": 94.638, } @@ -451,7 +451,7 @@ class ViT_L_16_Weights(WeightsEnum): "num_params": 305174504, "min_size": (512, 512), "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 88.064, "acc@5": 98.512, } @@ -476,7 +476,7 @@ class ViT_L_16_Weights(WeightsEnum): "num_params": 304326632, "min_size": (224, 224), "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 85.146, "acc@5": 97.422, } @@ -500,7 +500,7 @@ class ViT_L_32_Weights(WeightsEnum): "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_32", "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 76.972, "acc@5": 93.07, } @@ -528,7 +528,7 @@ class ViT_H_14_Weights(WeightsEnum): "num_params": 633470440, "min_size": (518, 518), "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 88.552, "acc@5": 98.694, } @@ -553,7 +553,7 @@ class ViT_H_14_Weights(WeightsEnum): "num_params": 632045800, "min_size": (224, 224), "_metrics": { - "ImageNet1K": { + "ImageNet-1K": { "acc@1": 85.708, "acc@5": 97.730, } From b39fbdb26bb9d7d13696fc025d95ebb5d1880115 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 16:01:50 +0100 Subject: [PATCH 12/13] Use COCO-val2017-VOC-labels for segmentation --- docs/source/conf.py | 2 +- test/test_extended_models.py | 2 +- torchvision/models/segmentation/deeplabv3.py | 6 +++--- torchvision/models/segmentation/fcn.py | 4 ++-- torchvision/models/segmentation/lraspp.py | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index e4453f76c03..014eb3c3ae9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -439,7 +439,7 @@ def generate_weights_table(module, table_name, metrics, dataset, include_pattern module=M.segmentation, table_name="segmentation", metrics=[("miou", "Mean IoU"), ("pixel_acc", "pixelwise Acc")], - dataset="COCO-val2017", + dataset="COCO-val2017-VOC-labels", ) generate_weights_table( module=M.video, table_name="video", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")], dataset="Kinetics-400" diff --git a/test/test_extended_models.py b/test/test_extended_models.py index c4e201b764b..0e9c121616b 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -99,7 +99,7 @@ def test_schema_meta_validation(model_fn): "models": classification_fields, "detection": {"categories", ("_metrics", "COCO-val2017", "box_map")}, "quantization": classification_fields | {"backend", "unquantized"}, - "segmentation": {"categories", ("_metrics", "COCO-val2017", "miou"), ("_metrics", "COCO-val2017", "pixel_acc")}, + "segmentation": {"categories", ("_metrics", "COCO-val2017-VOC-labels", "miou"), ("_metrics", "COCO-val2017-VOC-labels", "pixel_acc")}, "video": {"categories", ("_metrics", "Kinetics-400", "acc@1"), ("_metrics", 
"Kinetics-400", "acc@5")}, "optical_flow": set(), } diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index 8f3b7630992..78f54cdc6d7 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -147,7 +147,7 @@ class DeepLabV3_ResNet50_Weights(WeightsEnum): "num_params": 42004074, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_resnet50", "_metrics": { - "COCO-val2017": { + "COCO-val2017-VOC-labels": { "miou": 66.4, "pixel_acc": 92.4, } @@ -166,7 +166,7 @@ class DeepLabV3_ResNet101_Weights(WeightsEnum): "num_params": 60996202, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#fcn_resnet101", "_metrics": { - "COCO-val2017": { + "COCO-val2017-VOC-labels": { "miou": 67.4, "pixel_acc": 92.4, } @@ -185,7 +185,7 @@ class DeepLabV3_MobileNet_V3_Large_Weights(WeightsEnum): "num_params": 11029328, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_mobilenet_v3_large", "_metrics": { - "COCO-val2017": { + "COCO-val2017-VOC-labels": { "miou": 60.3, "pixel_acc": 91.2, } diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index 9690734003f..a1dd48c2f58 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -66,7 +66,7 @@ class FCN_ResNet50_Weights(WeightsEnum): "num_params": 35322218, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#fcn_resnet50", "_metrics": { - "COCO-val2017": { + "COCO-val2017-VOC-labels": { "miou": 60.5, "pixel_acc": 91.4, } @@ -85,7 +85,7 @@ class FCN_ResNet101_Weights(WeightsEnum): "num_params": 54314346, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_resnet101", "_metrics": { - "COCO-val2017": { + "COCO-val2017-VOC-labels": { "miou": 63.7, "pixel_acc": 91.9, } diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py index 33d005bdec3..ec4eba6eefc 100644 --- a/torchvision/models/segmentation/lraspp.py +++ b/torchvision/models/segmentation/lraspp.py @@ -103,7 +103,7 @@ class LRASPP_MobileNet_V3_Large_Weights(WeightsEnum): "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#lraspp_mobilenet_v3_large", "_metrics": { - "COCO-val2017": { + "COCO-val2017-VOC-labels": { "miou": 57.9, "pixel_acc": 91.2, } From 69339b125ea6a825e23794b82b16188711013760 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 18 May 2022 16:03:25 +0100 Subject: [PATCH 13/13] formatting --- test/test_extended_models.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_extended_models.py b/test/test_extended_models.py index 0e9c121616b..408a8c0514c 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -99,7 +99,11 @@ def test_schema_meta_validation(model_fn): "models": classification_fields, "detection": {"categories", ("_metrics", "COCO-val2017", "box_map")}, "quantization": classification_fields | {"backend", "unquantized"}, - "segmentation": {"categories", ("_metrics", "COCO-val2017-VOC-labels", "miou"), ("_metrics", "COCO-val2017-VOC-labels", "pixel_acc")}, + "segmentation": { + "categories", + ("_metrics", "COCO-val2017-VOC-labels", "miou"), + ("_metrics", "COCO-val2017-VOC-labels", "pixel_acc"), + }, "video": {"categories", ("_metrics", "Kinetics-400", "acc@1"), ("_metrics", "Kinetics-400", 
"acc@5")}, "optical_flow": set(), }