From 1efef05a079dbfcd476b13062478fe7c3ef8cccd Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Apr 2022 12:46:36 +0100 Subject: [PATCH 01/10] Removing `task`, `architecture` and `quantization` --- test/test_extended_models.py | 4 ++-- torchvision/models/alexnet.py | 2 -- torchvision/models/convnext.py | 2 -- torchvision/models/densenet.py | 2 -- torchvision/models/detection/faster_rcnn.py | 2 -- torchvision/models/detection/fcos.py | 2 -- torchvision/models/detection/keypoint_rcnn.py | 2 -- torchvision/models/detection/mask_rcnn.py | 2 -- torchvision/models/detection/retinanet.py | 2 -- torchvision/models/detection/ssd.py | 2 -- torchvision/models/detection/ssdlite.py | 2 -- torchvision/models/efficientnet.py | 3 --- torchvision/models/googlenet.py | 2 -- torchvision/models/inception.py | 2 -- torchvision/models/mnasnet.py | 2 -- torchvision/models/mobilenetv2.py | 2 -- torchvision/models/mobilenetv3.py | 2 -- torchvision/models/optical_flow/raft.py | 14 -------------- torchvision/models/quantization/googlenet.py | 3 --- torchvision/models/quantization/inception.py | 3 --- torchvision/models/quantization/mobilenetv2.py | 3 --- torchvision/models/quantization/mobilenetv3.py | 3 --- torchvision/models/quantization/resnet.py | 7 ------- torchvision/models/quantization/shufflenetv2.py | 3 --- torchvision/models/regnet.py | 2 -- torchvision/models/resnet.py | 17 ----------------- torchvision/models/segmentation/deeplabv3.py | 2 -- torchvision/models/segmentation/fcn.py | 2 -- torchvision/models/segmentation/lraspp.py | 2 -- torchvision/models/shufflenetv2.py | 2 -- torchvision/models/squeezenet.py | 2 -- torchvision/models/vgg.py | 2 -- torchvision/models/video/resnet.py | 4 ---- torchvision/models/vision_transformer.py | 2 -- 34 files changed, 2 insertions(+), 108 deletions(-) diff --git a/test/test_extended_models.py b/test/test_extended_models.py index a07f741c9f7..f30e04fe433 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -81,10 +81,10 @@ def test_naming_conventions(model_fn): def test_schema_meta_validation(model_fn): classification_fields = ["size", "categories", "acc@1", "acc@5", "min_size"] defaults = { - "all": ["task", "architecture", "recipe", "num_params"], + "all": ["recipe", "num_params"], "models": classification_fields, "detection": ["categories", "map"], - "quantization": classification_fields + ["backend", "quantization", "unquantized"], + "quantization": classification_fields + ["backend", "unquantized"], "segmentation": ["categories", "mIoU", "acc"], "video": classification_fields, "optical_flow": [], diff --git a/torchvision/models/alexnet.py b/torchvision/models/alexnet.py index 0128b82b08a..f1107405fa2 100644 --- a/torchvision/models/alexnet.py +++ b/torchvision/models/alexnet.py @@ -57,8 +57,6 @@ class AlexNet_Weights(WeightsEnum): url="https://download.pytorch.org/models/alexnet-owt-7be5be79.pth", transforms=partial(ImageClassification, crop_size=224), meta={ - "task": "image_classification", - "architecture": "AlexNet", "num_params": 61100840, "size": (224, 224), "min_size": (63, 63), diff --git a/torchvision/models/convnext.py b/torchvision/models/convnext.py index 8720c590ede..29e5c6a2d24 100644 --- a/torchvision/models/convnext.py +++ b/torchvision/models/convnext.py @@ -204,8 +204,6 @@ def _convnext( _COMMON_META = { - "task": "image_classification", - "architecture": "ConvNeXt", "size": (224, 224), "min_size": (32, 32), "categories": _IMAGENET_CATEGORIES, diff --git a/torchvision/models/densenet.py b/torchvision/models/densenet.py index 7bc53ad2679..42e7a80d341 100644 --- a/torchvision/models/densenet.py +++ b/torchvision/models/densenet.py @@ -266,8 +266,6 @@ def _densenet( _COMMON_META = { - "task": "image_classification", - "architecture": "DenseNet", "size": (224, 224), "min_size": (29, 29), "categories": _IMAGENET_CATEGORIES, diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index ce794b0ed76..a0ae56965ab 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -370,8 +370,6 @@ def forward(self, x): _COMMON_META = { - "task": "image_object_detection", - "architecture": "FasterRCNN", "categories": _COCO_CATEGORIES, } diff --git a/torchvision/models/detection/fcos.py b/torchvision/models/detection/fcos.py index 3bb9a35f517..5cd8eab2c88 100644 --- a/torchvision/models/detection/fcos.py +++ b/torchvision/models/detection/fcos.py @@ -651,8 +651,6 @@ class FCOS_ResNet50_FPN_Weights(WeightsEnum): url="https://download.pytorch.org/models/fcos_resnet50_fpn_coco-99b0c9b7.pth", transforms=ObjectDetection, meta={ - "task": "image_object_detection", - "architecture": "FCOS", "num_params": 32269600, "categories": _COCO_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#fcos-resnet-50-fpn", diff --git a/torchvision/models/detection/keypoint_rcnn.py b/torchvision/models/detection/keypoint_rcnn.py index 469d97b3f2f..44d8133c496 100644 --- a/torchvision/models/detection/keypoint_rcnn.py +++ b/torchvision/models/detection/keypoint_rcnn.py @@ -308,8 +308,6 @@ def forward(self, x): _COMMON_META = { - "task": "image_object_detection", - "architecture": "KeypointRCNN", "categories": _COCO_PERSON_CATEGORIES, "keypoint_names": _COCO_PERSON_KEYPOINT_NAMES, } diff --git a/torchvision/models/detection/mask_rcnn.py b/torchvision/models/detection/mask_rcnn.py index ae6b0aa81f0..e9b3a69be0e 100644 --- a/torchvision/models/detection/mask_rcnn.py +++ b/torchvision/models/detection/mask_rcnn.py @@ -351,8 +351,6 @@ def __init__(self, in_channels, dim_reduced, num_classes): _COMMON_META = { - "task": "image_object_detection", - "architecture": "MaskRCNN", "categories": _COCO_CATEGORIES, } diff --git a/torchvision/models/detection/retinanet.py b/torchvision/models/detection/retinanet.py index 247e9cdc0d4..5c74bb429ff 100644 --- a/torchvision/models/detection/retinanet.py +++ b/torchvision/models/detection/retinanet.py @@ -674,8 +674,6 @@ def forward(self, images, targets=None): _COMMON_META = { - "task": "image_object_detection", - "architecture": "RetinaNet", "categories": _COCO_CATEGORIES, } diff --git a/torchvision/models/detection/ssd.py b/torchvision/models/detection/ssd.py index d4e26e1517b..0da6e6444b9 100644 --- a/torchvision/models/detection/ssd.py +++ b/torchvision/models/detection/ssd.py @@ -30,8 +30,6 @@ class SSD300_VGG16_Weights(WeightsEnum): url="https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth", transforms=ObjectDetection, meta={ - "task": "image_object_detection", - "architecture": "SSD", "num_params": 35641826, "size": (300, 300), "categories": _COCO_CATEGORIES, diff --git a/torchvision/models/detection/ssdlite.py b/torchvision/models/detection/ssdlite.py index d55991f48d5..1e6b3f8dd1f 100644 --- a/torchvision/models/detection/ssdlite.py +++ b/torchvision/models/detection/ssdlite.py @@ -189,8 +189,6 @@ class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum): url="https://download.pytorch.org/models/ssdlite320_mobilenet_v3_large_coco-a79551df.pth", transforms=ObjectDetection, meta={ - "task": "image_object_detection", - "architecture": "SSDLite", "num_params": 3440060, "size": (320, 320), "categories": _COCO_CATEGORIES, diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index a5519c60824..6bff3342f51 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -430,7 +430,6 @@ def _efficientnet_conf( _COMMON_META = { - "task": "image_classification", "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet", } @@ -438,14 +437,12 @@ def _efficientnet_conf( _COMMON_META_V1 = { **_COMMON_META, - "architecture": "EfficientNet", "min_size": (1, 1), } _COMMON_META_V2 = { **_COMMON_META, - "architecture": "EfficientNetV2", "min_size": (33, 33), } diff --git a/torchvision/models/googlenet.py b/torchvision/models/googlenet.py index a84d394d8d6..bc330153194 100644 --- a/torchvision/models/googlenet.py +++ b/torchvision/models/googlenet.py @@ -280,8 +280,6 @@ class GoogLeNet_Weights(WeightsEnum): url="https://download.pytorch.org/models/googlenet-1378be20.pth", transforms=partial(ImageClassification, crop_size=224), meta={ - "task": "image_classification", - "architecture": "GoogLeNet", "num_params": 6624904, "size": (224, 224), "min_size": (15, 15), diff --git a/torchvision/models/inception.py b/torchvision/models/inception.py index a1997eb8dbd..a97a9955b2d 100644 --- a/torchvision/models/inception.py +++ b/torchvision/models/inception.py @@ -412,8 +412,6 @@ class Inception_V3_Weights(WeightsEnum): url="https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth", transforms=partial(ImageClassification, crop_size=299, resize_size=342), meta={ - "task": "image_classification", - "architecture": "InceptionV3", "num_params": 27161264, "size": (299, 299), "min_size": (75, 75), diff --git a/torchvision/models/mnasnet.py b/torchvision/models/mnasnet.py index 7517d3c9772..d13735baea0 100644 --- a/torchvision/models/mnasnet.py +++ b/torchvision/models/mnasnet.py @@ -212,8 +212,6 @@ def _load_from_state_dict( _COMMON_META = { - "task": "image_classification", - "architecture": "MNASNet", "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, diff --git a/torchvision/models/mobilenetv2.py b/torchvision/models/mobilenetv2.py index cf1580e3b75..9e28f6511ce 100644 --- a/torchvision/models/mobilenetv2.py +++ b/torchvision/models/mobilenetv2.py @@ -195,8 +195,6 @@ def forward(self, x: Tensor) -> Tensor: _COMMON_META = { - "task": "image_classification", - "architecture": "MobileNetV2", "num_params": 3504872, "size": (224, 224), "min_size": (1, 1), diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py index 8c0eb4c3223..eae48c4682e 100644 --- a/torchvision/models/mobilenetv3.py +++ b/torchvision/models/mobilenetv3.py @@ -304,8 +304,6 @@ def _mobilenet_v3( _COMMON_META = { - "task": "image_classification", - "architecture": "MobileNetV3", "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, diff --git a/torchvision/models/optical_flow/raft.py b/torchvision/models/optical_flow/raft.py index 3a3d0334593..1bce5118429 100644 --- a/torchvision/models/optical_flow/raft.py +++ b/torchvision/models/optical_flow/raft.py @@ -511,19 +511,12 @@ def forward(self, image1, image2, num_flow_updates: int = 12): return flow_predictions -_COMMON_META = { - "task": "optical_flow", - "architecture": "RAFT", -} - - class Raft_Large_Weights(WeightsEnum): C_T_V1 = Weights( # Chairs + Things, ported from original paper repo (raft-things.pth) url="https://download.pytorch.org/models/raft_large_C_T_V1-22a6c225.pth", transforms=OpticalFlow, meta={ - **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", "sintel_train_cleanpass_epe": 1.4411, @@ -538,7 +531,6 @@ class Raft_Large_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_large_C_T_V2-1bb1363a.pth", transforms=OpticalFlow, meta={ - **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "sintel_train_cleanpass_epe": 1.3822, @@ -553,7 +545,6 @@ class Raft_Large_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_large_C_T_SKHT_V1-0b8c9e55.pth", transforms=OpticalFlow, meta={ - **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", "sintel_test_cleanpass_epe": 1.94, @@ -568,7 +559,6 @@ class Raft_Large_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_large_C_T_SKHT_V2-ff5fadd5.pth", transforms=OpticalFlow, meta={ - **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "sintel_test_cleanpass_epe": 1.819, @@ -581,7 +571,6 @@ class Raft_Large_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_large_C_T_SKHT_K_V1-4a6a5039.pth", transforms=OpticalFlow, meta={ - **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", "kitti_test_f1-all": 5.10, @@ -596,7 +585,6 @@ class Raft_Large_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_large_C_T_SKHT_K_V2-b5c70766.pth", transforms=OpticalFlow, meta={ - **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "kitti_test_f1-all": 5.19, @@ -612,7 +600,6 @@ class Raft_Small_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_small_C_T_V1-ad48884c.pth", transforms=OpticalFlow, meta={ - **_COMMON_META, "num_params": 990162, "recipe": "https://github.com/princeton-vl/RAFT", "sintel_train_cleanpass_epe": 2.1231, @@ -626,7 +613,6 @@ class Raft_Small_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_small_C_T_V2-01064c6d.pth", transforms=OpticalFlow, meta={ - **_COMMON_META, "num_params": 990162, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "sintel_train_cleanpass_epe": 1.9901, diff --git a/torchvision/models/quantization/googlenet.py b/torchvision/models/quantization/googlenet.py index 37a420a0a10..0e13eb8ee8a 100644 --- a/torchvision/models/quantization/googlenet.py +++ b/torchvision/models/quantization/googlenet.py @@ -111,14 +111,11 @@ class GoogLeNet_QuantizedWeights(WeightsEnum): url="https://download.pytorch.org/models/quantized/googlenet_fbgemm-c00238cf.pth", transforms=partial(ImageClassification, crop_size=224), meta={ - "task": "image_classification", - "architecture": "GoogLeNet", "num_params": 6624904, "size": (224, 224), "min_size": (15, 15), "categories": _IMAGENET_CATEGORIES, "backend": "fbgemm", - "quantization": "Post Training Quantization", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models", "unquantized": GoogLeNet_Weights.IMAGENET1K_V1, "acc@1": 69.826, diff --git a/torchvision/models/quantization/inception.py b/torchvision/models/quantization/inception.py index 288be0068ca..06a3eb1d869 100644 --- a/torchvision/models/quantization/inception.py +++ b/torchvision/models/quantization/inception.py @@ -177,14 +177,11 @@ class Inception_V3_QuantizedWeights(WeightsEnum): url="https://download.pytorch.org/models/quantized/inception_v3_google_fbgemm-71447a44.pth", transforms=partial(ImageClassification, crop_size=299, resize_size=342), meta={ - "task": "image_classification", - "architecture": "InceptionV3", "num_params": 27161264, "size": (299, 299), "min_size": (75, 75), "categories": _IMAGENET_CATEGORIES, "backend": "fbgemm", - "quantization": "Post Training Quantization", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models", "unquantized": Inception_V3_Weights.IMAGENET1K_V1, "acc@1": 77.176, diff --git a/torchvision/models/quantization/mobilenetv2.py b/torchvision/models/quantization/mobilenetv2.py index 8c78ebcfd3b..39cc9e3689d 100644 --- a/torchvision/models/quantization/mobilenetv2.py +++ b/torchvision/models/quantization/mobilenetv2.py @@ -69,14 +69,11 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum): url="https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth", transforms=partial(ImageClassification, crop_size=224), meta={ - "task": "image_classification", - "architecture": "MobileNetV2", "num_params": 3504872, "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, "backend": "qnnpack", - "quantization": "Quantization Aware Training", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#qat-mobilenetv2", "unquantized": MobileNet_V2_Weights.IMAGENET1K_V1, "acc@1": 71.658, diff --git a/torchvision/models/quantization/mobilenetv3.py b/torchvision/models/quantization/mobilenetv3.py index afee0b1d954..8759116a22a 100644 --- a/torchvision/models/quantization/mobilenetv3.py +++ b/torchvision/models/quantization/mobilenetv3.py @@ -159,14 +159,11 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum): url="https://download.pytorch.org/models/quantized/mobilenet_v3_large_qnnpack-5bcacf28.pth", transforms=partial(ImageClassification, crop_size=224), meta={ - "task": "image_classification", - "architecture": "MobileNetV3", "num_params": 5483032, "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, "backend": "qnnpack", - "quantization": "Quantization Aware Training", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#qat-mobilenetv3", "unquantized": MobileNet_V3_Large_Weights.IMAGENET1K_V1, "acc@1": 73.004, diff --git a/torchvision/models/quantization/resnet.py b/torchvision/models/quantization/resnet.py index 4534678af7f..6e5c126c349 100644 --- a/torchvision/models/quantization/resnet.py +++ b/torchvision/models/quantization/resnet.py @@ -147,12 +147,10 @@ def _resnet( _COMMON_META = { - "task": "image_classification", "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, "backend": "fbgemm", - "quantization": "Post Training Quantization", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models", } @@ -163,7 +161,6 @@ class ResNet18_QuantizedWeights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ **_COMMON_META, - "architecture": "ResNet", "num_params": 11689512, "unquantized": ResNet18_Weights.IMAGENET1K_V1, "acc@1": 69.494, @@ -179,7 +176,6 @@ class ResNet50_QuantizedWeights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ **_COMMON_META, - "architecture": "ResNet", "num_params": 25557032, "unquantized": ResNet50_Weights.IMAGENET1K_V1, "acc@1": 75.920, @@ -191,7 +187,6 @@ class ResNet50_QuantizedWeights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224, resize_size=232), meta={ **_COMMON_META, - "architecture": "ResNet", "num_params": 25557032, "unquantized": ResNet50_Weights.IMAGENET1K_V2, "acc@1": 80.282, @@ -207,7 +202,6 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ **_COMMON_META, - "architecture": "ResNeXt", "num_params": 88791336, "unquantized": ResNeXt101_32X8D_Weights.IMAGENET1K_V1, "acc@1": 78.986, @@ -219,7 +213,6 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224, resize_size=232), meta={ **_COMMON_META, - "architecture": "ResNeXt", "num_params": 88791336, "unquantized": ResNeXt101_32X8D_Weights.IMAGENET1K_V2, "acc@1": 82.574, diff --git a/torchvision/models/quantization/shufflenetv2.py b/torchvision/models/quantization/shufflenetv2.py index 5e679da7399..be8c1661ca9 100644 --- a/torchvision/models/quantization/shufflenetv2.py +++ b/torchvision/models/quantization/shufflenetv2.py @@ -102,13 +102,10 @@ def _shufflenetv2( _COMMON_META = { - "task": "image_classification", - "architecture": "ShuffleNetV2", "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, "backend": "fbgemm", - "quantization": "Post Training Quantization", "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models", } diff --git a/torchvision/models/regnet.py b/torchvision/models/regnet.py index 17c54203a8f..e4a87a75d26 100644 --- a/torchvision/models/regnet.py +++ b/torchvision/models/regnet.py @@ -403,8 +403,6 @@ def _regnet( _COMMON_META = { - "task": "image_classification", - "architecture": "RegNet", "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index c0c102710be..40079aaf53b 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -302,7 +302,6 @@ def _resnet( _COMMON_META = { - "task": "image_classification", "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, @@ -315,7 +314,6 @@ class ResNet18_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ **_COMMON_META, - "architecture": "ResNet", "num_params": 11689512, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", "acc@1": 69.758, @@ -331,7 +329,6 @@ class ResNet34_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ **_COMMON_META, - "architecture": "ResNet", "num_params": 21797672, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", "acc@1": 73.314, @@ -347,7 +344,6 @@ class ResNet50_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ **_COMMON_META, - "architecture": "ResNet", "num_params": 25557032, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", "acc@1": 76.130, @@ -359,7 +355,6 @@ class ResNet50_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224, resize_size=232), meta={ **_COMMON_META, - "architecture": "ResNet", "num_params": 25557032, "recipe": "https://github.com/pytorch/vision/issues/3995#issuecomment-1013906621", "acc@1": 80.858, @@ -375,7 +370,6 @@ class ResNet101_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ **_COMMON_META, - "architecture": "ResNet", "num_params": 44549160, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", "acc@1": 77.374, @@ -387,7 +381,6 @@ class ResNet101_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224, resize_size=232), meta={ **_COMMON_META, - "architecture": "ResNet", "num_params": 44549160, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "acc@1": 81.886, @@ -403,7 +396,6 @@ class ResNet152_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ **_COMMON_META, - "architecture": "ResNet", "num_params": 60192808, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet", "acc@1": 78.312, @@ -415,7 +407,6 @@ class ResNet152_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224, resize_size=232), meta={ **_COMMON_META, - "architecture": "ResNet", "num_params": 60192808, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "acc@1": 82.284, @@ -431,7 +422,6 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ **_COMMON_META, - "architecture": "ResNeXt", "num_params": 25028904, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext", "acc@1": 77.618, @@ -443,7 +433,6 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224, resize_size=232), meta={ **_COMMON_META, - "architecture": "ResNeXt", "num_params": 25028904, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "acc@1": 81.198, @@ -459,7 +448,6 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ **_COMMON_META, - "architecture": "ResNeXt", "num_params": 88791336, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext", "acc@1": 79.312, @@ -471,7 +459,6 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224, resize_size=232), meta={ **_COMMON_META, - "architecture": "ResNeXt", "num_params": 88791336, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", "acc@1": 82.834, @@ -487,7 +474,6 @@ class Wide_ResNet50_2_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ **_COMMON_META, - "architecture": "WideResNet", "num_params": 68883240, "recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439", "acc@1": 78.468, @@ -499,7 +485,6 @@ class Wide_ResNet50_2_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224, resize_size=232), meta={ **_COMMON_META, - "architecture": "WideResNet", "num_params": 68883240, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres", "acc@1": 81.602, @@ -515,7 +500,6 @@ class Wide_ResNet101_2_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ **_COMMON_META, - "architecture": "WideResNet", "num_params": 126886696, "recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439", "acc@1": 78.848, @@ -527,7 +511,6 @@ class Wide_ResNet101_2_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224, resize_size=232), meta={ **_COMMON_META, - "architecture": "WideResNet", "num_params": 126886696, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe", "acc@1": 82.510, diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index beaa1a8c7fe..9d8bddf8b64 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -129,8 +129,6 @@ def _deeplabv3_resnet( _COMMON_META = { - "task": "image_semantic_segmentation", - "architecture": "DeepLabV3", "categories": _VOC_CATEGORIES, } diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index 678872915eb..933e3e977ea 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -48,8 +48,6 @@ def __init__(self, in_channels: int, channels: int) -> None: _COMMON_META = { - "task": "image_semantic_segmentation", - "architecture": "FCN", "categories": _VOC_CATEGORIES, } diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py index eb66fe93af2..a508d65a39d 100644 --- a/torchvision/models/segmentation/lraspp.py +++ b/torchvision/models/segmentation/lraspp.py @@ -98,8 +98,6 @@ class LRASPP_MobileNet_V3_Large_Weights(WeightsEnum): url="https://download.pytorch.org/models/lraspp_mobilenet_v3_large-d234d4ea.pth", transforms=partial(SemanticSegmentation, resize_size=520), meta={ - "task": "image_semantic_segmentation", - "architecture": "LRASPP", "num_params": 3221538, "categories": _VOC_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#lraspp_mobilenet_v3_large", diff --git a/torchvision/models/shufflenetv2.py b/torchvision/models/shufflenetv2.py index 4bde891d6b0..151368fa55e 100644 --- a/torchvision/models/shufflenetv2.py +++ b/torchvision/models/shufflenetv2.py @@ -184,8 +184,6 @@ def _shufflenetv2( _COMMON_META = { - "task": "image_classification", - "architecture": "ShuffleNetV2", "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, diff --git a/torchvision/models/squeezenet.py b/torchvision/models/squeezenet.py index 42618f02b9d..e4182ff27b7 100644 --- a/torchvision/models/squeezenet.py +++ b/torchvision/models/squeezenet.py @@ -115,8 +115,6 @@ def _squeezenet( _COMMON_META = { - "task": "image_classification", - "architecture": "SqueezeNet", "size": (224, 224), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/pull/49#issuecomment-277560717", diff --git a/torchvision/models/vgg.py b/torchvision/models/vgg.py index e75c8ad2c6f..eb4802d03c3 100644 --- a/torchvision/models/vgg.py +++ b/torchvision/models/vgg.py @@ -107,8 +107,6 @@ def _vgg(cfg: str, batch_norm: bool, weights: Optional[WeightsEnum], progress: b _COMMON_META = { - "task": "image_classification", - "architecture": "VGG", "size": (224, 224), "min_size": (32, 32), "categories": _IMAGENET_CATEGORIES, diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index b0fcd39fecc..6738ddea760 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -309,7 +309,6 @@ def _video_resnet( _COMMON_META = { - "task": "video_classification", "size": (112, 112), "min_size": (1, 1), "categories": _KINETICS400_CATEGORIES, @@ -323,7 +322,6 @@ class R3D_18_Weights(WeightsEnum): transforms=partial(VideoClassification, crop_size=(112, 112), resize_size=(128, 171)), meta={ **_COMMON_META, - "architecture": "R3D", "num_params": 33371472, "acc@1": 52.75, "acc@5": 75.45, @@ -338,7 +336,6 @@ class MC3_18_Weights(WeightsEnum): transforms=partial(VideoClassification, crop_size=(112, 112), resize_size=(128, 171)), meta={ **_COMMON_META, - "architecture": "MC3", "num_params": 11695440, "acc@1": 53.90, "acc@5": 76.29, @@ -353,7 +350,6 @@ class R2Plus1D_18_Weights(WeightsEnum): transforms=partial(VideoClassification, crop_size=(112, 112), resize_size=(128, 171)), meta={ **_COMMON_META, - "architecture": "R(2+1)D", "num_params": 31505325, "acc@1": 57.50, "acc@5": 78.81, diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py index ffc6265eafc..2a97e90ad82 100644 --- a/torchvision/models/vision_transformer.py +++ b/torchvision/models/vision_transformer.py @@ -319,8 +319,6 @@ def _vision_transformer( _COMMON_META: Dict[str, Any] = { - "task": "image_classification", - "architecture": "ViT", "categories": _IMAGENET_CATEGORIES, } From a381e544af5952dd7cd48960457ffa46c5778787 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Apr 2022 13:05:08 +0100 Subject: [PATCH 02/10] Fix mypy --- torchvision/models/efficientnet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index 6bff3342f51..a1a15a195fe 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -3,7 +3,7 @@ import warnings from dataclasses import dataclass from functools import partial -from typing import Any, Callable, Optional, List, Sequence, Tuple, Union +from typing import Any, Callable, Dict, Optional, List, Sequence, Tuple, Union import torch from torch import nn, Tensor @@ -429,7 +429,7 @@ def _efficientnet_conf( return inverted_residual_setting, last_channel -_COMMON_META = { +_COMMON_META: Dict[str, Any] = { "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet", } From 31d69bb6db24b35be3811a8ad356cd81e8ca9424 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Apr 2022 13:28:55 +0100 Subject: [PATCH 03/10] Remove size field --- torchvision/models/alexnet.py | 1 - torchvision/models/convnext.py | 1 - torchvision/models/densenet.py | 1 - torchvision/models/detection/ssd.py | 1 - torchvision/models/detection/ssdlite.py | 1 - torchvision/models/efficientnet.py | 12 ---------- torchvision/models/googlenet.py | 1 - torchvision/models/inception.py | 1 - torchvision/models/mnasnet.py | 1 - torchvision/models/mobilenetv2.py | 1 - torchvision/models/mobilenetv3.py | 1 - torchvision/models/quantization/googlenet.py | 1 - torchvision/models/quantization/inception.py | 1 - .../models/quantization/mobilenetv2.py | 1 - .../models/quantization/mobilenetv3.py | 1 - torchvision/models/quantization/resnet.py | 1 - .../models/quantization/shufflenetv2.py | 1 - torchvision/models/regnet.py | 2 -- torchvision/models/resnet.py | 1 - torchvision/models/shufflenetv2.py | 1 - torchvision/models/squeezenet.py | 1 - torchvision/models/vgg.py | 1 - torchvision/models/video/resnet.py | 1 - torchvision/models/vision_transformer.py | 23 +++---------------- 24 files changed, 3 insertions(+), 55 deletions(-) diff --git a/torchvision/models/alexnet.py b/torchvision/models/alexnet.py index f1107405fa2..327530d6b2b 100644 --- a/torchvision/models/alexnet.py +++ b/torchvision/models/alexnet.py @@ -58,7 +58,6 @@ class AlexNet_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ "num_params": 61100840, - "size": (224, 224), "min_size": (63, 63), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg", diff --git a/torchvision/models/convnext.py b/torchvision/models/convnext.py index 29e5c6a2d24..4b64157634e 100644 --- a/torchvision/models/convnext.py +++ b/torchvision/models/convnext.py @@ -204,7 +204,6 @@ def _convnext( _COMMON_META = { - "size": (224, 224), "min_size": (32, 32), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#convnext", diff --git a/torchvision/models/densenet.py b/torchvision/models/densenet.py index 42e7a80d341..38e726d1825 100644 --- a/torchvision/models/densenet.py +++ b/torchvision/models/densenet.py @@ -266,7 +266,6 @@ def _densenet( _COMMON_META = { - "size": (224, 224), "min_size": (29, 29), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/pull/116", diff --git a/torchvision/models/detection/ssd.py b/torchvision/models/detection/ssd.py index 0da6e6444b9..598818f20db 100644 --- a/torchvision/models/detection/ssd.py +++ b/torchvision/models/detection/ssd.py @@ -31,7 +31,6 @@ class SSD300_VGG16_Weights(WeightsEnum): transforms=ObjectDetection, meta={ "num_params": 35641826, - "size": (300, 300), "categories": _COCO_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssd300-vgg16", "map": 25.1, diff --git a/torchvision/models/detection/ssdlite.py b/torchvision/models/detection/ssdlite.py index 1e6b3f8dd1f..c3974532b89 100644 --- a/torchvision/models/detection/ssdlite.py +++ b/torchvision/models/detection/ssdlite.py @@ -190,7 +190,6 @@ class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum): transforms=ObjectDetection, meta={ "num_params": 3440060, - "size": (320, 320), "categories": _COCO_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssdlite320-mobilenetv3-large", "map": 21.3, diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index a1a15a195fe..ef7e0c323a4 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -456,7 +456,6 @@ class EfficientNet_B0_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 5288548, - "size": (224, 224), "acc@1": 77.692, "acc@5": 93.532, }, @@ -473,7 +472,6 @@ class EfficientNet_B1_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 7794184, - "size": (240, 240), "acc@1": 78.642, "acc@5": 94.186, }, @@ -487,7 +485,6 @@ class EfficientNet_B1_Weights(WeightsEnum): **_COMMON_META_V1, "num_params": 7794184, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-lr-wd-crop-tuning", - "size": (240, 240), "acc@1": 79.838, "acc@5": 94.934, }, @@ -504,7 +501,6 @@ class EfficientNet_B2_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 9109994, - "size": (288, 288), "acc@1": 80.608, "acc@5": 95.310, }, @@ -521,7 +517,6 @@ class EfficientNet_B3_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 12233232, - "size": (300, 300), "acc@1": 82.008, "acc@5": 96.054, }, @@ -538,7 +533,6 @@ class EfficientNet_B4_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 19341616, - "size": (380, 380), "acc@1": 83.384, "acc@5": 96.594, }, @@ -555,7 +549,6 @@ class EfficientNet_B5_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 30389784, - "size": (456, 456), "acc@1": 83.444, "acc@5": 96.628, }, @@ -572,7 +565,6 @@ class EfficientNet_B6_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 43040704, - "size": (528, 528), "acc@1": 84.008, "acc@5": 96.916, }, @@ -589,7 +581,6 @@ class EfficientNet_B7_Weights(WeightsEnum): meta={ **_COMMON_META_V1, "num_params": 66347960, - "size": (600, 600), "acc@1": 84.122, "acc@5": 96.908, }, @@ -609,7 +600,6 @@ class EfficientNet_V2_S_Weights(WeightsEnum): meta={ **_COMMON_META_V2, "num_params": 21458488, - "size": (384, 384), "acc@1": 84.228, "acc@5": 96.878, }, @@ -629,7 +619,6 @@ class EfficientNet_V2_M_Weights(WeightsEnum): meta={ **_COMMON_META_V2, "num_params": 54139356, - "size": (480, 480), "acc@1": 85.112, "acc@5": 97.156, }, @@ -651,7 +640,6 @@ class EfficientNet_V2_L_Weights(WeightsEnum): meta={ **_COMMON_META_V2, "num_params": 118515272, - "size": (480, 480), "acc@1": 85.808, "acc@5": 97.788, }, diff --git a/torchvision/models/googlenet.py b/torchvision/models/googlenet.py index bc330153194..e5f420198e0 100644 --- a/torchvision/models/googlenet.py +++ b/torchvision/models/googlenet.py @@ -281,7 +281,6 @@ class GoogLeNet_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ "num_params": 6624904, - "size": (224, 224), "min_size": (15, 15), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#googlenet", diff --git a/torchvision/models/inception.py b/torchvision/models/inception.py index a97a9955b2d..f7e006233af 100644 --- a/torchvision/models/inception.py +++ b/torchvision/models/inception.py @@ -413,7 +413,6 @@ class Inception_V3_Weights(WeightsEnum): transforms=partial(ImageClassification, crop_size=299, resize_size=342), meta={ "num_params": 27161264, - "size": (299, 299), "min_size": (75, 75), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#inception-v3", diff --git a/torchvision/models/mnasnet.py b/torchvision/models/mnasnet.py index d13735baea0..27967b50608 100644 --- a/torchvision/models/mnasnet.py +++ b/torchvision/models/mnasnet.py @@ -212,7 +212,6 @@ def _load_from_state_dict( _COMMON_META = { - "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/1e100/mnasnet_trainer", diff --git a/torchvision/models/mobilenetv2.py b/torchvision/models/mobilenetv2.py index 9e28f6511ce..af8b51049fd 100644 --- a/torchvision/models/mobilenetv2.py +++ b/torchvision/models/mobilenetv2.py @@ -196,7 +196,6 @@ def forward(self, x: Tensor) -> Tensor: _COMMON_META = { "num_params": 3504872, - "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, } diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py index eae48c4682e..128ff580641 100644 --- a/torchvision/models/mobilenetv3.py +++ b/torchvision/models/mobilenetv3.py @@ -304,7 +304,6 @@ def _mobilenet_v3( _COMMON_META = { - "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, } diff --git a/torchvision/models/quantization/googlenet.py b/torchvision/models/quantization/googlenet.py index 0e13eb8ee8a..196bedf1b29 100644 --- a/torchvision/models/quantization/googlenet.py +++ b/torchvision/models/quantization/googlenet.py @@ -112,7 +112,6 @@ class GoogLeNet_QuantizedWeights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ "num_params": 6624904, - "size": (224, 224), "min_size": (15, 15), "categories": _IMAGENET_CATEGORIES, "backend": "fbgemm", diff --git a/torchvision/models/quantization/inception.py b/torchvision/models/quantization/inception.py index 06a3eb1d869..d0d19b36a7e 100644 --- a/torchvision/models/quantization/inception.py +++ b/torchvision/models/quantization/inception.py @@ -178,7 +178,6 @@ class Inception_V3_QuantizedWeights(WeightsEnum): transforms=partial(ImageClassification, crop_size=299, resize_size=342), meta={ "num_params": 27161264, - "size": (299, 299), "min_size": (75, 75), "categories": _IMAGENET_CATEGORIES, "backend": "fbgemm", diff --git a/torchvision/models/quantization/mobilenetv2.py b/torchvision/models/quantization/mobilenetv2.py index 39cc9e3689d..d550b92d30a 100644 --- a/torchvision/models/quantization/mobilenetv2.py +++ b/torchvision/models/quantization/mobilenetv2.py @@ -70,7 +70,6 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ "num_params": 3504872, - "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, "backend": "qnnpack", diff --git a/torchvision/models/quantization/mobilenetv3.py b/torchvision/models/quantization/mobilenetv3.py index 8759116a22a..be061ca6364 100644 --- a/torchvision/models/quantization/mobilenetv3.py +++ b/torchvision/models/quantization/mobilenetv3.py @@ -160,7 +160,6 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum): transforms=partial(ImageClassification, crop_size=224), meta={ "num_params": 5483032, - "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, "backend": "qnnpack", diff --git a/torchvision/models/quantization/resnet.py b/torchvision/models/quantization/resnet.py index 6e5c126c349..b01f92ae547 100644 --- a/torchvision/models/quantization/resnet.py +++ b/torchvision/models/quantization/resnet.py @@ -147,7 +147,6 @@ def _resnet( _COMMON_META = { - "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, "backend": "fbgemm", diff --git a/torchvision/models/quantization/shufflenetv2.py b/torchvision/models/quantization/shufflenetv2.py index be8c1661ca9..f795fcad5b8 100644 --- a/torchvision/models/quantization/shufflenetv2.py +++ b/torchvision/models/quantization/shufflenetv2.py @@ -102,7 +102,6 @@ def _shufflenetv2( _COMMON_META = { - "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, "backend": "fbgemm", diff --git a/torchvision/models/regnet.py b/torchvision/models/regnet.py index e4a87a75d26..40daf9bde91 100644 --- a/torchvision/models/regnet.py +++ b/torchvision/models/regnet.py @@ -403,14 +403,12 @@ def _regnet( _COMMON_META = { - "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, } _COMMON_SWAG_META = { **_COMMON_META, - "size": (384, 384), "recipe": "https://github.com/facebookresearch/SWAG", "license": "https://github.com/facebookresearch/SWAG/blob/main/LICENSE", } diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index 40079aaf53b..25990e0d4d4 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -302,7 +302,6 @@ def _resnet( _COMMON_META = { - "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, } diff --git a/torchvision/models/shufflenetv2.py b/torchvision/models/shufflenetv2.py index 151368fa55e..c4f6f2466b0 100644 --- a/torchvision/models/shufflenetv2.py +++ b/torchvision/models/shufflenetv2.py @@ -184,7 +184,6 @@ def _shufflenetv2( _COMMON_META = { - "size": (224, 224), "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/barrh/Shufflenet-v2-Pytorch/tree/v0.1.0", diff --git a/torchvision/models/squeezenet.py b/torchvision/models/squeezenet.py index e4182ff27b7..43fe8a516e3 100644 --- a/torchvision/models/squeezenet.py +++ b/torchvision/models/squeezenet.py @@ -115,7 +115,6 @@ def _squeezenet( _COMMON_META = { - "size": (224, 224), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/pull/49#issuecomment-277560717", } diff --git a/torchvision/models/vgg.py b/torchvision/models/vgg.py index eb4802d03c3..89e08b331f6 100644 --- a/torchvision/models/vgg.py +++ b/torchvision/models/vgg.py @@ -107,7 +107,6 @@ def _vgg(cfg: str, batch_norm: bool, weights: Optional[WeightsEnum], progress: b _COMMON_META = { - "size": (224, 224), "min_size": (32, 32), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg", diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index 6738ddea760..dab837571bd 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -309,7 +309,6 @@ def _video_resnet( _COMMON_META = { - "size": (112, 112), "min_size": (1, 1), "categories": _KINETICS400_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/video_classification", diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py index 2a97e90ad82..a6c15c9b9d7 100644 --- a/torchvision/models/vision_transformer.py +++ b/torchvision/models/vision_transformer.py @@ -288,18 +288,11 @@ def _vision_transformer( ) -> VisionTransformer: if weights is not None: _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) - if isinstance(weights.meta["size"], int): - _ovewrite_named_param(kwargs, "image_size", weights.meta["size"]) - elif isinstance(weights.meta["size"], Sequence): - if len(weights.meta["size"]) != 2 or weights.meta["size"][0] != weights.meta["size"][1]: - raise ValueError( - f'size: {weights.meta["size"]} is not valid! Currently we only support a 2-dimensional square and width = height' - ) - _ovewrite_named_param(kwargs, "image_size", weights.meta["size"][0]) - else: + if weights.meta["min_size"][0] != weights.meta["min_size"][1]: raise ValueError( - f'weights.meta["size"]: {weights.meta["size"]} is not valid, the type should be either an int or a Sequence[int]' + f'min_size: {weights.meta["min_size"]} is not valid! Currently we only support a 2-dimensional square and width = height' ) + _ovewrite_named_param(kwargs, "image_size", weights.meta["min_size"][0]) image_size = kwargs.pop("image_size", 224) model = VisionTransformer( @@ -336,7 +329,6 @@ class ViT_B_16_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 86567656, - "size": (224, 224), "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_16", "acc@1": 81.072, @@ -354,7 +346,6 @@ class ViT_B_16_Weights(WeightsEnum): meta={ **_COMMON_SWAG_META, "num_params": 86859496, - "size": (384, 384), "min_size": (384, 384), "acc@1": 85.304, "acc@5": 97.650, @@ -372,7 +363,6 @@ class ViT_B_16_Weights(WeightsEnum): **_COMMON_SWAG_META, "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 86567656, - "size": (224, 224), "min_size": (224, 224), "acc@1": 81.886, "acc@5": 96.180, @@ -388,7 +378,6 @@ class ViT_B_32_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 88224232, - "size": (224, 224), "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_32", "acc@1": 75.912, @@ -405,7 +394,6 @@ class ViT_L_16_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 304326632, - "size": (224, 224), "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_16", "acc@1": 79.662, @@ -423,7 +411,6 @@ class ViT_L_16_Weights(WeightsEnum): meta={ **_COMMON_SWAG_META, "num_params": 305174504, - "size": (512, 512), "min_size": (512, 512), "acc@1": 88.064, "acc@5": 98.512, @@ -441,7 +428,6 @@ class ViT_L_16_Weights(WeightsEnum): **_COMMON_SWAG_META, "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 304326632, - "size": (224, 224), "min_size": (224, 224), "acc@1": 85.146, "acc@5": 97.422, @@ -457,7 +443,6 @@ class ViT_L_32_Weights(WeightsEnum): meta={ **_COMMON_META, "num_params": 306535400, - "size": (224, 224), "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_32", "acc@1": 76.972, @@ -479,7 +464,6 @@ class ViT_H_14_Weights(WeightsEnum): meta={ **_COMMON_SWAG_META, "num_params": 633470440, - "size": (518, 518), "min_size": (518, 518), "acc@1": 88.552, "acc@5": 98.694, @@ -497,7 +481,6 @@ class ViT_H_14_Weights(WeightsEnum): **_COMMON_SWAG_META, "recipe": "https://github.com/pytorch/vision/pull/5793", "num_params": 632045800, - "size": (224, 224), "min_size": (224, 224), "acc@1": 85.708, "acc@5": 97.730, From 7c051622b0f9d8035eea5a6e4118fe8b029557ae Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Apr 2022 13:32:09 +0100 Subject: [PATCH 04/10] Remove unused import. --- torchvision/models/vision_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py index a6c15c9b9d7..e94531464fd 100644 --- a/torchvision/models/vision_transformer.py +++ b/torchvision/models/vision_transformer.py @@ -1,7 +1,7 @@ import math from collections import OrderedDict from functools import partial -from typing import Any, Callable, List, NamedTuple, Optional, Sequence, Dict +from typing import Any, Callable, List, NamedTuple, Optional, Dict import torch import torch.nn as nn From 52bc5fcd0ca76c67d49639084300c3d32dcccb8b Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Apr 2022 13:41:13 +0100 Subject: [PATCH 05/10] Fix mypy --- torchvision/models/regnet.py | 4 ++-- torchvision/models/vision_transformer.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/models/regnet.py b/torchvision/models/regnet.py index 40daf9bde91..f878bdd5754 100644 --- a/torchvision/models/regnet.py +++ b/torchvision/models/regnet.py @@ -1,7 +1,7 @@ import math from collections import OrderedDict from functools import partial -from typing import Any, Callable, List, Optional, Tuple +from typing import Any, Callable, Dict, List, Optional, Tuple import torch from torch import nn, Tensor @@ -402,7 +402,7 @@ def _regnet( return model -_COMMON_META = { +_COMMON_META: Dict[str, Any] = { "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, } diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py index e94531464fd..40ad07a93d0 100644 --- a/torchvision/models/vision_transformer.py +++ b/torchvision/models/vision_transformer.py @@ -315,7 +315,7 @@ def _vision_transformer( "categories": _IMAGENET_CATEGORIES, } -_COMMON_SWAG_META: Dict[str, Any] = { +_COMMON_SWAG_META = { **_COMMON_META, "recipe": "https://github.com/facebookresearch/SWAG", "license": "https://github.com/facebookresearch/SWAG/blob/main/LICENSE", From d14f5ad376e38c416b0e45176f0e3fa7189f23bb Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Apr 2022 13:52:06 +0100 Subject: [PATCH 06/10] Remove size from schema list. --- test/test_extended_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_extended_models.py b/test/test_extended_models.py index f30e04fe433..6358d0b979b 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -79,7 +79,7 @@ def test_naming_conventions(model_fn): ) @run_if_test_with_extended def test_schema_meta_validation(model_fn): - classification_fields = ["size", "categories", "acc@1", "acc@5", "min_size"] + classification_fields = ["categories", "acc@1", "acc@5", "min_size"] defaults = { "all": ["recipe", "num_params"], "models": classification_fields, From 0c96e141520437fc6dcd11fa4e5a8b1a2f65147b Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Apr 2022 13:53:19 +0100 Subject: [PATCH 07/10] update todo --- test/test_extended_models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_extended_models.py b/test/test_extended_models.py index 6358d0b979b..017a4a33868 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -79,6 +79,7 @@ def test_naming_conventions(model_fn): ) @run_if_test_with_extended def test_schema_meta_validation(model_fn): + # TODO: add list of permitted fields classification_fields = ["categories", "acc@1", "acc@5", "min_size"] defaults = { "all": ["recipe", "num_params"], From b0aaaf197351a0024ac90cd3445fda1273a295c6 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Apr 2022 14:34:34 +0100 Subject: [PATCH 08/10] Simplify with assert --- torchvision/models/vision_transformer.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py index 40ad07a93d0..6d881080d04 100644 --- a/torchvision/models/vision_transformer.py +++ b/torchvision/models/vision_transformer.py @@ -288,10 +288,7 @@ def _vision_transformer( ) -> VisionTransformer: if weights is not None: _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) - if weights.meta["min_size"][0] != weights.meta["min_size"][1]: - raise ValueError( - f'min_size: {weights.meta["min_size"]} is not valid! Currently we only support a 2-dimensional square and width = height' - ) + assert weights.meta["min_size"][0] == weights.meta["min_size"][1] _ovewrite_named_param(kwargs, "image_size", weights.meta["min_size"][0]) image_size = kwargs.pop("image_size", 224) From 22059c5fb24c3bdaa1781603fc9eeb891248da73 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Apr 2022 15:11:38 +0100 Subject: [PATCH 09/10] Adding min_size to all models. --- test/test_extended_models.py | 4 ++-- torchvision/models/detection/faster_rcnn.py | 1 + torchvision/models/detection/fcos.py | 1 + torchvision/models/detection/keypoint_rcnn.py | 1 + torchvision/models/detection/mask_rcnn.py | 1 + torchvision/models/detection/retinanet.py | 1 + torchvision/models/detection/ssd.py | 1 + torchvision/models/detection/ssdlite.py | 1 + torchvision/models/optical_flow/raft.py | 13 +++++++++++++ torchvision/models/segmentation/deeplabv3.py | 1 + torchvision/models/segmentation/fcn.py | 1 + torchvision/models/segmentation/lraspp.py | 1 + 12 files changed, 25 insertions(+), 2 deletions(-) diff --git a/test/test_extended_models.py b/test/test_extended_models.py index 017a4a33868..be45a53b17f 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -80,9 +80,9 @@ def test_naming_conventions(model_fn): @run_if_test_with_extended def test_schema_meta_validation(model_fn): # TODO: add list of permitted fields - classification_fields = ["categories", "acc@1", "acc@5", "min_size"] + classification_fields = ["categories", "acc@1", "acc@5"] defaults = { - "all": ["recipe", "num_params"], + "all": ["recipe", "num_params", "min_size"], "models": classification_fields, "detection": ["categories", "map"], "quantization": classification_fields + ["backend", "unquantized"], diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index a0ae56965ab..08274e7bc75 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -371,6 +371,7 @@ def forward(self, x): _COMMON_META = { "categories": _COCO_CATEGORIES, + "min_size": (1, 1), } diff --git a/torchvision/models/detection/fcos.py b/torchvision/models/detection/fcos.py index 5cd8eab2c88..438566d3ecd 100644 --- a/torchvision/models/detection/fcos.py +++ b/torchvision/models/detection/fcos.py @@ -653,6 +653,7 @@ class FCOS_ResNet50_FPN_Weights(WeightsEnum): meta={ "num_params": 32269600, "categories": _COCO_CATEGORIES, + "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#fcos-resnet-50-fpn", "map": 39.2, }, diff --git a/torchvision/models/detection/keypoint_rcnn.py b/torchvision/models/detection/keypoint_rcnn.py index 44d8133c496..b864c2854ef 100644 --- a/torchvision/models/detection/keypoint_rcnn.py +++ b/torchvision/models/detection/keypoint_rcnn.py @@ -310,6 +310,7 @@ def forward(self, x): _COMMON_META = { "categories": _COCO_PERSON_CATEGORIES, "keypoint_names": _COCO_PERSON_KEYPOINT_NAMES, + "min_size": (1, 1), } diff --git a/torchvision/models/detection/mask_rcnn.py b/torchvision/models/detection/mask_rcnn.py index e9b3a69be0e..44254913a22 100644 --- a/torchvision/models/detection/mask_rcnn.py +++ b/torchvision/models/detection/mask_rcnn.py @@ -352,6 +352,7 @@ def __init__(self, in_channels, dim_reduced, num_classes): _COMMON_META = { "categories": _COCO_CATEGORIES, + "min_size": (1, 1), } diff --git a/torchvision/models/detection/retinanet.py b/torchvision/models/detection/retinanet.py index 5c74bb429ff..ed40702eed8 100644 --- a/torchvision/models/detection/retinanet.py +++ b/torchvision/models/detection/retinanet.py @@ -675,6 +675,7 @@ def forward(self, images, targets=None): _COMMON_META = { "categories": _COCO_CATEGORIES, + "min_size": (1, 1), } diff --git a/torchvision/models/detection/ssd.py b/torchvision/models/detection/ssd.py index 598818f20db..7d9800f15ea 100644 --- a/torchvision/models/detection/ssd.py +++ b/torchvision/models/detection/ssd.py @@ -32,6 +32,7 @@ class SSD300_VGG16_Weights(WeightsEnum): meta={ "num_params": 35641826, "categories": _COCO_CATEGORIES, + "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssd300-vgg16", "map": 25.1, }, diff --git a/torchvision/models/detection/ssdlite.py b/torchvision/models/detection/ssdlite.py index c3974532b89..73afc1eaca6 100644 --- a/torchvision/models/detection/ssdlite.py +++ b/torchvision/models/detection/ssdlite.py @@ -191,6 +191,7 @@ class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum): meta={ "num_params": 3440060, "categories": _COCO_CATEGORIES, + "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssdlite320-mobilenetv3-large", "map": 21.3, }, diff --git a/torchvision/models/optical_flow/raft.py b/torchvision/models/optical_flow/raft.py index 1bce5118429..1d623cce3ec 100644 --- a/torchvision/models/optical_flow/raft.py +++ b/torchvision/models/optical_flow/raft.py @@ -511,12 +511,18 @@ def forward(self, image1, image2, num_flow_updates: int = 12): return flow_predictions +_COMMON_META = { + "min_size": (64, 64), +} + + class Raft_Large_Weights(WeightsEnum): C_T_V1 = Weights( # Chairs + Things, ported from original paper repo (raft-things.pth) url="https://download.pytorch.org/models/raft_large_C_T_V1-22a6c225.pth", transforms=OpticalFlow, meta={ + **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", "sintel_train_cleanpass_epe": 1.4411, @@ -531,6 +537,7 @@ class Raft_Large_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_large_C_T_V2-1bb1363a.pth", transforms=OpticalFlow, meta={ + **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "sintel_train_cleanpass_epe": 1.3822, @@ -545,6 +552,7 @@ class Raft_Large_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_large_C_T_SKHT_V1-0b8c9e55.pth", transforms=OpticalFlow, meta={ + **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", "sintel_test_cleanpass_epe": 1.94, @@ -559,6 +567,7 @@ class Raft_Large_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_large_C_T_SKHT_V2-ff5fadd5.pth", transforms=OpticalFlow, meta={ + **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "sintel_test_cleanpass_epe": 1.819, @@ -571,6 +580,7 @@ class Raft_Large_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_large_C_T_SKHT_K_V1-4a6a5039.pth", transforms=OpticalFlow, meta={ + **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/princeton-vl/RAFT", "kitti_test_f1-all": 5.10, @@ -585,6 +595,7 @@ class Raft_Large_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_large_C_T_SKHT_K_V2-b5c70766.pth", transforms=OpticalFlow, meta={ + **_COMMON_META, "num_params": 5257536, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "kitti_test_f1-all": 5.19, @@ -600,6 +611,7 @@ class Raft_Small_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_small_C_T_V1-ad48884c.pth", transforms=OpticalFlow, meta={ + **_COMMON_META, "num_params": 990162, "recipe": "https://github.com/princeton-vl/RAFT", "sintel_train_cleanpass_epe": 2.1231, @@ -613,6 +625,7 @@ class Raft_Small_Weights(WeightsEnum): url="https://download.pytorch.org/models/raft_small_C_T_V2-01064c6d.pth", transforms=OpticalFlow, meta={ + **_COMMON_META, "num_params": 990162, "recipe": "https://github.com/pytorch/vision/tree/main/references/optical_flow", "sintel_train_cleanpass_epe": 1.9901, diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index 9d8bddf8b64..f4fbab128bb 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -130,6 +130,7 @@ def _deeplabv3_resnet( _COMMON_META = { "categories": _VOC_CATEGORIES, + "min_size": (1, 1), } diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index 933e3e977ea..09a62c97089 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -49,6 +49,7 @@ def __init__(self, in_channels: int, channels: int) -> None: _COMMON_META = { "categories": _VOC_CATEGORIES, + "min_size": (1, 1), } diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py index a508d65a39d..48107f13466 100644 --- a/torchvision/models/segmentation/lraspp.py +++ b/torchvision/models/segmentation/lraspp.py @@ -100,6 +100,7 @@ class LRASPP_MobileNet_V3_Large_Weights(WeightsEnum): meta={ "num_params": 3221538, "categories": _VOC_CATEGORIES, + "min_size": (1, 1), "recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#lraspp_mobilenet_v3_large", "mIoU": 57.9, "acc": 91.2, From 1da50ae38c6ff792299ddd5e91986498f5ea6e59 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Apr 2022 15:14:23 +0100 Subject: [PATCH 10/10] Update RAFT min size to 128 --- torchvision/models/optical_flow/raft.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/optical_flow/raft.py b/torchvision/models/optical_flow/raft.py index 1d623cce3ec..346641ae83d 100644 --- a/torchvision/models/optical_flow/raft.py +++ b/torchvision/models/optical_flow/raft.py @@ -512,7 +512,7 @@ def forward(self, image1, image2, num_flow_updates: int = 12): _COMMON_META = { - "min_size": (64, 64), + "min_size": (128, 128), }