From 0b3dd469eb0362a997e77d98d0e9c453f4839c24 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Sun, 23 Jan 2022 09:31:51 +0000
Subject: [PATCH] Revert vit_h_14 as it breaks our CI

---
 docs/source/models.rst                     |  2 --
 hubconf.py                                 |  1 -
 .../ModelTester.test_vit_h_14_expect.pkl   | Bin 939 -> 0 bytes
 torchvision/models/vision_transformer.py   | 21 ----------------
 .../prototype/models/vision_transformer.py | 23 ------------------
 5 files changed, 47 deletions(-)
 delete mode 100644 test/expect/ModelTester.test_vit_h_14_expect.pkl

diff --git a/docs/source/models.rst b/docs/source/models.rst
index 4daee5d5534..82eb3170e78 100644
--- a/docs/source/models.rst
+++ b/docs/source/models.rst
@@ -89,7 +89,6 @@ You can construct a model with random weights by calling its constructor:
     vit_b_32 = models.vit_b_32()
     vit_l_16 = models.vit_l_16()
     vit_l_32 = models.vit_l_32()
-    vit_h_14 = models.vit_h_14()
 
 We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`.
 These can be constructed by passing ``pretrained=True``:
@@ -464,7 +463,6 @@ VisionTransformer
     vit_b_32
     vit_l_16
     vit_l_32
-    vit_h_14
 
 Quantized Models
 ----------------
diff --git a/hubconf.py b/hubconf.py
index 1b3b191efa4..2b2eeb1c166 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -63,5 +63,4 @@
     vit_b_32,
     vit_l_16,
     vit_l_32,
-    vit_h_14,
 )
diff --git a/test/expect/ModelTester.test_vit_h_14_expect.pkl b/test/expect/ModelTester.test_vit_h_14_expect.pkl
deleted file mode 100644
index 1f846beb6a0bccf8b545f5a67b74482015cc878b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 939
zcmWIWW@cev;NW1u00Im`42ea_8JT6N`YDMeiFyUuIc`pT3{fbcfhoBpAE-(%zO*DW
zr
zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L
z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w
z{zUOK5(jATumGUYDqB%_@&wQ~AdEY-_!+F>p;eYzR1Ay-Hz#ul>i!MRpZGie3qz3t@Vp
VG!WW#-;;RB*&}^R}M

diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py
--- a/torchvision/models/vision_transformer.py
+++ b/torchvision/models/vision_transformer.py
     )
 
 
-def vit_h_14(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VisionTransformer:
-    """
-    Constructs a vit_h_14 architecture from
-    `"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" <https://arxiv.org/abs/2010.11929>`_.
-
-    NOTE: Pretrained weights are not available for this model.
-    """
-    return _vision_transformer(
-        arch="vit_h_14",
-        patch_size=14,
-        num_layers=32,
-        num_heads=16,
-        hidden_dim=1280,
-        mlp_dim=5120,
-        pretrained=pretrained,
-        progress=progress,
-        **kwargs,
-    )
-
-
 def interpolate_embeddings(
     image_size: int,
     patch_size: int,
diff --git a/torchvision/prototype/models/vision_transformer.py b/torchvision/prototype/models/vision_transformer.py
index 72330fd1191..3f256842429 100644
--- a/torchvision/prototype/models/vision_transformer.py
+++ b/torchvision/prototype/models/vision_transformer.py
@@ -19,12 +19,10 @@
     "ViT_B_32_Weights",
     "ViT_L_16_Weights",
     "ViT_L_32_Weights",
-    "ViT_H_14_Weights",
     "vit_b_16",
     "vit_b_32",
     "vit_l_16",
     "vit_l_32",
-    "vit_h_14",
 ]
 
 
@@ -105,11 +103,6 @@ class ViT_L_32_Weights(WeightsEnum):
     default = ImageNet1K_V1
 
 
-class ViT_H_14_Weights(WeightsEnum):
-    # Weights are not available yet.
-    pass
-
-
 def _vision_transformer(
     patch_size: int,
     num_layers: int,
@@ -203,19 +196,3 @@ def vit_l_32(*, weights: Optional[ViT_L_32_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
         progress=progress,
         **kwargs,
     )
-
-
-@handle_legacy_interface(weights=("pretrained", None))
-def vit_h_14(*, weights: Optional[ViT_H_14_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
-    weights = ViT_H_14_Weights.verify(weights)
-
-    return _vision_transformer(
-        patch_size=14,
-        num_layers=32,
-        num_heads=16,
-        hidden_dim=1280,
-        mlp_dim=5120,
-        weights=weights,
-        progress=progress,
-        **kwargs,
-    )
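
Until the builder is restored, a ViT-H/14-sized model can still be constructed directly from the public VisionTransformer class using the hyperparameters that appear in the reverted builders. Below is a minimal sketch, assuming the current VisionTransformer constructor signature and the 224-pixel image size used by the other vit_* builders; no pretrained weights exist for this configuration.

import torch
from torchvision.models.vision_transformer import VisionTransformer

# ViT-H/14 hyperparameters taken from the reverted vit_h_14 builders;
# image_size=224 is an assumption matching the other vit_* variants.
vit_h_14 = VisionTransformer(
    image_size=224,
    patch_size=14,
    num_layers=32,
    num_heads=16,
    hidden_dim=1280,
    mlp_dim=5120,
)

# Sanity check: a dummy batch should produce 1000-class logits.
logits = vit_h_14(torch.rand(1, 3, 224, 224))
print(logits.shape)  # expected: torch.Size([1, 1000])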