From a5231db180bf5372a7a59f655770ff87b7ec043e Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Fri, 13 May 2022 13:44:11 +0100
Subject: [PATCH 1/9] Adding code examples for image classification + quant

---
 docs/source/models_new.rst | 60 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/docs/source/models_new.rst b/docs/source/models_new.rst
index 43374582f2a..28f39055216 100644
--- a/docs/source/models_new.rst
+++ b/docs/source/models_new.rst
@@ -13,7 +13,7 @@ Models and pre-trained weights - New
 The ``torchvision.models`` subpackage contains definitions of models for
 addressing different tasks, including: image classification, pixelwise
 semantic segmentation, object detection, instance segmentation, person
-keypoint detection, video classification, and optical flow.
+keypoint detection, video classification and optical flow.
 
 .. note ::
     Backward compatibility is guaranteed for loading a serialized
@@ -56,6 +56,35 @@ weights:
     models/vision_transformer
     models/wide_resnet
 
+|
+
+Here is an example of how to use the pre-trained image classification models:
+
+.. code:: python
+
+    from torchvision.io import read_image
+    from torchvision.models import resnet50, ResNet50_Weights
+
+    img = read_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
+
+    # Step 1: Initialize model
+    weights = ResNet50_Weights.DEFAULT
+    model = resnet50(weights=weights)
+    model.eval()
+
+    # Step 2: Initialize the inference transforms
+    preprocess = weights.transforms()
+
+    # Step 3: Apply inference preprocessing transforms
+    batch = preprocess(img).unsqueeze(0)
+    prediction = model(batch).squeeze(0).softmax(0)
+
+    # Step 4: Use the model and print the predicted category
+    class_id = prediction.argmax().item()
+    score = prediction[class_id].item()
+    category_name = weights.meta["categories"][class_id]
+    print(f"{category_name}: {100 * score}%")
+
 Table of all available classification weights
 ---------------------------------------------
@@ -78,6 +107,35 @@ pre-trained weights:
     models/googlenet_quant
     models/mobilenetv2_quant
 
+|
+
+Here is an example of how to use the pre-trained quantized image classification models:
+
+.. code:: python
+
+    from torchvision.io import read_image
+    from torchvision.models.quantization import resnet50, ResNet50_QuantizedWeights
+
+    img = read_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
+
+    # Step 1: Initialize model
+    weights = ResNet50_QuantizedWeights.DEFAULT
+    model = resnet50(weights=weights, quantize=True)
+    model.eval()
+
+    # Step 2: Initialize the inference transforms
+    preprocess = weights.transforms()
+
+    # Step 3: Apply inference preprocessing transforms
+    batch = preprocess(img).unsqueeze(0)
+    prediction = model(batch).squeeze(0).softmax(0)
+
+    # Step 4: Use the model and print the predicted category
+    class_id = prediction.argmax().item()
+    score = prediction[class_id].item()
+    category_name = weights.meta["categories"][class_id]
+    print(f"{category_name}: {100 * score}%")
+
 Table of all available quantized classification weights
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

From 73797d277ead7669a0b9b751e096b53b3ae6df4e Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Fri, 13 May 2022 13:56:18 +0100
Subject: [PATCH 2/9] Adding code example detection

---
 docs/source/models_new.rst | 40 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/docs/source/models_new.rst b/docs/source/models_new.rst
index 28f39055216..2a7cadf863b 100644
--- a/docs/source/models_new.rst
+++ b/docs/source/models_new.rst
@@ -67,7 +67,7 @@ Here is an example of how to use the pre-trained image classification models:
 
     img = read_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
 
-    # Step 1: Initialize model
+    # Step 1: Initialize model with the best available weights
     weights = ResNet50_Weights.DEFAULT
     model = resnet50(weights=weights)
     model.eval()
@@ -118,7 +118,7 @@ Here is an example of how to use the pre-trained quantized image classification
 
     img = read_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
 
-    # Step 1: Initialize model
+    # Step 1: Initialize model with the best available weights
     weights = ResNet50_QuantizedWeights.DEFAULT
     model = resnet50(weights=weights, quantize=True)
     model.eval()
@@ -188,6 +188,42 @@ weights:
     models/ssd
     models/ssdlite
 
+|
+
+Here is an example of how to use the pre-trained object detection models:
+
+.. code:: python
+
+
+    from torchvision.io.image import read_image
+    from torchvision.utils import draw_bounding_boxes
+    from torchvision.transforms.functional import to_pil_image
+    from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
+
+
+    img = read_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
+
+    # Step 1: Initialize model with the best available weights
+    weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
+    model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.9)
+    model.eval()
+
+    # Step 2: Initialize the inference transforms
+    preprocess = weights.transforms()
+
+    # Step 3: Apply inference preprocessing transforms
+    batch = [preprocess(img)]
+    prediction = model(batch)[0]
+
+    # Step 4: Use the model and visualize the prediction
+    labels = [weights.meta["categories"][i] for i in prediction["labels"]]
+    box = draw_bounding_boxes(img, boxes=prediction["boxes"],
+                              labels=labels,
+                              colors="red",
+                              width=4, font_size=30)
+    im = to_pil_image(box.detach())
+    im.show()
+
 Table of all available Object detection weights
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

From 6cc9be47da3bddfc593b19e79a7ad2dd5b810e96 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Fri, 13 May 2022 14:05:39 +0100
Subject: [PATCH 3/9] Adding code example segmentation

---
 docs/source/models_new.rst | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/docs/source/models_new.rst b/docs/source/models_new.rst
index 2a7cadf863b..fd000ae19c8 100644
--- a/docs/source/models_new.rst
+++ b/docs/source/models_new.rst
@@ -159,6 +159,37 @@ pre-trained weights:
     models/fcn
     models/lraspp
 
+|
+
+Here is an example of how to use the pre-trained semantic segmentation models:
+
+.. code:: python
+
+    from torchvision.io.image import read_image
+    from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
+    from torchvision.transforms.functional import to_pil_image
+
+    img = read_image("gallery/assets/dog1.jpg")
+
+    # Step 1: Initialize model with the best available weights
+    weights = FCN_ResNet50_Weights.DEFAULT
+    model = fcn_resnet50(weights=weights)
+    model.eval()
+
+    # Step 2: Initialize the inference transforms
+    preprocess = weights.transforms()
+
+    # Step 3: Apply inference preprocessing transforms
+    batch = preprocess(img).unsqueeze(0)
+    prediction = model(batch)['out']
+    normalized_masks = prediction.softmax(dim=1)
+
+    # Step 4: Use the model and visualize the prediction
+    class_to_idx = {cls: idx for (idx, cls) in enumerate(weights.meta["categories"])}
+    mask = normalized_masks[0, class_to_idx["dog"]]
+    to_pil_image(mask).show()
+
+
 Table of all available semantic segmentation weights
 ----------------------------------------------------
@@ -196,10 +227,9 @@ Here is an example of how to use the pre-trained object detection models:
 
 
     from torchvision.io.image import read_image
+    from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
     from torchvision.utils import draw_bounding_boxes
     from torchvision.transforms.functional import to_pil_image
-    from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
-
 
     img = read_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
 

From 21c2c679f788a29b19192274c41d0bb47af87a72 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Fri, 13 May 2022 14:11:15 +0100
Subject: [PATCH 4/9] Adding code example for video classification

---
 docs/source/models_new.rst | 42 +++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/docs/source/models_new.rst b/docs/source/models_new.rst
index fd000ae19c8..14536cc730e 100644
--- a/docs/source/models_new.rst
+++ b/docs/source/models_new.rst
@@ -77,9 +77,9 @@ Here is an example of how to use the pre-trained image classification models:
 
     # Step 3: Apply inference preprocessing transforms
     batch = preprocess(img).unsqueeze(0)
-    prediction = model(batch).squeeze(0).softmax(0)
 
     # Step 4: Use the model and print the predicted category
+    prediction = model(batch).squeeze(0).softmax(0)
     class_id = prediction.argmax().item()
     score = prediction[class_id].item()
     category_name = weights.meta["categories"][class_id]
@@ -128,9 +128,9 @@ Here is an example of how to use the pre-trained quantized image classification
 
     # Step 3: Apply inference preprocessing transforms
     batch = preprocess(img).unsqueeze(0)
-    prediction = model(batch).squeeze(0).softmax(0)
 
     # Step 4: Use the model and print the predicted category
+    prediction = model(batch).squeeze(0).softmax(0)
     class_id = prediction.argmax().item()
     score = prediction[class_id].item()
     category_name = weights.meta["categories"][class_id]
@@ -181,10 +181,10 @@ Here is an example of how to use the pre-trained semantic segmentation models:
 
     # Step 3: Apply inference preprocessing transforms
     batch = preprocess(img).unsqueeze(0)
-    prediction = model(batch)['out']
-    normalized_masks = prediction.softmax(dim=1)
 
     # Step 4: Use the model and visualize the prediction
+    prediction = model(batch)['out']
+    normalized_masks = prediction.softmax(dim=1)
     class_to_idx = {cls: idx for (idx, cls) in enumerate(weights.meta["categories"])}
     mask = normalized_masks[0, class_to_idx["dog"]]
     to_pil_image(mask).show()
@@ -243,9 +243,9 @@ Here is an example of how to use the pre-trained object detection models:
 
     # Step 3: Apply inference preprocessing transforms
     batch = [preprocess(img)]
-    prediction = model(batch)[0]
 
     # Step 4: Use the model and visualize the prediction
+    prediction = model(batch)[0]
     labels = [weights.meta["categories"][i] for i in prediction["labels"]]
     box = draw_bounding_boxes(img, boxes=prediction["boxes"],
                               labels=labels,
@@ -315,6 +315,38 @@ pre-trained weights:
     models/video_resnet
 
+|
+
+Here is an example of how to use the pre-trained video classification models:
+
+.. code:: python
+
+
+    from torchvision.io.video import read_video
+    from torchvision.models.video import r3d_18, R3D_18_Weights
+
+    vid, _, _ = read_video("test/assets/videos/v_SoccerJuggling_g23_c01.avi")
+    vid = vid[:32] # optionally shorten duration
+
+    # Step 1: Initialize model with the best available weights
+    weights = R3D_18_Weights.DEFAULT
+    model = r3d_18(weights=weights)
+    model.eval()
+
+    # Step 2: Initialize the inference transforms
+    preprocess = weights.transforms()
+
+    # Step 3: Apply inference preprocessing transforms
+    batch = preprocess(vid).unsqueeze(0)
+
+    # Step 4: Use the model and print the predicted category
+    prediction = model(batch).squeeze(0).softmax(0)
+    label = prediction.argmax().item()
+    score = prediction[label].item()
+    category_name = weights.meta["categories"][label]
+    print(f"{category_name}: {100 * score}%")
+
+
 Table of all available video classification weights
 ---------------------------------------------------

From 3736ca8740bf362fce0804d2028d9fd19474686f Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Fri, 13 May 2022 14:45:51 +0100
Subject: [PATCH 5/9] Adding information on how to use the new API.

---
 docs/source/models_new.rst | 44 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/docs/source/models_new.rst b/docs/source/models_new.rst
index 14536cc730e..8c480104c64 100644
--- a/docs/source/models_new.rst
+++ b/docs/source/models_new.rst
@@ -24,6 +24,48 @@ keypoint detection, video classification and optical flow.
     `documentation
     `_
 
+TorchVision offers a new `Multi-weight support API
+`_ for loading different weights to the
+existing model builder methods:
+
+.. code:: python
+
+    from torchvision.models import resnet50, ResNet50_Weights
+
+    # Old weights with accuracy 76.130%
+    resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
+
+    # New weights with accuracy 80.858%
+    resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
+
+    # Best available weights (currently alias for IMAGENET1K_V2)
+    resnet50(weights=ResNet50_Weights.DEFAULT)
+
+    # Strings are also supported
+    resnet50(weights="IMAGENET1K_V2")
+
+    # No weights - random initialization
+    resnet50(weights=None)
+
+
+Migrating to the new API is very straightforward. The following method calls between the 2 APIs are all equivalent:
+
+.. code:: python
+
+    from torchvision.models import resnet50, ResNet50_Weights
+
+    # Using pretrained weights:
+    resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
+    resnet50(pretrained=True)
+    resnet50(True)
+
+    # Using no weights:
+    resnet50(weights=None)
+    resnet50(pretrained=False)
+    resnet50(False)
+
+Note that the ``pretrained`` parameter is now deprecated, using it will emit warnings and will be removed on v0.15.
+
 
 Classification
 ==============
@@ -183,7 +225,7 @@ Here is an example of how to use the pre-trained semantic segmentation models:
     batch = preprocess(img).unsqueeze(0)
 
     # Step 4: Use the model and visualize the prediction
-    prediction = model(batch)['out']
+    prediction = model(batch)["out"]
     normalized_masks = prediction.softmax(dim=1)
     class_to_idx = {cls: idx for (idx, cls) in enumerate(weights.meta["categories"])}
     mask = normalized_masks[0, class_to_idx["dog"]]
     to_pil_image(mask).show()

From 8c32ee0497209a4fccd41f79ea9a225c11185894 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Fri, 13 May 2022 16:13:17 +0100
Subject: [PATCH 6/9] Putting back the comma.

---
 docs/source/models_new.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/models_new.rst b/docs/source/models_new.rst
index 8c480104c64..9a6975f0914 100644
--- a/docs/source/models_new.rst
+++ b/docs/source/models_new.rst
@@ -13,7 +13,7 @@ Models and pre-trained weights - New
 The ``torchvision.models`` subpackage contains definitions of models for
 addressing different tasks, including: image classification, pixelwise
 semantic segmentation, object detection, instance segmentation, person
-keypoint detection, video classification and optical flow.
+keypoint detection, video classification, and optical flow.
 
 .. note ::
     Backward compatibility is guaranteed for loading a serialized

From 8f2a23cfb629ff73b9d7d3ed55b0505d05efc023 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Fri, 13 May 2022 16:20:13 +0100
Subject: [PATCH 7/9] Apply suggestions from code review

Co-authored-by: Nicolas Hug
---
 docs/source/models_new.rst | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/docs/source/models_new.rst b/docs/source/models_new.rst
index 9a6975f0914..cdc632d4892 100644
--- a/docs/source/models_new.rst
+++ b/docs/source/models_new.rst
@@ -24,7 +24,7 @@ keypoint detection, video classification, and optical flow.
     `documentation
     `_
 
-TorchVision offers a new `Multi-weight support API
+As of 0.13, TorchVision offers a new `Multi-weight support API
 `_ for loading different weights to the
 existing model builder methods:
@@ -39,13 +39,14 @@ existing model builder methods:
     resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
 
     # Best available weights (currently alias for IMAGENET1K_V2)
+    # Note that these weights may change across versions
     resnet50(weights=ResNet50_Weights.DEFAULT)
 
     # Strings are also supported
     resnet50(weights="IMAGENET1K_V2")
 
     # No weights - random initialization
-    resnet50(weights=None)
+    resnet50(weights=None) # or resnet50()
 
@@ -56,13 +57,13 @@ Migrating to the new API is very straightforward. The following method calls bet
 
     # Using pretrained weights:
     resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
-    resnet50(pretrained=True)
-    resnet50(True)
+    resnet50(pretrained=True) # deprecated
+    resnet50(True) # deprecated
 
     # Using no weights:
     resnet50(weights=None)
-    resnet50(pretrained=False)
-    resnet50(False)
+    resnet50(pretrained=False) # deprecated
+    resnet50(False) # deprecated
 
 Note that the ``pretrained`` parameter is now deprecated, using it will emit warnings and will be removed on v0.15.
@@ -125,8 +126,8 @@ Here is an example of how to use the pre-trained image classification models:
     class_id = prediction.argmax().item()
     score = prediction[class_id].item()
     category_name = weights.meta["categories"][class_id]
-    print(f"{category_name}: {100 * score}%")
-
+    print(f"{category_name}: {100 * score:.1f}%")
+    # prints: bow tie: 14.4%
 
 Table of all available classification weights
 ---------------------------------------------

From a99cb397d31ec63cec56c8f0579925f17938301b Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Fri, 13 May 2022 16:26:27 +0100
Subject: [PATCH 8/9] Remove output to avoid staleness from flakiness.

---
 docs/source/models_new.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/source/models_new.rst b/docs/source/models_new.rst
index cdc632d4892..525f41a7d64 100644
--- a/docs/source/models_new.rst
+++ b/docs/source/models_new.rst
@@ -127,7 +127,6 @@ Here is an example of how to use the pre-trained image classification models:
     score = prediction[class_id].item()
     category_name = weights.meta["categories"][class_id]
     print(f"{category_name}: {100 * score:.1f}%")
-    # prints: bow tie: 14.4%
 
 Table of all available classification weights
 ---------------------------------------------

From 7aa2fc7986ceec6d069bda06baaced07a64f8e5c Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Fri, 13 May 2022 16:28:35 +0100
Subject: [PATCH 9/9] Minor fixes.

---
 docs/source/models_new.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/models_new.rst b/docs/source/models_new.rst
index 525f41a7d64..1d349afdbdd 100644
--- a/docs/source/models_new.rst
+++ b/docs/source/models_new.rst
@@ -24,7 +24,7 @@ keypoint detection, video classification, and optical flow.
     `documentation
     `_
 
-As of 0.13, TorchVision offers a new `Multi-weight support API
+As of v0.13, TorchVision offers a new `Multi-weight support API
 `_ for loading different weights to the
 existing model builder methods:
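
For reviewers trying the series locally, here is a minimal sketch (not part of any commit above) that recombines the pieces these patches document: the weights enum, the ``DEFAULT`` alias, ``weights.transforms()``, and ``weights.meta["categories"]``. It assumes torchvision >= 0.13; the image path is a placeholder.

.. code:: python

    from torchvision.io import read_image
    from torchvision.models import resnet50, ResNet50_Weights

    # The weights enum can be iterated to list every published entry.
    for w in ResNet50_Weights:
        print(w)  # ResNet50_Weights.IMAGENET1K_V1, ResNet50_Weights.IMAGENET1K_V2

    # Pin an explicit entry instead of DEFAULT when reproducibility matters,
    # since DEFAULT may point to different weights in a future release.
    weights = ResNet50_Weights.IMAGENET1K_V2
    model = resnet50(weights=weights)
    model.eval()

    # Apply the preprocessing bundled with the weights to a placeholder image.
    preprocess = weights.transforms()
    batch = preprocess(read_image("path/to/some_image.jpg")).unsqueeze(0)
    prediction = model(batch).squeeze(0).softmax(0)
    print(weights.meta["categories"][prediction.argmax().item()])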