From 71447a1a3cfbcf5446e70250f55429f11eeb6a6a Mon Sep 17 00:00:00 2001 From: Alexander Soare Date: Fri, 1 Oct 2021 11:26:39 +0100 Subject: [PATCH] Tweak docs for better understanding of what spatial_scale does --- torchvision/ops/ps_roi_align.py | 6 ++++-- torchvision/ops/ps_roi_pool.py | 6 ++++-- torchvision/ops/roi_align.py | 6 ++++-- torchvision/ops/roi_pool.py | 6 ++++-- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/torchvision/ops/ps_roi_align.py b/torchvision/ops/ps_roi_align.py index d42353e2b0d..be64bdddd87 100644 --- a/torchvision/ops/ps_roi_align.py +++ b/torchvision/ops/ps_roi_align.py @@ -30,8 +30,10 @@ def ps_roi_align( in the batch. output_size (int or Tuple[int, int]): the size of the output (in bins or pixels) after the pooling is performed, as (height, width). - spatial_scale (float): a scaling factor that maps the input coordinates to - the box coordinates. Default: 1.0 + spatial_scale (float): a scaling factor that maps the box coordinates to + the input coordinates. For example, if your boxes are defined on the scale + of a 224x224 image and your input is a 112x112 feature map (resulting from a 0.5x scaling of + the original image), you'll want to set this to 0.5. Default: 1.0 sampling_ratio (int): number of sampling points in the interpolation grid used to compute the output value of each pooled output bin. If > 0, then exactly ``sampling_ratio x sampling_ratio`` sampling points per bin are used. If diff --git a/torchvision/ops/ps_roi_pool.py b/torchvision/ops/ps_roi_pool.py index d0331e557fd..0084c0e0c74 100644 --- a/torchvision/ops/ps_roi_pool.py +++ b/torchvision/ops/ps_roi_pool.py @@ -29,8 +29,10 @@ def ps_roi_pool( in the batch. output_size (int or Tuple[int, int]): the size of the output (in bins or pixels) after the pooling is performed, as (height, width). - spatial_scale (float): a scaling factor that maps the input coordinates to - the box coordinates. Default: 1.0 + spatial_scale (float): a scaling factor that maps the box coordinates to + the input coordinates. For example, if your boxes are defined on the scale + of a 224x224 image and your input is a 112x112 feature map (resulting from a 0.5x scaling of + the original image), you'll want to set this to 0.5. Default: 1.0 Returns: Tensor[K, C / (output_size[0] * output_size[1]), output_size[0], output_size[1]]: The pooled RoIs. diff --git a/torchvision/ops/roi_align.py b/torchvision/ops/roi_align.py index b589089aa42..959499fdb24 100644 --- a/torchvision/ops/roi_align.py +++ b/torchvision/ops/roi_align.py @@ -32,8 +32,10 @@ def roi_align( in the batch. output_size (int or Tuple[int, int]): the size of the output (in bins or pixels) after the pooling is performed, as (height, width). - spatial_scale (float): a scaling factor that maps the input coordinates to - the box coordinates. Default: 1.0 + spatial_scale (float): a scaling factor that maps the box coordinates to + the input coordinates. For example, if your boxes are defined on the scale + of a 224x224 image and your input is a 112x112 feature map (resulting from a 0.5x scaling of + the original image), you'll want to set this to 0.5. Default: 1.0 sampling_ratio (int): number of sampling points in the interpolation grid used to compute the output value of each pooled output bin. If > 0, then exactly ``sampling_ratio x sampling_ratio`` sampling points per bin are used. If diff --git a/torchvision/ops/roi_pool.py b/torchvision/ops/roi_pool.py index 90f2dd3d173..37590b78164 100644 --- a/torchvision/ops/roi_pool.py +++ b/torchvision/ops/roi_pool.py @@ -29,8 +29,10 @@ def roi_pool( in the batch. output_size (int or Tuple[int, int]): the size of the output after the cropping is performed, as (height, width) - spatial_scale (float): a scaling factor that maps the input coordinates to - the box coordinates. Default: 1.0 + spatial_scale (float): a scaling factor that maps the box coordinates to + the input coordinates. For example, if your boxes are defined on the scale + of a 224x224 image and your input is a 112x112 feature map (resulting from a 0.5x scaling of + the original image), you'll want to set this to 0.5. Default: 1.0 Returns: Tensor[K, C, output_size[0], output_size[1]]: The pooled RoIs.