From 9e85e4645829f2f73b69adae2b5afecd0668da8d Mon Sep 17 00:00:00 2001 From: Francisco Massa Date: Thu, 1 Aug 2019 14:05:06 +0200 Subject: [PATCH 1/2] Expose docs for io and ops package Had to modify the docstrings to use Napoleon NumPy style, because Napoleon Google Style doesn't support multiple return arguments --- docs/source/index.rst | 2 ++ docs/source/io.rst | 13 +++++++++ docs/source/ops.rst | 17 +++++++++++ torchvision/io/video.py | 61 +++++++++++++++++++++++++--------------- torchvision/ops/boxes.py | 60 +++++++++++++++++++++++---------------- 5 files changed, 106 insertions(+), 47 deletions(-) create mode 100644 docs/source/io.rst create mode 100644 docs/source/ops.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index f8f89f92629..9de82b6e7fc 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -9,7 +9,9 @@ architectures, and common image transformations for computer vision. :caption: Package Reference datasets + io models + ops transforms utils diff --git a/docs/source/io.rst b/docs/source/io.rst new file mode 100644 index 00000000000..d59d5c96048 --- /dev/null +++ b/docs/source/io.rst @@ -0,0 +1,13 @@ +torchvision.io +============== + +.. currentmodule:: torchvision.io + +The :mod:`torchvision.io` package provides functions for performing IO +operations. They are currently specific to reading and writing video. + +.. autofunction:: read_video + +.. autofunction:: read_video_timestamps + +.. autofunction:: write_video diff --git a/docs/source/ops.rst b/docs/source/ops.rst new file mode 100644 index 00000000000..ec87d02556e --- /dev/null +++ b/docs/source/ops.rst @@ -0,0 +1,17 @@ +torchvision.ops +=============== + +.. currentmodule:: torchvision.ops + +:mod:`torchvision.ops` implements operators that are specific for Computer Vision. + +.. note:: + Those operators currently do not support TorchScript. + + +.. autofunction:: nms +.. autofunction:: roi_align +.. autofunction:: roi_pool + +.. autoclass:: RoIAlign +.. 
autoclass:: RoIPool diff --git a/torchvision/io/video.py b/torchvision/io/video.py index 83afe726e43..5f883f8e5b9 100644 --- a/torchvision/io/video.py +++ b/torchvision/io/video.py @@ -28,11 +28,14 @@ def write_video(filename, video_array, fps, video_codec='libx264', options=None) """ Writes a 4d tensor in [T, H, W, C] format in a video file - Arguments: - filename (str): path where the video will be saved - video_array (Tensor[T, H, W, C]): tensor containing the individual frames, - as a uint8 tensor in [T, H, W, C] format - fps (Number): frames per second + Parameters + ---------- + filename : str + path where the video will be saved + video_array : Tensor[T, H, W, C] + tensor containing the individual frames, as a uint8 tensor in [T, H, W, C] format + fps : Number + frames per second """ _check_av_available() video_array = torch.as_tensor(video_array, dtype=torch.uint8).numpy() @@ -135,18 +138,25 @@ def read_video(filename, start_pts=0, end_pts=None): Reads a video from a file, returning both the video frames as well as the audio frames - Arguments: - filename (str): path to the video file - start_pts (int, optional): the start presentation time of the video - end_pts (int, optional): the end presentation time - - Returns: - vframes (Tensor[T, H, W, C]): the `T` video frames - aframes (Tensor[K, L]): the audio frames, where `K` is the number of channels - and `L` is the number of points - info (Dict): metadata for the video and audio. Can contain the fields - - video_fps (float) - - audio_fps (int) + Parameters + ---------- + filename : str + path to the video file + start_pts : int, optional + the start presentation time of the video + end_pts : int, optional + the end presentation time + + Returns + ------- + vframes : Tensor[T, H, W, C] + the `T` video frames + aframes : Tensor[K, L] + the audio frames, where `K` is the number of channels and `L` is the + number of points + info : Dict + metadata for the video and audio. 
Can contain the fields video_fps (float) + and audio_fps (int) """ _check_av_available() @@ -201,13 +211,18 @@ def read_video_timestamps(filename): Note that the function decodes the whole video frame-by-frame. - Arguments: - filename (str): path to the video file + Parameters + ---------- + filename : str + path to the video file + + Returns + ------- + pts : List[int] + presentation timestamps for each one of the frames in the video. + video_fps : int + the frame rate for the video - Returns: - pts (List[int]): presentation timestamps for each one of the frames - in the video. - video_fps (int): the frame rate for the video """ _check_av_available() container = av.open(filename, metadata_errors='ignore') diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py index 982fb2f7d13..af92a0cbf0e 100644 --- a/torchvision/ops/boxes.py +++ b/torchvision/ops/boxes.py @@ -11,17 +11,23 @@ def nms(boxes, scores, iou_threshold): IoU greater than iou_threshold with another (higher scoring) box. - Arguments: - boxes (Tensor[N, 4]): boxes to perform NMS on. They - are expected to be in (x1, y1, x2, y2) format - scores (Tensor[N]): scores for each one of the boxes - iou_threshold (float): discards all overlapping - boxes with IoU < iou_threshold - - Returns: - keep (Tensor): int64 tensor with the indices - of the elements that have been kept - by NMS, sorted in decreasing order of scores + Parameters + ---------- + boxes : Tensor[N, 4] + boxes to perform NMS on. 
They + are expected to be in (x1, y1, x2, y2) format + scores : Tensor[N] + scores for each one of the boxes + iou_threshold : float + discards all overlapping + boxes with IoU > iou_threshold + + Returns + ------- + keep : Tensor + int64 tensor with the indices + of the elements that have been kept + by NMS, sorted in decreasing order of scores """ _C = _lazy_import() return _C.nms(boxes, scores, iou_threshold) @@ -34,19 +40,25 @@ def batched_nms(boxes, scores, idxs, iou_threshold): Each index value correspond to a category, and NMS will not be applied between elements of different categories. - Arguments: - boxes (Tensor[N, 4]): boxes where NMS will be performed. They - are expected to be in (x1, y1, x2, y2) format - scores (Tensor[N]): scores for each one of the boxes - idxs (Tensor[N]): indices of the categories for each - one of the boxes. - iou_threshold (float): discards all overlapping boxes - with IoU < iou_threshold - - Returns: - keep (Tensor): int64 tensor with the indices of - the elements that have been kept by NMS, sorted - in decreasing order of scores + Parameters + ---------- + boxes : Tensor[N, 4] + boxes where NMS will be performed. They + are expected to be in (x1, y1, x2, y2) format + scores : Tensor[N] + scores for each one of the boxes + idxs : Tensor[N] + indices of the categories for each one of the boxes. 
+ iou_threshold : float + discards all overlapping boxes + with IoU > iou_threshold + + Returns + ------- + keep : Tensor + int64 tensor with the indices of + the elements that have been kept by NMS, sorted + in decreasing order of scores """ if boxes.numel() == 0: return torch.empty((0,), dtype=torch.int64, device=boxes.device) From 613d61036904e83311b51705d91882176a9bd735 Mon Sep 17 00:00:00 2001 From: Francisco Massa Date: Thu, 1 Aug 2019 14:07:42 +0200 Subject: [PATCH 2/2] Add video section --- docs/source/io.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/io.rst b/docs/source/io.rst index d59d5c96048..e7aeedc0716 100644 --- a/docs/source/io.rst +++ b/docs/source/io.rst @@ -6,6 +6,9 @@ torchvision.io The :mod:`torchvision.io` package provides functions for performing IO operations. They are currently specific to reading and writing video. +Video +----- + .. autofunction:: read_video .. autofunction:: read_video_timestamps