diff --git a/README.md b/README.md
index 8643e0e3a..c42c16828 100644
--- a/README.md
+++ b/README.md
@@ -2,10 +2,10 @@
 # TorchCodec

-TorchCodec is a Python library for decoding videos into PyTorch tensors. It aims
-to be fast, easy to use, and well integrated into the PyTorch ecosystem. If you
-want to use PyTorch to train ML models on videos, TorchCodec is how you turn
-those videos into data.
+TorchCodec is a Python library for decoding videos into PyTorch tensors, on CPU
+and CUDA GPUs. It aims to be fast, easy to use, and well integrated into the
+PyTorch ecosystem. If you want to use PyTorch to train ML models on videos,
+TorchCodec is how you turn those videos into data.

 We achieve these capabilities through:

@@ -19,21 +19,24 @@ We achieve these capabilities through:
   or used directly to train models.

 > [!NOTE]
-> ⚠️ TorchCodec is still in early development stage and some APIs may be updated
-> in future versions without a deprecation cycle, depending on user feedback.
+> ⚠️ TorchCodec is still in development and some APIs may be updated
+> in future versions, depending on user feedback.
 > If you have any suggestions or issues, please let us know by
 > [opening an issue](https://github.com/pytorch/torchcodec/issues/new/choose)!

 ## Using TorchCodec

-Here's a condensed summary of what you can do with TorchCodec. For a more
-detailed example, [check out our
+Here's a condensed summary of what you can do with TorchCodec. For more detailed
+examples, [check out our
 documentation](https://pytorch.org/torchcodec/stable/generated_examples/)!

+#### Decoding
+
 ```python
 from torchcodec.decoders import VideoDecoder

-decoder = VideoDecoder("path/to/video.mp4")
+device = "cpu" # or e.g. "cuda"!
+decoder = VideoDecoder("path/to/video.mp4", device=device)

 decoder.metadata
 # VideoStreamMetadata:
@@ -44,39 +47,47 @@ decoder.metadata
 # average_fps: 25.0
 # ... (truncated output)

-len(decoder) # == decoder.metadata.num_frames!
-# 250
-decoder.metadata.average_fps # Note: instantaneous fps can be higher or lower
-# 25.0
-
 # Simple Indexing API
 decoder[0] # uint8 tensor of shape [C, H, W]
 decoder[0 : -1 : 20] # uint8 stacked tensor of shape [N, C, H, W]

+# Indexing, with PTS and duration info:
+decoder.get_frames_at(indices=[2, 100])
+# FrameBatch:
+# data (shape): torch.Size([2, 3, 270, 480])
+# pts_seconds: tensor([0.0667, 3.3367], dtype=torch.float64)
+# duration_seconds: tensor([0.0334, 0.0334], dtype=torch.float64)

-# Iterate over frames:
-for frame in decoder:
-    pass
+# Time-based indexing with PTS and duration info
+decoder.get_frames_played_at(seconds=[0.5, 10.4])
+# FrameBatch:
+# data (shape): torch.Size([2, 3, 270, 480])
+# pts_seconds: tensor([ 0.4671, 10.3770], dtype=torch.float64)
+# duration_seconds: tensor([0.0334, 0.0334], dtype=torch.float64)
+```

-# Indexing, with PTS and duration info
-decoder.get_frame_at(len(decoder) - 1)
-# Frame:
-# data (shape): torch.Size([3, 400, 640])
-# pts_seconds: 9.960000038146973
-# duration_seconds: 0.03999999910593033
+#### Clip sampling

-decoder.get_frames_in_range(start=10, stop=30, step=5)
-# FrameBatch:
-# data (shape): torch.Size([4, 3, 400, 640])
-# pts_seconds: tensor([0.4000, 0.6000, 0.8000, 1.0000])
-# duration_seconds: tensor([0.0400, 0.0400, 0.0400, 0.0400])
+```python

-# Time-based indexing with PTS and duration info
-decoder.get_frame_played_at(pts_seconds=2)
-# Frame:
-# data (shape): torch.Size([3, 400, 640])
-# pts_seconds: 2.0
-# duration_seconds: 0.03999999910593033
+
+from torchcodec.samplers import clips_at_regular_timestamps
+
+clips_at_regular_timestamps(
+    decoder,
+    seconds_between_clip_starts=1.5,
+    num_frames_per_clip=4,
+    seconds_between_frames=0.1
+)
+# FrameBatch:
+# data (shape): torch.Size([9, 4, 3, 270, 480])
+# pts_seconds: tensor([[ 0.0000, 0.0667, 0.1668, 0.2669],
+#         [ 1.4681, 1.5682, 1.6683, 1.7684],
+#         [ 2.9696, 3.0697, 3.1698, 3.2699],
+# ... (truncated), dtype=torch.float64)
+# duration_seconds: tensor([[0.0334, 0.0334, 0.0334, 0.0334],
+#         [0.0334, 0.0334, 0.0334, 0.0334],
+#         [0.0334, 0.0334, 0.0334, 0.0334],
+# ... (truncated), dtype=torch.float64)
 ```

 You can use the following snippet to generate a video with FFmpeg and tryout
@@ -142,7 +153,7 @@ format you want. Refer to Nvidia's GPU support matrix for more details
 [official instructions](https://pytorch.org/get-started/locally/).

 3. Install or compile FFmpeg with NVDEC support.
-   TorchCodec with CUDA should work with FFmpeg versions in [4, 7].
+   TorchCodec with CUDA should work with FFmpeg versions in [5, 7].

    If FFmpeg is not already installed, or you need a more recent version, an
    easy way to install it is to use `conda`:

@@ -172,16 +183,17 @@ format you want. Refer to Nvidia's GPU support matrix for more details
    ffmpeg -hwaccel cuda -hwaccel_output_format cuda -i test/resources/nasa_13013.mp4 -f null -
    ```

-4. Install TorchCodec by passing in an `--index-url` parameter that corresponds to your CUDA
-   Toolkit version, example:
+4. Install TorchCodec by passing in an `--index-url` parameter that corresponds
+   to your CUDA Toolkit version, for example:

    ```bash
-   # This corresponds to CUDA Toolkit version 12.4 and nightly Pytorch.
-   pip install torchcodec --index-url=https://download.pytorch.org/whl/nightly/cu124
+   # This corresponds to CUDA Toolkit version 12.4. It should be the same one
+   # you used when you installed PyTorch (if you installed PyTorch with pip).
+   pip install torchcodec --index-url=https://download.pytorch.org/whl/cu124
    ```

-   Note that without passing in the `--index-url` parameter, `pip` installs TorchCodec
-   binaries from PyPi which are CPU-only and do not have CUDA support.
+   Note that without passing in the `--index-url` parameter, `pip` installs
+   the CPU-only version of TorchCodec.

 ## Benchmark Results

diff --git a/docs/source/index.rst b/docs/source/index.rst
index c882dc488..bbc8bfa78 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,10 +1,10 @@
 Welcome to the TorchCodec documentation!
 ========================================

-TorchCodec is a Python library for decoding videos into PyTorch tensors. It aims
-to be fast, easy to use, and well integrated into the PyTorch ecosystem. If you
-want to use PyTorch to train ML models on videos, TorchCodec is how you turn
-those videos into data.
+TorchCodec is a Python library for decoding videos into PyTorch tensors, on CPU
+and CUDA GPUs. It aims to be fast, easy to use, and well integrated into the
+PyTorch ecosystem. If you want to use PyTorch to train ML models on videos,
+TorchCodec is how you turn those videos into data.

 We achieve these capabilities through:

@@ -17,13 +17,6 @@ We achieve these capabilities through:
 * Returning data as PyTorch tensors, ready to be fed into PyTorch transforms
   or used directly to train models.

-.. note::
-
-   TorchCodec is still in early development stage and we are actively seeking
-   feedback. If you have any suggestions or issues, please let us know by
-   `opening an issue <https://github.com/pytorch/torchcodec/issues/new/choose>`_
-   on our `GitHub repository <https://github.com/pytorch/torchcodec>`_.
-
 .. grid:: 3

     .. grid-item-card:: :octicon:`file-code;1em`
@@ -48,16 +41,23 @@ We achieve these capabilities through:
         :link: generated_examples/sampling.html
         :link-type: url

-        How to sample video clips
+        How to sample regular and random clips from a video

     .. grid-item-card:: :octicon:`file-code;1em`
-        GPU decoding using TorchCodec
+        GPU decoding
         :img-top: _static/img/card-background.svg
         :link: generated_examples/basic_cuda_example.html
         :link-type: url

         A simple example demonstrating CUDA GPU decoding

+.. note::
+
+   TorchCodec is still in development and we are actively seeking
+   feedback. If you have any suggestions or issues, please let us know by
+   `opening an issue <https://github.com/pytorch/torchcodec/issues/new/choose>`_
+   on our `GitHub repository <https://github.com/pytorch/torchcodec>`_.
+
 .. toctree::
    :maxdepth: 1
    :caption: TorchCodec documentation
diff --git a/examples/basic_example.py b/examples/basic_example.py
index ba85b32f9..01c26d445 100644
--- a/examples/basic_example.py
+++ b/examples/basic_example.py
@@ -120,22 +120,22 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
 # their :term:`pts` (Presentation Time Stamp), and their duration.
 # This can be achieved using the
 # :meth:`~torchcodec.decoders.VideoDecoder.get_frame_at` and
-# :meth:`~torchcodec.decoders.VideoDecoder.get_frames_in_range` methods, which
-# will return a :class:`~torchcodec.Frame` and
-# :class:`~torchcodec.FrameBatch` objects respectively.
+# :meth:`~torchcodec.decoders.VideoDecoder.get_frames_at` methods, which will
+# return :class:`~torchcodec.Frame` and :class:`~torchcodec.FrameBatch`
+# objects, respectively.
 last_frame = decoder.get_frame_at(len(decoder) - 1)
 print(f"{type(last_frame) = }")
 print(last_frame)

 # %%
-middle_frames = decoder.get_frames_in_range(start=10, stop=20, step=2)
-print(f"{type(middle_frames) = }")
-print(middle_frames)
+other_frames = decoder.get_frames_at([10, 0, 50])
+print(f"{type(other_frames) = }")
+print(other_frames)

 # %%
 plot(last_frame.data, "Last frame")
-plot(middle_frames.data, "Middle frames")
+plot(other_frames.data, "Other frames")

 # %%
 # Both :class:`~torchcodec.Frame` and
@@ -152,7 +152,7 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
 # So far, we have retrieved frames based on their index. We can also retrieve
 # frames based on *when* they are played with
 # :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` and
-# :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_in_range`, which
+# :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_at`, which
 # also returns :class:`~torchcodec.Frame` and :class:`~torchcodec.FrameBatch`
 # respectively.

@@ -161,13 +161,10 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
 print(frame_at_2_seconds)

 # %%
-first_two_seconds = decoder.get_frames_played_in_range(
-    start_seconds=0,
-    stop_seconds=2,
-)
-print(f"{type(first_two_seconds) = }")
-print(first_two_seconds)
+other_frames = decoder.get_frames_played_at(seconds=[10.1, 0.3, 5])
+print(f"{type(other_frames) = }")
+print(other_frames)

 # %%
 plot(frame_at_2_seconds.data, "Frame played at 2 seconds")
-plot(first_two_seconds.data, "Frames played during [0, 2) seconds")
+plot(other_frames.data, "Other frames")
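
---

To see how the pieces introduced by this diff fit together, here is a minimal end-to-end sketch using only the APIs the diff itself documents (`VideoDecoder` with a `device` argument, `get_frames_at`, `get_frames_played_at`, and `clips_at_regular_timestamps`). The video path is a placeholder, and the `torch.cuda.is_available()` fallback is an assumption of this sketch; the README snippets simply hardcode the device string.

```python
# Sketch combining the APIs documented in this diff. "path/to/video.mp4" is a
# placeholder; the CUDA-availability fallback is an assumption of this sketch.
import torch
from torchcodec.decoders import VideoDecoder
from torchcodec.samplers import clips_at_regular_timestamps

device = "cuda" if torch.cuda.is_available() else "cpu"
decoder = VideoDecoder("path/to/video.mp4", device=device)

# Index-based batch decoding: returns a FrameBatch with data, pts_seconds,
# and duration_seconds fields.
frames = decoder.get_frames_at(indices=[2, 100])
print(frames.data.shape)   # uint8 tensor, [2, C, H, W]
print(frames.pts_seconds)  # float64 tensor of presentation timestamps

# Time-based batch decoding: the frames being played at the given timestamps.
played = decoder.get_frames_played_at(seconds=[0.5, 10.4])
print(played.duration_seconds)

# Regularly spaced clips: a 5D FrameBatch of shape
# [num_clips, num_frames_per_clip, C, H, W], ready to feed a video model.
clips = clips_at_regular_timestamps(
    decoder,
    seconds_between_clip_starts=1.5,
    num_frames_per_clip=4,
    seconds_between_frames=0.1,
)
print(clips.data.shape)
```

The concrete H and W values depend on the input video; the README example above shows 270×480.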