Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
ace0bd4
Create AddAudioStream
NicolasHug Feb 10, 2025
1b362ec
Add basic decoding
NicolasHug Feb 10, 2025
224e18c
Fix getNumChannels, sort of
NicolasHug Feb 11, 2025
274730d
Merge branch 'main' of github.com:pytorch/torchcodec into audio_support
NicolasHug Feb 11, 2025
4f7c1c4
tmp
NicolasHug Feb 12, 2025
a4ae1b0
Bunch of debug stuff
NicolasHug Feb 13, 2025
36d0dc0
Rename currentPts into lastDecodedAvFramePts
NicolasHug Feb 13, 2025
181b2c9
Merge branch 'renameCurrentPts' into audio_support
NicolasHug Feb 13, 2025
1f5ed83
Merge branch 'main' of github.com:pytorch/torchcodec into audio_support
NicolasHug Feb 13, 2025
bd946c4
lint
NicolasHug Feb 13, 2025
5464e62
Improve skip logic
NicolasHug Feb 13, 2025
0c9b1f6
Add offset logic
NicolasHug Feb 13, 2025
8b34014
Update test framework
NicolasHug Feb 14, 2025
cb77e37
Comment out debugging printf
NicolasHug Feb 14, 2025
c0583e6
bunch of tests
NicolasHug Feb 14, 2025
221c22c
Merge branch 'main' of github.com:pytorch/torchcodec into audio_support
NicolasHug Feb 14, 2025
3e740dd
Maybe fix version stuff
NicolasHug Feb 14, 2025
73fa225
Skip test that's bound to fail
NicolasHug Feb 14, 2025
6082803
Add support for get_frames_in_range
NicolasHug Feb 14, 2025
cf8ccb3
More tests, more methods
NicolasHug Feb 15, 2025
f7615e2
test. cleanup
NicolasHug Feb 15, 2025
e87837f
typo
NicolasHug Feb 15, 2025
0b17d99
Remove debug stuff
NicolasHug Feb 15, 2025
915631d
Let core ops return 3D tensors
NicolasHug Feb 15, 2025
c6fcf16
Small refac
NicolasHug Feb 16, 2025
1751c6b
Some cleanup
NicolasHug Feb 16, 2025
0f92d60
Handle approximate mode. Sort of.
NicolasHug Feb 16, 2025
a2b10ca
cleanup
NicolasHug Feb 16, 2025
6c7e31f
tons of comments
NicolasHug Feb 17, 2025
04bf185
Fix some tests
NicolasHug Feb 18, 2025
e44b444
Fix C++ tests
NicolasHug Feb 18, 2025
a89287d
some comments
NicolasHug Feb 18, 2025
90d7409
Put back loose compilation flag for now
NicolasHug Feb 19, 2025
79ade05
Merge branch 'main' of github.com:pytorch/torchcodec into audio_support
NicolasHug Feb 28, 2025
b572276
reduce diff
NicolasHug Mar 3, 2025
0443c3b
Merge branch 'main' of github.com:pytorch/torchcodec into audio_support
NicolasHug Mar 4, 2025
c987f9c
Revert ops tests to those of main
NicolasHug Mar 4, 2025
2ef91b7
Only allow get_frames_played_in_range
NicolasHug Mar 4, 2025
8a4a444
Add correct support for getFramesPlayedInRange
NicolasHug Mar 4, 2025
2d39297
Start creating public class
NicolasHug Mar 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/torchcodec/decoders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@
from ._core import VideoStreamMetadata
from ._video_decoder import VideoDecoder # noqa

# from ._audio_decoder import AudioDecoder # Will be public when more stable

SimpleVideoDecoder = VideoDecoder
140 changes: 140 additions & 0 deletions src/torchcodec/decoders/_audio_decoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from pathlib import Path
from typing import Literal, Optional, Tuple, Union

from torch import Tensor

from torchcodec.decoders import _core as core

# Text appended to error messages raised for conditions that are not expected
# to occur; it points the user to the project's bug-report template.
_ERROR_REPORTING_INSTRUCTIONS = """
This should never happen. Please report an issue following the steps in
https://github.com/pytorch/torchcodec/issues/new?assignees=&labels=&projects=&template=bug-report.yml.
"""


class AudioDecoder:
    """Decoder for a single audio stream of a media source.

    On construction the underlying core decoder is created from ``source``,
    an audio stream is added to it, and the selected stream's metadata and
    index are stored as ``self.metadata`` and ``self.stream_index``.

    TODO docs
    """

    def __init__(
        self,
        source: Union[str, Path, bytes, Tensor],
        *,
        sample_rate: Optional[int] = None,
        stream_index: Optional[int] = None,
        seek_mode: Literal["exact", "approximate"] = "exact",
    ):
        """Create the core decoder and attach an audio stream to it.

        Args:
            source: Where to read the media from. ``str`` and ``Path`` values
                are opened with ``core.create_from_file``, ``bytes`` with
                ``core.create_from_bytes`` and ``Tensor`` with
                ``core.create_from_tensor``.
            sample_rate: Not implemented yet; must be left as ``None``.
            stream_index: Forwarded to ``core.add_audio_stream``. When
                ``None``, the container's best audio stream index is used to
                look up the metadata.
            seek_mode: Either ``"exact"`` or ``"approximate"``; forwarded to
                the core decoder factory.

        Raises:
            ValueError: If ``sample_rate`` is not ``None`` or ``seek_mode`` is
                not one of the supported values.
            TypeError: If ``source`` is none of the supported types.
        """
        # sample_rate is a placeholder for now: anything but None is rejected.
        if sample_rate is not None:
            raise ValueError("TODO implement this")

        # TODO unify validation with VideoDecoder?
        supported_modes = ("exact", "approximate")
        if seek_mode not in supported_modes:
            modes_listing = ", ".join(supported_modes)
            raise ValueError(
                f"Invalid seek mode ({seek_mode}). "
                f"Supported values are {modes_listing}."
            )

        # Path objects are handed to the file-based factory as plain strings.
        file_path = str(source) if isinstance(source, Path) else source
        if isinstance(file_path, str):
            self._decoder = core.create_from_file(file_path, seek_mode)
        elif isinstance(source, bytes):
            self._decoder = core.create_from_bytes(source, seek_mode)
        elif isinstance(source, Tensor):
            self._decoder = core.create_from_tensor(source, seek_mode)
        else:
            raise TypeError(
                f"Unknown source type: {type(source)}. "
                "Supported types are str, Path, bytes and Tensor."
            )

        core.add_audio_stream(self._decoder, stream_index=stream_index)

        stream_info = _get_and_validate_stream_metadata(self._decoder, stream_index)
        self.metadata, self.stream_index = stream_info

        # The metadata sanity checks below are currently disabled.
        # if self.metadata.num_frames is None:
        #     raise ValueError(
        #         "The number of frames is unknown. " + _ERROR_REPORTING_INSTRUCTIONS
        #     )
        # self._num_frames = self.metadata.num_frames

        # if self.metadata.begin_stream_seconds is None:
        #     raise ValueError(
        #         "The minimum pts value in seconds is unknown. "
        #         + _ERROR_REPORTING_INSTRUCTIONS
        #     )
        # self._begin_stream_seconds = self.metadata.begin_stream_seconds

        # if self.metadata.end_stream_seconds is None:
        #     raise ValueError(
        #         "The maximum pts value in seconds is unknown. "
        #         + _ERROR_REPORTING_INSTRUCTIONS
        #     )
        # self._end_stream_seconds = self.metadata.end_stream_seconds

    # TODO we need to have a default for stop_seconds.
    def get_samples_played_in_range(
        self, start_seconds: float, stop_seconds: float
    ) -> Tensor:
        """Decode the audio between ``start_seconds`` and ``stop_seconds``.

        Both bounds are forwarded unchanged to
        ``core.get_frames_by_pts_in_range``; the first element of its result
        is returned and the remaining elements are discarded.

        NOTE(review): the range validation below is currently disabled, so
        invalid bounds are only caught by whatever the core op checks.

        TODO DOCS
        """
        # if not start_seconds <= stop_seconds:
        #     raise ValueError(
        #         f"Invalid start seconds: {start_seconds}. It must be less than or equal to stop seconds ({stop_seconds})."
        #     )
        # if not self._begin_stream_seconds <= start_seconds < self._end_stream_seconds:
        #     raise ValueError(
        #         f"Invalid start seconds: {start_seconds}. "
        #         f"It must be greater than or equal to {self._begin_stream_seconds} "
        #         f"and less than or equal to {self._end_stream_seconds}."
        #     )
        # if not stop_seconds <= self._end_stream_seconds:
        #     raise ValueError(
        #         f"Invalid stop seconds: {stop_seconds}. "
        #         f"It must be less than or equal to {self._end_stream_seconds}."
        #     )

        decoded = core.get_frames_by_pts_in_range(
            self._decoder,
            start_seconds=start_seconds,
            stop_seconds=stop_seconds,
        )
        frames, *_ = decoded
        # TODO need to return view on this to account for samples instead of
        # frames
        return frames


def _get_and_validate_stream_metadata(
    decoder: Tensor,
    stream_index: Optional[int] = None,
) -> Tuple[core.AudioStreamMetadata, int]:
    """Resolve the audio stream to use and return its metadata.

    Args:
        decoder: The core decoder handle passed to ``core.get_video_metadata``.
        stream_index: Index of the stream to look up. When ``None``, the
            container's ``best_audio_stream_index`` is used instead.

    Returns:
        A ``(stream_metadata, stream_index)`` tuple, where ``stream_index`` is
        the index that was actually used.

    Raises:
        ValueError: If no index was given and the container metadata has no
            best audio stream index.

    NOTE(review): the index is not range-checked here, nor is the selected
    stream checked to be an audio stream.
    """
    # TODO should this still be called `get_video_metadata`?
    metadata = core.get_video_metadata(decoder)

    if stream_index is None:
        # Fall back to the stream the container metadata reports as best.
        stream_index = metadata.best_audio_stream_index
        if stream_index is None:
            raise ValueError(
                "The best audio stream is unknown and there is no specified stream. "
                + _ERROR_REPORTING_INSTRUCTIONS
            )

    return (metadata.streams[stream_index], stream_index)
4 changes: 3 additions & 1 deletion src/torchcodec/decoders/_core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

find_package(Torch REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
# TODO: Put back the strict flags (-Wextra -pedantic -Werror) commented out below.
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall ${TORCH_CXX_FLAGS}")
find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)

function(make_torchcodec_library library_name ffmpeg_target)
Expand Down
20 changes: 20 additions & 0 deletions src/torchcodec/decoders/_core/FFMPEGCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,26 @@ int64_t getDuration(const AVFrame* frame) {
#endif
}

// Returns the number of audio channels of `avFrame`.
//
// Depending on the FFmpeg version being compiled against, the count is read
// from AVFrame::ch_layout (newer channel-layout API) or derived from the
// legacy AVFrame::channel_layout mask. The gate is libavfilter >= 8.44, which
// should correspond to the release that introduced ch_layout.
int getNumChannels(const AVFrame* avFrame) {
// Note: every macro name must be spelled exactly. An undefined identifier in
// an #if silently evaluates to 0, which would disable the MAJOR == 8 clause.
#if LIBAVFILTER_VERSION_MAJOR > 8 || \
    (LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
  return avFrame->ch_layout.nb_channels;
#else
  return av_get_channel_layout_nb_channels(avFrame->channel_layout);
#endif
}

// Returns the number of audio channels configured on `avCodecContext`.
//
// Depending on the FFmpeg version being compiled against, the count is read
// from AVCodecContext::ch_layout (newer channel-layout API) or from the legacy
// AVCodecContext::channels field.
int getNumChannels(const UniqueAVCodecContext& avCodecContext) {
// Not sure about the exactness of the version bounds, but as long as this
// compiles we're fine.
// Note: every macro name must be spelled exactly. An undefined identifier in
// an #if silently evaluates to 0, which would disable the MAJOR == 8 clause.
#if LIBAVFILTER_VERSION_MAJOR > 8 || \
    (LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
  return avCodecContext->ch_layout.nb_channels;
#else
  return avCodecContext->channels;
#endif
}

AVIOBytesContext::AVIOBytesContext(
const void* data,
size_t data_size,
Expand Down
3 changes: 3 additions & 0 deletions src/torchcodec/decoders/_core/FFMPEGCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,9 @@ std::string getFFMPEGErrorStringFromErrorCode(int errorCode);
int64_t getDuration(const UniqueAVFrame& frame);
int64_t getDuration(const AVFrame* frame);

// Return the number of audio channels of a frame / of a codec context. The
// implementations select which FFmpeg field to read based on the FFmpeg
// library version being compiled against.
int getNumChannels(const AVFrame* avFrame);
int getNumChannels(const UniqueAVCodecContext& avCodecContext);

// Returns true if sws_scale can handle unaligned data.
bool canSwsScaleHandleUnalignedData();

Expand Down
Loading