From fd34aa53649b0e336ef9ab57107ec6acb3d6da98 Mon Sep 17 00:00:00 2001 From: Pearu Peterson Date: Tue, 9 Sep 2025 11:05:30 +0300 Subject: [PATCH 1/2] Fix pre-commit and enable Lint CI workflow. --- .github/workflows/lint.yml | 109 ++++++++++++------ cmake/LoadHIP.cmake | 1 - docs/source/conf.py | 2 +- .../audio_feature_augmentation_tutorial.py | 1 + .../tutorials/audio_resampling_tutorial.py | 3 +- examples/tutorials/mvdr_tutorial.py | 1 + examples/tutorials/squim_tutorial.py | 8 +- .../forced_align/cpu/compute.cpp | 38 +++--- src/libtorchaudio/lfilter.cpp | 6 +- src/torchaudio/__init__.py | 28 ++--- src/torchaudio/_extension/__init__.py | 3 +- src/torchaudio/_extension/utils.py | 2 - src/torchaudio/_internal/module_utils.py | 14 ++- src/torchaudio/_torchcodec.py | 58 ++++------ src/torchaudio/datasets/utils.py | 2 +- src/torchaudio/functional/filtering.py | 51 ++++---- src/torchaudio/functional/functional.py | 20 ++-- src/torchaudio/models/decoder/__init__.py | 18 +-- src/torchaudio/models/decoder/_ctc_decoder.py | 4 +- .../models/decoder/_cuda_ctc_decoder.py | 2 +- src/torchaudio/pipelines/_tts/utils.py | 2 +- src/torchaudio/transforms/__init__.py | 1 + src/torchaudio/transforms/_transforms.py | 2 +- src/torchaudio/utils/__init__.py | 4 +- src/torchaudio/utils/download.py | 1 - test/librosa_mock.py | 10 +- test/smoke_test/smoke_test.py | 1 + .../common_utils/__init__.py | 5 +- .../common_utils/case_utils.py | 1 + .../common_utils/func_utils.py | 2 +- .../common_utils/kaldi_utils.py | 4 +- .../common_utils/sox_utils.py | 4 +- .../kaldi/kaldi_compatibility_impl.py | 2 +- test/torchaudio_unittest/deprecation_test.py | 2 - .../functional/autograd_impl.py | 2 +- .../functional/functional_cpu_test.py | 5 +- .../kaldi_compatibility_test_impl.py | 2 +- .../librosa_compatibility_test_impl.py | 27 +++-- .../functional/sox_compatibility_test.py | 2 +- .../torchscript_consistency_impl.py | 2 - .../test_load_save_torchcodec.py | 49 ++++---- .../transforms/autograd_test_impl.py | 2 +- .../librosa_compatibility_test_impl.py | 38 +++--- .../transforms/sox_compatibility_test.py | 2 +- test/torchcodec/decoders.py | 4 +- test/torchcodec/encoders.py | 4 +- tools/setup_helpers/extension.py | 4 +- 47 files changed, 297 insertions(+), 258 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 3eb6b05ef4..329c2b5f0c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,43 +1,76 @@ -# name: Lint +name: Lint -# on: -# pull_request: -# push: -# branches: -# - nightly -# - main -# - release/* -# workflow_dispatch: +on: + push: + branches: [ main ] + pull_request: -# jobs: -# python-source-and-configs: -# uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main -# permissions: -# id-token: write -# contents: read -# with: -# repository: pytorch/audio -# script: | -# set -euo pipefail +concurrency: + group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }} + cancel-in-progress: true -# echo '::group::Setup environment' -# eval "$("$(which conda)" shell.bash hook)" -# # libcst does not have 3.11 pre-built binaries yet. Use python 3.10 -# conda create -y --name env python=3.10 -# conda activate env -# pip3 install --progress-bar=off pre-commit -# echo '::endgroup::' +defaults: + run: + shell: bash -l -eo pipefail {0} -# set +e -# pre-commit run --all-files --show-diff-on-failure -# status=$? +jobs: + pre-commit-checks: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ['3.12'] + steps: + - name: Check out repo + uses: actions/checkout@v3 + - name: Setup conda env + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + miniconda-version: "latest" + activate-environment: test + python-version: ${{ matrix.python-version }} + - name: Update pip + run: python -m pip install --upgrade pip + - name: Install pre-commit + run: | + python -m pip install pre-commit + - name: Run pre-commit checks + run: | + pre-commit run --all-files + - name: Check to see what files pre-commit modified + run: | + git diff -# echo '::group::Add Summry' -# if [ $status -ne 0 ]; then -# echo '### Lint failure' >> $GITHUB_STEP_SUMMARY -# echo '```diff' >> $GITHUB_STEP_SUMMARY -# git --no-pager diff >> $GITHUB_STEP_SUMMARY -# echo '```' >> $GITHUB_STEP_SUMMARY -# fi -# echo '::endgroup::' -# exit $status + mypy: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ['3.12'] + steps: + - name: Check out repo + uses: actions/checkout@v3 + - name: Setup conda env + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + miniconda-version: "latest" + activate-environment: test + python-version: ${{ matrix.python-version }} + - name: Update pip + run: python -m pip install --upgrade pip + - name: Install dependencies and FFmpeg + run: | + python -m pip install --pre torch torchcodec --index-url https://download.pytorch.org/whl/nightly/cpu + conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge + ffmpeg -version + - name: Build and install torchaudio + run: | + python -m pip install -e ".[dev]" --no-build-isolation -vvv + - name: Install mypy + run: | + python -m pip install mypy + - name: Run mypy + run: | + mypy --install-types --non-interactive --config-file mypy.ini diff --git a/cmake/LoadHIP.cmake b/cmake/LoadHIP.cmake index 3ca1befc35..a326d2d9ad 100644 --- a/cmake/LoadHIP.cmake +++ b/cmake/LoadHIP.cmake @@ -267,4 +267,3 @@ if(HIP_FOUND) set(roctracer_INCLUDE_DIRS ${ROCTRACER_PATH}/include) endif() endif() - diff --git a/docs/source/conf.py b/docs/source/conf.py index aa7e15cb30..a63947550c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -59,7 +59,7 @@ nbsphinx_requirejs_path = "" autodoc_member_order = "bysource" -autodoc_mock_imports = ['torchaudio.models.decoder'] +autodoc_mock_imports = ["torchaudio.models.decoder"] # katex options # diff --git a/examples/tutorials/audio_feature_augmentation_tutorial.py b/examples/tutorials/audio_feature_augmentation_tutorial.py index 4c43af1b67..643cb7d573 100644 --- a/examples/tutorials/audio_feature_augmentation_tutorial.py +++ b/examples/tutorials/audio_feature_augmentation_tutorial.py @@ -36,6 +36,7 @@ def _get_sample(path): return torchaudio.load(path) + def get_speech_sample(): return _get_sample(SAMPLE_WAV_SPEECH_PATH) diff --git a/examples/tutorials/audio_resampling_tutorial.py b/examples/tutorials/audio_resampling_tutorial.py index adca5073c9..64035fe09f 100644 --- a/examples/tutorials/audio_resampling_tutorial.py +++ b/examples/tutorials/audio_resampling_tutorial.py @@ -26,7 +26,6 @@ import math import timeit -import matplotlib.colors as mcolors import matplotlib.pyplot as plt from IPython.display import Audio import numpy as np @@ -427,7 +426,7 @@ def plot(data, cols, rows): x_data = np.arange(len(rows)) bar_width = 0.8 / len(cols) for (i, (c, d)) in enumerate(zip(cols, data.T)): - x_pos = x_data + (i - len(cols)/2 + 0.5) * bar_width + x_pos = x_data + (i - len(cols) / 2 + 0.5) * bar_width ax.bar(x_pos, d, bar_width, label=c) ax.legend() ax.set_xticks(x_data) diff --git a/examples/tutorials/mvdr_tutorial.py b/examples/tutorials/mvdr_tutorial.py index 01890afeb9..ba9ce8f94e 100644 --- a/examples/tutorials/mvdr_tutorial.py +++ b/examples/tutorials/mvdr_tutorial.py @@ -128,6 +128,7 @@ def generate_mixture(waveform_clean, waveform_noise, target_snr): waveform_noise *= 10 ** (-(target_snr - current_snr) / 20) return waveform_clean + waveform_noise + # If you have mir_eval installed, you can use it to evaluate the separation quality of the estimated sources. # You can also evaluate the intelligibility of the speech with the Short-Time Objective Intelligibility (STOI) metric # available in the `pystoi` package, or the Perceptual Evaluation of Speech Quality (PESQ) metric available in the `pesq` package. diff --git a/examples/tutorials/squim_tutorial.py b/examples/tutorials/squim_tutorial.py index 55143b5071..3b32505402 100644 --- a/examples/tutorials/squim_tutorial.py +++ b/examples/tutorials/squim_tutorial.py @@ -284,8 +284,8 @@ def plot(waveform, title, sample_rate=16000): # ``` # These values are precomputed and hard-coded below. print(f"Reference metrics for distorted speech at {snr_dbs[0]}dB are\n") -print(f"STOI: 0.9670831113894452") -print(f"PESQ: 2.7961528301239014") +print("STOI: 0.9670831113894452") +print("PESQ: 2.7961528301239014") si_sdr_ref = si_snr(WAVEFORM_DISTORTED[0:1], WAVEFORM_SPEECH) print(f"SI-SDR: {si_sdr_ref}") @@ -304,8 +304,8 @@ def plot(waveform, title, sample_rate=16000): si_sdr_ref = si_snr(WAVEFORM_DISTORTED[1:2], WAVEFORM_SPEECH) # STOI and PESQ metrics are precomputed and hardcoded below. print(f"Reference metrics for distorted speech at {snr_dbs[1]}dB are\n") -print(f"STOI: 0.5743247866630554") -print(f"PESQ: 1.1112866401672363") +print("STOI: 0.5743247866630554") +print("PESQ: 1.1112866401672363") print(f"SI-SDR: {si_sdr_ref}") diff --git a/src/libtorchaudio/forced_align/cpu/compute.cpp b/src/libtorchaudio/forced_align/cpu/compute.cpp index 0ddd21b126..7988099eb1 100644 --- a/src/libtorchaudio/forced_align/cpu/compute.cpp +++ b/src/libtorchaudio/forced_align/cpu/compute.cpp @@ -1,9 +1,9 @@ -#include -#include +#include #include -#include #include -#include +#include +#include +#include using namespace std; @@ -81,18 +81,21 @@ void forced_align_impl( auto curIdxOffset = t % 2; auto prevIdxOffset = (t - 1) % 2; for (auto j = 0; j < S; ++j) { - alphas_a[curIdxOffset * S + j] = -std::numeric_limits::infinity(); // alphas_a[curIdxOffset][j] + alphas_a[curIdxOffset * S + j] = -std::numeric_limits< + scalar_t>::infinity(); // alphas_a[curIdxOffset][j] } if (start == 0) { - alphas_a[curIdxOffset * S] = - alphas_a[prevIdxOffset * S] + logProbs_a[batchIndex][t][blank]; // alphas_a[curIdxOffset][0] + alphas_a[curIdxOffset * S] = alphas_a[prevIdxOffset * S] + + logProbs_a[batchIndex][t][blank]; // alphas_a[curIdxOffset][0] backPtr_a[S * t] = 0; // backPtr_a[t][0] = 0 startloop += 1; } for (auto i = startloop; i < end; i++) { auto x0 = alphas_a[prevIdxOffset * S + i]; // alphas_a[prevIdxOffset][i]; - auto x1 = alphas_a[prevIdxOffset * S + i - 1]; // alphas_a[prevIdxOffset][i - 1]; + auto x1 = + alphas_a[prevIdxOffset * S + i - 1]; // alphas_a[prevIdxOffset][i + // - 1]; auto x2 = -std::numeric_limits::infinity(); auto labelIdx = (i % 2 == 0) ? blank : targets_a[batchIndex][i / 2]; @@ -103,7 +106,8 @@ void forced_align_impl( // (i != 1) just ensures we don't access targets[i - 2] if its i < 2 if (i % 2 != 0 && i != 1 && targets_a[batchIndex][i / 2] != targets_a[batchIndex][i / 2 - 1]) { - x2 = alphas_a[prevIdxOffset * S + i - 2]; // alphas_a[prevIdxOffset][i - 2]; + x2 = alphas_a[prevIdxOffset * S + i - 2]; // alphas_a[prevIdxOffset][i - + // 2]; } scalar_t result = 0.0; if (x2 > x1 && x2 > x0) { @@ -116,12 +120,14 @@ void forced_align_impl( result = x0; backPtr_a[t * S + i] = 0; // backPtr_a[t][i] = 0 } - alphas_a[curIdxOffset * S + i] = result + logProbs_a[batchIndex][t][labelIdx]; // alphas_a[curIdxOffset][i] + alphas_a[curIdxOffset * S + i] = result + + logProbs_a[batchIndex][t][labelIdx]; // alphas_a[curIdxOffset][i] } } auto idx1 = (T - 1) % 2; - auto ltrIdx = alphas_a[S * idx1 + S - 1] > - alphas_a[S * idx1 + S - 2] ? S - 1 : S - 2; // alphas_a[idx1][S - 1], alphas_a[idx1][S - 2] + auto ltrIdx = alphas_a[S * idx1 + S - 1] > alphas_a[S * idx1 + S - 2] + ? S - 1 + : S - 2; // alphas_a[idx1][S - 1], alphas_a[idx1][S - 2] delete[] alphas_a; // path stores the token index for each time step after force alignment. for (auto t = T - 1; t > -1; t--) { @@ -194,15 +200,9 @@ std::tuple compute( logProbs, targets, blank, paths); } }); - return std::make_tuple( - paths, - logProbs - ); + return std::make_tuple(paths, logProbs); } - - - TORCH_LIBRARY_IMPL(torchaudio, CPU, m) { m.impl("forced_align", &compute); } diff --git a/src/libtorchaudio/lfilter.cpp b/src/libtorchaudio/lfilter.cpp index 4a130f34d5..9d9b05c7d8 100644 --- a/src/libtorchaudio/lfilter.cpp +++ b/src/libtorchaudio/lfilter.cpp @@ -82,9 +82,9 @@ void lfilter_core_generic_loop( auto coeff = a_coeff_flipped.unsqueeze(2); for (int64_t i_sample = 0; i_sample < n_samples_input; i_sample++) { auto windowed_output_signal = - torch::narrow(padded_output_waveform, 2, i_sample, i_sample + n_order).transpose(0, 1); - auto o0 = - torch::select(input_signal_windows, 2, i_sample) - + torch::narrow(padded_output_waveform, 2, i_sample, i_sample + n_order) + .transpose(0, 1); + auto o0 = torch::select(input_signal_windows, 2, i_sample) - at::matmul(windowed_output_signal, coeff).squeeze(2).transpose(0, 1); padded_output_waveform.index_put_( {torch::indexing::Slice(), diff --git a/src/torchaudio/__init__.py b/src/torchaudio/__init__.py index 6a9d65f3fc..6b86967c8e 100644 --- a/src/torchaudio/__init__.py +++ b/src/torchaudio/__init__.py @@ -1,23 +1,13 @@ -from torchaudio._internal.module_utils import dropping_io_support, dropping_class_io_support -from typing import Union, BinaryIO, Optional, Tuple import os +from typing import BinaryIO, Optional, Tuple, Union + import torch -import sys # Initialize extension and backend first from . import _extension # noqa # usort: skip +from . import compliance, datasets, functional, models, pipelines, transforms, utils # noqa: F401 from ._torchcodec import load_with_torchcodec, save_with_torchcodec -from . import ( # noqa: F401 - compliance, - datasets, - functional, - models, - pipelines, - transforms, - utils, -) - try: from .version import __version__, git_version # noqa: F401 @@ -100,9 +90,10 @@ def load( channels_first=channels_first, format=format, buffer_size=buffer_size, - backend=backend + backend=backend, ) + def save( uri: Union[str, os.PathLike], src: torch.Tensor, @@ -182,14 +173,19 @@ def save( - The output format is determined by the file extension in the uri. - TorchCodec uses FFmpeg under the hood for encoding. """ - return save_with_torchcodec(uri, src, sample_rate, + return save_with_torchcodec( + uri, + src, + sample_rate, channels_first=channels_first, format=format, encoding=encoding, bits_per_sample=bits_per_sample, buffer_size=buffer_size, backend=backend, - compression=compression) + compression=compression, + ) + __all__ = [ "load", diff --git a/src/torchaudio/_extension/__init__.py b/src/torchaudio/_extension/__init__.py index 11f7c6deec..9b8bd21d5e 100644 --- a/src/torchaudio/_extension/__init__.py +++ b/src/torchaudio/_extension/__init__.py @@ -4,7 +4,7 @@ from torchaudio._internal.module_utils import fail_with_message, is_module_available, no_op -from .utils import _check_cuda_version, _init_dll_path, _LazyImporter, _load_lib +from .utils import _check_cuda_version, _init_dll_path, _load_lib _LG = logging.getLogger(__name__) @@ -43,7 +43,6 @@ _IS_ALIGN_AVAILABLE = torchaudio.lib._torchaudio.is_align_available() - fail_if_no_rir = ( no_op if _IS_RIR_AVAILABLE diff --git a/src/torchaudio/_extension/utils.py b/src/torchaudio/_extension/utils.py index 1cbe3d93e5..bc1dc1404a 100644 --- a/src/torchaudio/_extension/utils.py +++ b/src/torchaudio/_extension/utils.py @@ -4,14 +4,12 @@ They should not depend on external state. Anything that depends on external state should happen in __init__.py """ -import importlib import logging import os import types from pathlib import Path import torch -from torchaudio._internal.module_utils import eval_env _LG = logging.getLogger(__name__) _LIB_DIR = Path(__file__).parent.parent / "lib" diff --git a/src/torchaudio/_internal/module_utils.py b/src/torchaudio/_internal/module_utils.py index 2201055954..ce64c31f0e 100644 --- a/src/torchaudio/_internal/module_utils.py +++ b/src/torchaudio/_internal/module_utils.py @@ -1,7 +1,7 @@ import importlib.util import os import warnings -from functools import wraps, partial +from functools import partial, wraps from typing import Optional @@ -59,8 +59,10 @@ def wrapped(*args, **kwargs): return decorator + UNSUPPORTED = [] + def wrap_deprecated(func, name, direction: str, version: Optional[str] = None, remove: bool = False): @wraps(func) def wrapped(*args, **kwargs): @@ -69,8 +71,10 @@ def wrapped(*args, **kwargs): message += f' It will be removed from {"a future" if version is None else "the " + str(version)} release. ' warnings.warn(message, stacklevel=2) return func(*args, **kwargs) + return wrapped + def deprecated(direction: str, version: Optional[str] = None, remove: bool = False): """Decorator to add deprecation message @@ -101,6 +105,7 @@ def decorator(func): return decorator + DEPRECATION_MSG = ( "This deprecation is part of a large refactoring effort to transition TorchAudio into a maintenance phase. " "Please see https://github.com/pytorch/audio/issues/3902 for more information." @@ -108,12 +113,14 @@ def decorator(func): IO_DEPRECATION_MSG = ( "This deprecation is part of a large refactoring effort to transition TorchAudio into a maintenance phase. " - "The decoding and encoding capabilities of PyTorch for both audio and video are being consolidated into TorchCodec. " + "The decoding and encoding capabilities of PyTorch for both audio" + " and video are being consolidated into TorchCodec. " "Please see https://github.com/pytorch/audio/issues/3902 for more information." ) dropping_support = deprecated(DEPRECATION_MSG, version="2.9", remove=True) + def dropping_class_support(c, msg=DEPRECATION_MSG): c.__init__ = wrap_deprecated(c.__init__, f"{c.__module__}.{c.__name__}", msg, version="2.9", remove=True) c.__doc__ = f"""DEPRECATED @@ -128,6 +135,7 @@ def dropping_class_support(c, msg=DEPRECATION_MSG): UNSUPPORTED.append(c) return c + def dropping_const_support(c, msg=DEPRECATION_MSG, name=None): c.__doc__ = f"""[DEPRECATED] @@ -139,10 +147,12 @@ def dropping_const_support(c, msg=DEPRECATION_MSG, name=None): """ return c + dropping_class_io_support = partial(dropping_class_support, msg=IO_DEPRECATION_MSG) dropping_io_support = deprecated(IO_DEPRECATION_MSG, version="2.9", remove=True) + def fail_with_message(message): """Generate decorator to give users message about missing TorchAudio extension.""" diff --git a/src/torchaudio/_torchcodec.py b/src/torchaudio/_torchcodec.py index 0bc99de043..ef9ab007a3 100644 --- a/src/torchaudio/_torchcodec.py +++ b/src/torchaudio/_torchcodec.py @@ -81,43 +81,34 @@ def load_with_torchcodec( from torchcodec.decoders import AudioDecoder except ImportError as e: raise ImportError( - "TorchCodec is required for load_with_torchcodec. " - "Please install torchcodec to use this function." + "TorchCodec is required for load_with_torchcodec. " "Please install torchcodec to use this function." ) from e # Parameter validation and warnings if not normalize: import warnings + warnings.warn( "TorchCodec AudioDecoder always returns normalized float32 samples. " "The 'normalize=False' parameter is ignored.", UserWarning, - stacklevel=2 + stacklevel=2, ) if buffer_size != 4096: import warnings - warnings.warn( - "The 'buffer_size' parameter is not used by TorchCodec AudioDecoder.", - UserWarning, - stacklevel=2 - ) + + warnings.warn("The 'buffer_size' parameter is not used by TorchCodec AudioDecoder.", UserWarning, stacklevel=2) if backend is not None: import warnings - warnings.warn( - "The 'backend' parameter is not used by TorchCodec AudioDecoder.", - UserWarning, - stacklevel=2 - ) + + warnings.warn("The 'backend' parameter is not used by TorchCodec AudioDecoder.", UserWarning, stacklevel=2) if format is not None: import warnings - warnings.warn( - "The 'format' parameter is not supported by TorchCodec AudioDecoder.", - UserWarning, - stacklevel=2 - ) + + warnings.warn("The 'format' parameter is not supported by TorchCodec AudioDecoder.", UserWarning, stacklevel=2) # Create AudioDecoder try: @@ -253,51 +244,45 @@ def save_with_torchcodec( from torchcodec.encoders import AudioEncoder except ImportError as e: raise ImportError( - "TorchCodec is required for save_with_torchcodec. " - "Please install torchcodec to use this function." + "TorchCodec is required for save_with_torchcodec. " "Please install torchcodec to use this function." ) from e # Parameter validation and warnings if format is not None: import warnings + warnings.warn( "The 'format' parameter is not used by TorchCodec AudioEncoder. " "Format is determined by the file extension.", UserWarning, - stacklevel=2 + stacklevel=2, ) if encoding is not None: import warnings + warnings.warn( - "The 'encoding' parameter is not fully supported by TorchCodec AudioEncoder.", - UserWarning, - stacklevel=2 + "The 'encoding' parameter is not fully supported by TorchCodec AudioEncoder.", UserWarning, stacklevel=2 ) if bits_per_sample is not None: import warnings + warnings.warn( "The 'bits_per_sample' parameter is not directly supported by TorchCodec AudioEncoder.", UserWarning, - stacklevel=2 + stacklevel=2, ) if buffer_size != 4096: import warnings - warnings.warn( - "The 'buffer_size' parameter is not used by TorchCodec AudioEncoder.", - UserWarning, - stacklevel=2 - ) + + warnings.warn("The 'buffer_size' parameter is not used by TorchCodec AudioEncoder.", UserWarning, stacklevel=2) if backend is not None: import warnings - warnings.warn( - "The 'backend' parameter is not used by TorchCodec AudioEncoder.", - UserWarning, - stacklevel=2 - ) + + warnings.warn("The 'backend' parameter is not used by TorchCodec AudioEncoder.", UserWarning, stacklevel=2) # Input validation if not isinstance(src, torch.Tensor): @@ -338,11 +323,12 @@ def save_with_torchcodec( bit_rate = int(compression) else: import warnings + warnings.warn( f"Unsupported compression type {type(compression)}. " "TorchCodec AudioEncoder expects int or float for bit_rate.", UserWarning, - stacklevel=2 + stacklevel=2, ) # Save to file diff --git a/src/torchaudio/datasets/utils.py b/src/torchaudio/datasets/utils.py index b4599f83aa..b9ee2aa51e 100644 --- a/src/torchaudio/datasets/utils.py +++ b/src/torchaudio/datasets/utils.py @@ -2,7 +2,7 @@ import os import tarfile import zipfile -from typing import Any, List, Optional +from typing import Any, List, Optional # noqa: F401 import torchaudio diff --git a/src/torchaudio/functional/filtering.py b/src/torchaudio/functional/filtering.py index 76deb04a96..1a7aa3e37e 100644 --- a/src/torchaudio/functional/filtering.py +++ b/src/torchaudio/functional/filtering.py @@ -3,8 +3,8 @@ from typing import Optional import torch -from torch import Tensor import torch.nn.functional as F +from torch import Tensor from torchaudio._extension import _IS_TORCHAUDIO_EXT_AVAILABLE @@ -955,20 +955,22 @@ def backward(ctx, dy): n_batch = x.size(0) n_channel = x.size(1) n_order = b_coeffs.size(1) - db = F.conv1d( + db = ( + F.conv1d( F.pad(x, (n_order - 1, 0)).view(1, n_batch * n_channel, -1), dy.view(n_batch * n_channel, 1, -1), - groups=n_batch * n_channel - ).view( - n_batch, n_channel, -1 - ).sum(0).flip(1) if b_coeffs.requires_grad else None - dx = F.conv1d( - F.pad(dy, (0, n_order - 1)), - b_coeffs.unsqueeze(1), - groups=n_channel - ) if x.requires_grad else None + groups=n_batch * n_channel, + ) + .view(n_batch, n_channel, -1) + .sum(0) + .flip(1) + if b_coeffs.requires_grad + else None + ) + dx = F.conv1d(F.pad(dy, (0, n_order - 1)), b_coeffs.unsqueeze(1), groups=n_channel) if x.requires_grad else None return (dx, db) + class DifferentiableIIR(torch.autograd.Function): @staticmethod def forward(ctx, waveform, a_coeffs_normalized): @@ -976,11 +978,12 @@ def forward(ctx, waveform, a_coeffs_normalized): n_order = a_coeffs_normalized.size(1) n_sample_padded = n_sample + n_order - 1 - a_coeff_flipped = a_coeffs_normalized.flip(1).contiguous(); - padded_output_waveform = torch.zeros(n_batch, n_channel, n_sample_padded, - device=waveform.device, dtype=waveform.dtype) + a_coeff_flipped = a_coeffs_normalized.flip(1).contiguous() + padded_output_waveform = torch.zeros( + n_batch, n_channel, n_sample_padded, device=waveform.device, dtype=waveform.dtype + ) _lfilter_core_loop(waveform, a_coeff_flipped, padded_output_waveform) - output = padded_output_waveform[:,:,n_order - 1:] + output = padded_output_waveform[:, :, n_order - 1 :] ctx.save_for_backward(waveform, a_coeffs_normalized, output) return output @@ -991,17 +994,24 @@ def backward(ctx, dy): n_order = a_coeffs_normalized.size(1) tmp = DifferentiableIIR.apply(dy.flip(2).contiguous(), a_coeffs_normalized).flip(2) dx = tmp if x.requires_grad else None - da = -(tmp.transpose(0, 1).reshape(n_channel, 1, -1) @ - F.pad(y, (n_order - 1, 0)).unfold(2, n_order, 1).transpose(0,1) - .reshape(n_channel, -1, n_order) - ).squeeze(1).flip(1) if a_coeffs_normalized.requires_grad else None + da = ( + -( + tmp.transpose(0, 1).reshape(n_channel, 1, -1) + @ F.pad(y, (n_order - 1, 0)).unfold(2, n_order, 1).transpose(0, 1).reshape(n_channel, -1, n_order) + ) + .squeeze(1) + .flip(1) + if a_coeffs_normalized.requires_grad + else None + ) return (dx, da) + def _lfilter(waveform, a_coeffs, b_coeffs): - n_order = b_coeffs.size(1) filtered_waveform = DifferentiableFIR.apply(waveform, b_coeffs / a_coeffs[:, 0:1]) return DifferentiableIIR.apply(filtered_waveform, a_coeffs / a_coeffs[:, 0:1]) + def lfilter(waveform: Tensor, a_coeffs: Tensor, b_coeffs: Tensor, clamp: bool = True, batching: bool = True) -> Tensor: r"""Perform an IIR filter by evaluating difference equation, using differentiable implementation developed separately by *Yu et al.* :cite:`ismir_YuF23` and *Forgione et al.* :cite:`forgione2021dynonet`. @@ -1071,6 +1081,7 @@ def lfilter(waveform: Tensor, a_coeffs: Tensor, b_coeffs: Tensor, clamp: bool = return output + def lowpass_biquad(waveform: Tensor, sample_rate: int, cutoff_freq: float, Q: float = 0.707) -> Tensor: r"""Design biquad lowpass filter and perform filtering. Similar to SoX implementation. diff --git a/src/torchaudio/functional/functional.py b/src/torchaudio/functional/functional.py index 9c904120c4..4070141958 100644 --- a/src/torchaudio/functional/functional.py +++ b/src/torchaudio/functional/functional.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import math -import tempfile import warnings from collections.abc import Sequence from typing import List, Optional, Tuple, Union @@ -9,8 +8,7 @@ import torch import torchaudio from torch import Tensor -from torchaudio._internal.module_utils import deprecated, dropping_support - +from torchaudio._internal.module_utils import dropping_support from .filtering import highpass_biquad, treble_biquad @@ -1719,6 +1717,7 @@ def _fix_waveform_shape( waveform_shift = waveform_shift.view(shape[:-1] + waveform_shift.shape[-1:]) return waveform_shift + class RnntLoss(torch.autograd.Function): @staticmethod def forward(ctx, *args): @@ -1730,9 +1729,10 @@ def forward(ctx, *args): def backward(ctx, dy): grad = ctx.saved_tensors[0] grad_out = dy.view((-1, 1, 1, 1)) - result = grad * grad_out; + result = grad * grad_out return (result, None, None, None, None, None, None, None) + def _rnnt_loss( logits: Tensor, targets: Tensor, @@ -1775,15 +1775,7 @@ def _rnnt_loss( if blank < 0: # reinterpret blank index if blank < 0. blank = logits.shape[-1] + blank - costs = RnntLoss.apply( - logits, - targets, - logit_lengths, - target_lengths, - blank, - clamp, - fused_log_softmax - ) + costs = RnntLoss.apply(logits, targets, logit_lengths, target_lengths, blank, clamp, fused_log_softmax) if reduction == "mean": return costs.mean() @@ -1837,10 +1829,12 @@ def psd( psd = psd.sum(dim=-3) return psd + # Expose both deprecated wrapper as well as original because torchscript breaks on # wrapped functions. rnnt_loss = dropping_support(_rnnt_loss) + def _compute_mat_trace(input: torch.Tensor, dim1: int = -1, dim2: int = -2) -> torch.Tensor: r"""Compute the trace of a Tensor along ``dim1`` and ``dim2`` dimensions. diff --git a/src/torchaudio/models/decoder/__init__.py b/src/torchaudio/models/decoder/__init__.py index 184b02eb45..9623872336 100644 --- a/src/torchaudio/models/decoder/__init__.py +++ b/src/torchaudio/models/decoder/__init__.py @@ -1,5 +1,7 @@ -from torchaudio._internal.module_utils import dropping_support, dropping_class_support import inspect + +from torchaudio._internal.module_utils import dropping_class_support, dropping_support + _CTC_DECODERS = [ "CTCHypothesis", "CTCDecoder", @@ -39,14 +41,12 @@ def __getattr__(name: str): # following if-else block with # item = getattr(_cuda_ctc_decoder, name) orig_item = getattr(_cuda_ctc_decoder, name) - if ( - inspect.isclass(orig_item) - or ( - # workaround a failure to detect type instances - # after sphinx autodoc mocking, required for - # building docs - getattr(orig_item, "__sphinx_mock__", False) and inspect.isclass(orig_item.__class__) - ) + if inspect.isclass(orig_item) or ( + # workaround a failure to detect type instances + # after sphinx autodoc mocking, required for + # building docs + getattr(orig_item, "__sphinx_mock__", False) + and inspect.isclass(orig_item.__class__) ): item = dropping_class_support(orig_item) else: diff --git a/src/torchaudio/models/decoder/_ctc_decoder.py b/src/torchaudio/models/decoder/_ctc_decoder.py index a45662011b..e7dbaa7244 100644 --- a/src/torchaudio/models/decoder/_ctc_decoder.py +++ b/src/torchaudio/models/decoder/_ctc_decoder.py @@ -69,7 +69,7 @@ def _get_word_dict(lexicon, lm, lm_dict, tokens_dict, unk_word): if lexicon and word_dict is None: word_dict = _create_word_dict(lexicon) - elif not lexicon and word_dict is None and type(lm) == str: + elif not lexicon and word_dict is None and type(lm) is str: d = {tokens_dict.get_entry(i): [[tokens_dict.get_entry(i)]] for i in range(tokens_dict.index_size())} d[unk_word] = [[unk_word]] word_dict = _create_word_dict(d) @@ -499,7 +499,7 @@ def ctc_decoder( # construct word dict and language model word_dict = _get_word_dict(lexicon, lm, lm_dict, tokens_dict, unk_word) - if type(lm) == str: + if type(lm) is str: if _KenLM is None: raise RuntimeError( "flashlight-text is installed, but KenLM is not installed. " diff --git a/src/torchaudio/models/decoder/_cuda_ctc_decoder.py b/src/torchaudio/models/decoder/_cuda_ctc_decoder.py index 9b1f509644..f1aae838c4 100644 --- a/src/torchaudio/models/decoder/_cuda_ctc_decoder.py +++ b/src/torchaudio/models/decoder/_cuda_ctc_decoder.py @@ -181,7 +181,7 @@ def cuda_ctc_decoder( >>> ) >>> results = decoder(log_probs, encoder_out_lens) # List of shape (B, nbest) of Hypotheses """ - if type(tokens) == str: + if type(tokens) is str: tokens = _get_vocab_list(tokens) return CUCTCDecoder(vocab_list=tokens, beam_size=beam_size, nbest=nbest, blank_skip_threshold=blank_skip_threshold) diff --git a/src/torchaudio/pipelines/_tts/utils.py b/src/torchaudio/pipelines/_tts/utils.py index d402c1d4eb..da466aebed 100644 --- a/src/torchaudio/pipelines/_tts/utils.py +++ b/src/torchaudio/pipelines/_tts/utils.py @@ -161,7 +161,7 @@ def _load_phonemizer(file, dl_kwargs): raise RuntimeError("DeepPhonemizer is not installed. Please install it.") from dp.phonemizer import Phonemizer - from dp.preprocessing.text import Preprocessor, LanguageTokenizer, SequenceTokenizer + from dp.preprocessing.text import LanguageTokenizer, Preprocessor, SequenceTokenizer # By default, dp issues DEBUG level log. logger = logging.getLogger("dp") diff --git a/src/torchaudio/transforms/__init__.py b/src/torchaudio/transforms/__init__.py index 06a32ca846..4dba12abb8 100644 --- a/src/torchaudio/transforms/__init__.py +++ b/src/torchaudio/transforms/__init__.py @@ -1,4 +1,5 @@ from torchaudio._internal.module_utils import dropping_class_support + from ._multi_channel import MVDR, PSD, RTFMVDR, SoudenMVDR from ._transforms import ( AddNoise, diff --git a/src/torchaudio/transforms/_transforms.py b/src/torchaudio/transforms/_transforms.py index f208de13ae..08d2dcef11 100644 --- a/src/torchaudio/transforms/_transforms.py +++ b/src/torchaudio/transforms/_transforms.py @@ -10,12 +10,12 @@ from torch.nn.parameter import UninitializedParameter from torchaudio import functional as F -from torchaudio.functional.functional import _rnnt_loss from torchaudio.functional.functional import ( _apply_sinc_resample_kernel, _check_convolve_mode, _fix_waveform_shape, _get_sinc_resample_kernel, + _rnnt_loss, _stretch_waveform, ) diff --git a/src/torchaudio/utils/__init__.py b/src/torchaudio/utils/__init__.py index 4866a21390..f30b5ed929 100644 --- a/src/torchaudio/utils/__init__.py +++ b/src/torchaudio/utils/__init__.py @@ -1,6 +1,4 @@ - from .download import _download_asset -__all__ = [ -] +__all__ = ["_download_asset"] diff --git a/src/torchaudio/utils/download.py b/src/torchaudio/utils/download.py index b74cd60604..5519b7f4be 100644 --- a/src/torchaudio/utils/download.py +++ b/src/torchaudio/utils/download.py @@ -30,7 +30,6 @@ def _get_hash(path, hash, chunk_size=1028): data = file.read(chunk_size) return m.hexdigest() -from torchaudio._internal.module_utils import dropping_support def _download_asset( key: str, diff --git a/test/librosa_mock.py b/test/librosa_mock.py index e15f0ca9eb..951db59e14 100644 --- a/test/librosa_mock.py +++ b/test/librosa_mock.py @@ -1,8 +1,8 @@ -import re -import os from pathlib import Path + import torch + def mock_function(f): """ Create a mocked version of a function from the librosa library that loads a precomputed result @@ -12,6 +12,7 @@ def mock_function(f): """ this_file = Path(__file__).parent.resolve() expected_results_folder = this_file / "torchaudio_unittest" / "assets" / "librosa_expected_results" + def wrapper(request, *args, **kwargs): mocked_results = expected_results_folder / f"{request}.pt" return torch.load(mocked_results, weights_only=False) @@ -25,8 +26,10 @@ def wrapper(request, *args, **kwargs): # mocked_results.parent.mkdir(parents=True, exist_ok=True) # torch.save(result, mocked_results) # return result + return wrapper + griffinlim = mock_function("librosa.griffinlim") mel = mock_function("librosa.filters.mel") @@ -41,8 +44,10 @@ def wrapper(request, *args, **kwargs): mel_spectrogram = mock_function("librosa.feature.melspectrogram") + def _mfcc_from_waveform(waveform, sample_rate, n_fft, hop_length, n_mels, n_mfcc): import librosa + melspec = librosa.feature.melspectrogram( y=waveform[0].cpu().numpy(), sr=sample_rate, @@ -56,6 +61,7 @@ def _mfcc_from_waveform(waveform, sample_rate, n_fft, hop_length, n_mels, n_mfcc ) return librosa.feature.mfcc(S=librosa.core.power_to_db(melspec), n_mfcc=n_mfcc, dct_type=2, norm="ortho") + mfcc_from_waveform = mock_function("_mfcc_from_waveform") diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index c8c22a463c..bde704758c 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -13,6 +13,7 @@ def base_smoke_test(): import torchaudio.transforms # noqa: F401 import torchaudio.utils # noqa: F401 + def _run_smoke_test(): base_smoke_test() print("Smoke test passed.") diff --git a/test/torchaudio_unittest/common_utils/__init__.py b/test/torchaudio_unittest/common_utils/__init__.py index 8e954e20bf..67a70033a8 100644 --- a/test/torchaudio_unittest/common_utils/__init__.py +++ b/test/torchaudio_unittest/common_utils/__init__.py @@ -1,3 +1,5 @@ +import pytest + from .autograd_utils import use_deterministic_algorithms from .case_utils import ( disabledInCI, @@ -26,7 +28,7 @@ from .func_utils import torch_script from .parameterized_utils import load_params, nested_params from .wav_utils import get_wav_data, load_wav, normalize_wav, save_wav -import pytest + class RequestMixin: """ @@ -39,6 +41,7 @@ class RequestMixin: def inject_request(self, request): self.request = request.node.nodeid.replace(":", "_").replace("_cpu_", "_").replace("_cuda_", "_") + __all__ = [ "get_asset_path", "get_whitenoise", diff --git a/test/torchaudio_unittest/common_utils/case_utils.py b/test/torchaudio_unittest/common_utils/case_utils.py index 9c0d59b199..f269d9ea18 100644 --- a/test/torchaudio_unittest/common_utils/case_utils.py +++ b/test/torchaudio_unittest/common_utils/case_utils.py @@ -262,6 +262,7 @@ def skipIfNoHWAccel(name): key = "NO_HW_ACCEL" return _skipIf(True, reason="ffmpeg features are not available.", key=key) + def zip_equal(*iterables): """With the regular Python `zip` function, if one iterable is longer than the other, the remainder portions are ignored.This is resolved in Python 3.10 where we can use diff --git a/test/torchaudio_unittest/common_utils/func_utils.py b/test/torchaudio_unittest/common_utils/func_utils.py index 95fcb3def3..b08a8a4c6f 100644 --- a/test/torchaudio_unittest/common_utils/func_utils.py +++ b/test/torchaudio_unittest/common_utils/func_utils.py @@ -6,7 +6,7 @@ def torch_script(obj): """TorchScript the given function or Module""" buffer = io.BytesIO() - if hasattr(obj, '__wrapped__'): + if hasattr(obj, "__wrapped__"): # This is hack for those functions which are deprecated with decorators # like @deprecated or @dropping_support. Adding the decorators breaks # TorchScript. We need to unwrap the function to get the original one, diff --git a/test/torchaudio_unittest/common_utils/kaldi_utils.py b/test/torchaudio_unittest/common_utils/kaldi_utils.py index 9985728d5d..1728517f2a 100644 --- a/test/torchaudio_unittest/common_utils/kaldi_utils.py +++ b/test/torchaudio_unittest/common_utils/kaldi_utils.py @@ -1,8 +1,6 @@ -import subprocess +from pathlib import Path import torch -import os -from pathlib import Path def convert_args(**kwargs): diff --git a/test/torchaudio_unittest/common_utils/sox_utils.py b/test/torchaudio_unittest/common_utils/sox_utils.py index d51321319b..90ef9b532f 100644 --- a/test/torchaudio_unittest/common_utils/sox_utils.py +++ b/test/torchaudio_unittest/common_utils/sox_utils.py @@ -1,9 +1,9 @@ +import shutil import subprocess import sys import warnings -import shutil from pathlib import Path -import os + def get_encoding(dtype): encodings = { diff --git a/test/torchaudio_unittest/compliance/kaldi/kaldi_compatibility_impl.py b/test/torchaudio_unittest/compliance/kaldi/kaldi_compatibility_impl.py index 83b7d6314a..573082131b 100644 --- a/test/torchaudio_unittest/compliance/kaldi/kaldi_compatibility_impl.py +++ b/test/torchaudio_unittest/compliance/kaldi/kaldi_compatibility_impl.py @@ -5,9 +5,9 @@ get_asset_path, load_params, load_wav, + RequestMixin, TempDirMixin, TestBaseMixin, - RequestMixin ) from torchaudio_unittest.common_utils.kaldi_utils import convert_args, run_kaldi diff --git a/test/torchaudio_unittest/deprecation_test.py b/test/torchaudio_unittest/deprecation_test.py index c270894f40..59521f6564 100644 --- a/test/torchaudio_unittest/deprecation_test.py +++ b/test/torchaudio_unittest/deprecation_test.py @@ -1,7 +1,5 @@ import pytest -import torch - from torchaudio._internal.module_utils import UNSUPPORTED diff --git a/test/torchaudio_unittest/functional/autograd_impl.py b/test/torchaudio_unittest/functional/autograd_impl.py index 1489fdef79..7d7286fe83 100644 --- a/test/torchaudio_unittest/functional/autograd_impl.py +++ b/test/torchaudio_unittest/functional/autograd_impl.py @@ -419,7 +419,7 @@ def assert_grad( def test_rnnt_loss(self, data_func): def get_data(data_func, device): data = data_func() - if type(data) == tuple: + if type(data) is tuple: data = data[0] return data diff --git a/test/torchaudio_unittest/functional/functional_cpu_test.py b/test/torchaudio_unittest/functional/functional_cpu_test.py index d4f1b4578e..c3b14c38db 100644 --- a/test/torchaudio_unittest/functional/functional_cpu_test.py +++ b/test/torchaudio_unittest/functional/functional_cpu_test.py @@ -1,10 +1,7 @@ import unittest import torch -import torchaudio.functional as F -from parameterized import parameterized -import unittest -from torchaudio_unittest.common_utils import PytorchTestCase, TorchaudioTestCase +from torchaudio_unittest.common_utils import PytorchTestCase from .functional_impl import Functional, FunctionalCPUOnly diff --git a/test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py b/test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py index 04caed7398..922fbcf3d2 100644 --- a/test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py +++ b/test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py @@ -1,6 +1,6 @@ import torch import torchaudio.functional as F -from torchaudio_unittest.common_utils import TempDirMixin, TestBaseMixin, RequestMixin +from torchaudio_unittest.common_utils import RequestMixin, TempDirMixin, TestBaseMixin from torchaudio_unittest.common_utils.kaldi_utils import convert_args, run_kaldi diff --git a/test/torchaudio_unittest/functional/librosa_compatibility_test_impl.py b/test/torchaudio_unittest/functional/librosa_compatibility_test_impl.py index d06881f0b0..7e3bedf53f 100644 --- a/test/torchaudio_unittest/functional/librosa_compatibility_test_impl.py +++ b/test/torchaudio_unittest/functional/librosa_compatibility_test_impl.py @@ -1,17 +1,12 @@ -import unittest -from distutils.version import StrictVersion +import librosa_mock +import numpy as np import torch import torchaudio.functional as F from parameterized import param -from torchaudio._internal.module_utils import is_module_available - -import librosa_mock -import numpy as np -import pytest +from torchaudio_unittest.common_utils import get_spectrogram, get_whitenoise, nested_params, RequestMixin, TestBaseMixin -from torchaudio_unittest.common_utils import get_spectrogram, get_whitenoise, nested_params, TestBaseMixin, RequestMixin class Functional(TestBaseMixin, RequestMixin): """Test suite for functions in `functional` module.""" @@ -79,7 +74,13 @@ def test_create_mel_fb( expected = librosa_mock.mel( self.request, - sr=sample_rate, n_fft=n_fft, n_mels=n_mels, fmax=fmax, fmin=fmin, htk=mel_scale == "htk", norm=norm + sr=sample_rate, + n_fft=n_fft, + n_mels=n_mels, + fmax=fmax, + fmin=fmin, + htk=mel_scale == "htk", + norm=norm, ).T result = F.melscale_fbanks( sample_rate=sample_rate, @@ -126,13 +127,17 @@ def test_phase_vocoder(self, rate): # Due to cummulative sum, numerical error in using torch.float32 will # result in bottom right values of the stretched sectrogram to not # match with librosa. - spec = torch.randn(num_freq, num_frames, dtype=torch.complex128).to(self.device,) + spec = torch.randn(num_freq, num_frames, dtype=torch.complex128).to( + self.device, + ) phase_advance = torch.linspace(0, np.pi * hop_length, num_freq, device=self.device, dtype=torch.float64)[ ..., None ] stretched = F.phase_vocoder(spec, rate=rate, phase_advance=phase_advance) - expected_stretched = librosa_mock.phase_vocoder(self.request, spec.cpu().numpy(), rate=rate, hop_length=hop_length) + expected_stretched = librosa_mock.phase_vocoder( + self.request, spec.cpu().numpy(), rate=rate, hop_length=hop_length + ) self.assertEqual(stretched, torch.from_numpy(expected_stretched)) diff --git a/test/torchaudio_unittest/functional/sox_compatibility_test.py b/test/torchaudio_unittest/functional/sox_compatibility_test.py index 0a41ec13ac..4ee9addc5e 100644 --- a/test/torchaudio_unittest/functional/sox_compatibility_test.py +++ b/test/torchaudio_unittest/functional/sox_compatibility_test.py @@ -4,11 +4,11 @@ get_asset_path, get_whitenoise, load_wav, + RequestMixin, save_wav, sox_utils, TempDirMixin, TorchaudioTestCase, - RequestMixin ) diff --git a/test/torchaudio_unittest/functional/torchscript_consistency_impl.py b/test/torchaudio_unittest/functional/torchscript_consistency_impl.py index 099b370086..6b1e28b869 100644 --- a/test/torchaudio_unittest/functional/torchscript_consistency_impl.py +++ b/test/torchaudio_unittest/functional/torchscript_consistency_impl.py @@ -6,8 +6,6 @@ from parameterized import parameterized from torchaudio_unittest import common_utils from torchaudio_unittest.common_utils import skipIfRocm, TempDirMixin, TestBaseMixin, torch_script -from torchaudio.functional.functional import rnnt_loss - class Functional(TempDirMixin, TestBaseMixin): diff --git a/test/torchaudio_unittest/test_load_save_torchcodec.py b/test/torchaudio_unittest/test_load_save_torchcodec.py index 9057e93811..b9eed55572 100644 --- a/test/torchaudio_unittest/test_load_save_torchcodec.py +++ b/test/torchaudio_unittest/test_load_save_torchcodec.py @@ -1,10 +1,8 @@ -from unittest.mock import patch +import os import re import subprocess - - -import os import tempfile + import pytest import torch @@ -21,13 +19,14 @@ # skip them unconditionally. pytest.skip(allow_module_level=True) + def get_ffmpeg_version(): """Get FFmpeg version to check for compatibility issues.""" try: - result = subprocess.run(['ffmpeg', '-version'], capture_output=True, text=True, timeout=5) + result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True, timeout=5) if result.returncode == 0: # Extract version number from output like "ffmpeg version 4.4.2-0ubuntu0.22.04.1" - match = re.search(r'ffmpeg version (\d+)\.', result.stdout) + match = re.search(r"ffmpeg version (\d+)\.", result.stdout) if match: return int(match.group(1)) except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.SubprocessError): @@ -79,31 +78,32 @@ def test_basic_load(filename): # Check values are close (allowing for small differences in decoders) torch.testing.assert_close(waveform_ta, waveform_tc) -@pytest.mark.parametrize("frame_offset,num_frames", [ - (0, 1000), # First 1000 samples - (1000, 2000), # 2000 samples starting from 1000 - (5000, -1), # From 5000 to end - (0, -1), # Full file -]) + +@pytest.mark.parametrize( + "frame_offset,num_frames", + [ + (0, 1000), # First 1000 samples + (1000, 2000), # 2000 samples starting from 1000 + (5000, -1), # From 5000 to end + (0, -1), # Full file + ], +) def test_frame_offset_and_num_frames(frame_offset, num_frames): """Test frame_offset and num_frames parameters.""" file_path = get_asset_path("sinewave.wav") # Load with torchaudio - waveform_ta, sample_rate_ta = torchaudio.load( - file_path, frame_offset=frame_offset, num_frames=num_frames - ) + waveform_ta, sample_rate_ta = torchaudio.load(file_path, frame_offset=frame_offset, num_frames=num_frames) # Load with torchcodec - waveform_tc, sample_rate_tc = load_with_torchcodec( - file_path, frame_offset=frame_offset, num_frames=num_frames - ) + waveform_tc, sample_rate_tc = load_with_torchcodec(file_path, frame_offset=frame_offset, num_frames=num_frames) # Check results match assert sample_rate_ta == sample_rate_tc assert waveform_ta.shape == waveform_tc.shape torch.testing.assert_close(waveform_ta, waveform_tc) + def test_channels_first(): """Test channels_first parameter.""" file_path = get_asset_path("sinewave.wav") # Use sinewave.wav for compatibility @@ -127,6 +127,7 @@ def test_channels_first(): torch.testing.assert_close(waveform_cf_true, waveform_ta_true) torch.testing.assert_close(waveform_cf_false, waveform_ta_false) + def test_normalize_parameter_warning(): """Test that normalize=False produces a warning.""" file_path = get_asset_path("sinewave.wav") @@ -138,6 +139,7 @@ def test_normalize_parameter_warning(): # Result should still be float32 (normalized) assert waveform.dtype == torch.float32 + def test_buffer_size_parameter_warning(): """Test that non-default buffer_size produces a warning.""" file_path = get_asset_path("sinewave.wav") @@ -180,13 +182,7 @@ def test_multiple_warnings(): with pytest.warns() as warning_list: # This should produce multiple warnings - waveform, sample_rate = load_with_torchcodec( - file_path, - normalize=False, - buffer_size=8192, - backend="ffmpeg" - ) - + waveform, sample_rate = load_with_torchcodec(file_path, normalize=False, buffer_size=8192, backend="ffmpeg") # Check that expected warnings are present messages = [str(w.message) for w in warning_list] @@ -197,6 +193,7 @@ def test_multiple_warnings(): # ===== SAVE WITH TORCHCODEC TESTS ===== + @pytest.mark.parametrize("filename", TEST_FILES) def test_save_basic_save(filename): """Test basic saving functionality against torchaudio.save.""" @@ -416,7 +413,7 @@ def test_save_multiple_warnings(): encoding="PCM_16", bits_per_sample=16, buffer_size=8192, - backend="ffmpeg" + backend="ffmpeg", ) # Check that expected warnings are present diff --git a/test/torchaudio_unittest/transforms/autograd_test_impl.py b/test/torchaudio_unittest/transforms/autograd_test_impl.py index 9c321fe223..e3cf93ba39 100644 --- a/test/torchaudio_unittest/transforms/autograd_test_impl.py +++ b/test/torchaudio_unittest/transforms/autograd_test_impl.py @@ -400,7 +400,7 @@ def assert_grad( def test_rnnt_loss(self, data_func): def get_data(data_func, device): data = data_func() - if type(data) == tuple: + if type(data) is tuple: data = data[0] return data diff --git a/test/torchaudio_unittest/transforms/librosa_compatibility_test_impl.py b/test/torchaudio_unittest/transforms/librosa_compatibility_test_impl.py index 07b3fadd84..a108e06e3c 100644 --- a/test/torchaudio_unittest/transforms/librosa_compatibility_test_impl.py +++ b/test/torchaudio_unittest/transforms/librosa_compatibility_test_impl.py @@ -1,12 +1,17 @@ -import unittest +import librosa_mock import torch import torchaudio.transforms as T from parameterized import param, parameterized -from torchaudio._internal.module_utils import is_module_available -from torchaudio_unittest.common_utils import get_sinusoid, get_spectrogram, get_whitenoise, nested_params, TestBaseMixin, RequestMixin -import librosa_mock -import pytest +from torchaudio_unittest.common_utils import ( + get_sinusoid, + get_spectrogram, + get_whitenoise, + nested_params, + RequestMixin, + TestBaseMixin, +) + class TransformsTestBase(TestBaseMixin, RequestMixin): @parameterized.expand( @@ -26,7 +31,11 @@ def test_Spectrogram(self, n_fft, hop_length, power): expected = librosa_mock.spectrogram( self.request, - y=waveform[0].cpu().numpy(), n_fft=n_fft, hop_length=hop_length, power=power, pad_mode="reflect" + y=waveform[0].cpu().numpy(), + n_fft=n_fft, + hop_length=hop_length, + power=power, + pad_mode="reflect", )[0] result = T.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=power,).to(self.device, self.dtype)( @@ -44,8 +53,7 @@ def test_Spectrogram_complex(self): ).to(self.device, self.dtype) expected = librosa_mock.spectrogram( - self.request, - y=waveform[0].cpu().numpy(), n_fft=n_fft, hop_length=hop_length, power=1, pad_mode="reflect" + self.request, y=waveform[0].cpu().numpy(), n_fft=n_fft, hop_length=hop_length, power=1, pad_mode="reflect" )[0] result = T.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=None, return_complex=True,).to( @@ -121,13 +129,7 @@ def test_mfcc(self, n_fft, hop_length, n_mels, n_mfcc): ).to(self.device, self.dtype)(waveform)[0] expected = librosa_mock.mfcc_from_waveform( - f"{self.request}", - waveform, - sample_rate, - n_fft, - hop_length, - n_mels, - n_mfcc + f"{self.request}", waveform, sample_rate, n_fft, hop_length, n_mels, n_mfcc ) self.assertEqual(result, torch.from_numpy(expected), atol=5e-4, rtol=1e-5) @@ -147,6 +149,10 @@ def test_spectral_centroid(self, n_fft, hop_length): )(waveform) expected = librosa_mock.spectral_centroid( self.request, - y=waveform[0].cpu().numpy(), sr=sample_rate, n_fft=n_fft, hop_length=hop_length, pad_mode="reflect" + y=waveform[0].cpu().numpy(), + sr=sample_rate, + n_fft=n_fft, + hop_length=hop_length, + pad_mode="reflect", ) self.assertEqual(result, torch.from_numpy(expected), atol=5e-4, rtol=1e-5) diff --git a/test/torchaudio_unittest/transforms/sox_compatibility_test.py b/test/torchaudio_unittest/transforms/sox_compatibility_test.py index a20c44e5ba..cdf6c999cb 100644 --- a/test/torchaudio_unittest/transforms/sox_compatibility_test.py +++ b/test/torchaudio_unittest/transforms/sox_compatibility_test.py @@ -7,11 +7,11 @@ get_asset_path, get_whitenoise, load_wav, + RequestMixin, save_wav, sox_utils, TempDirMixin, TorchaudioTestCase, - RequestMixin ) diff --git a/test/torchcodec/decoders.py b/test/torchcodec/decoders.py index 0064be91d6..a1f0433805 100644 --- a/test/torchcodec/decoders.py +++ b/test/torchcodec/decoders.py @@ -1,9 +1,11 @@ -import torchaudio_unittest.common_utils.wav_utils as wav_utils from types import SimpleNamespace +import torchaudio_unittest.common_utils.wav_utils as wav_utils + # See corresponding [TorchCodec test dependency mocking hack] note in # conftest.py + class AudioDecoder: def __init__(self, uri): self.uri = uri diff --git a/test/torchcodec/encoders.py b/test/torchcodec/encoders.py index e6b0693018..d81cc5aa84 100644 --- a/test/torchcodec/encoders.py +++ b/test/torchcodec/encoders.py @@ -1,9 +1,11 @@ -import torchaudio_unittest.common_utils.wav_utils as wav_utils from types import SimpleNamespace +import torchaudio_unittest.common_utils.wav_utils as wav_utils + # See corresponding [TorchCodec test dependency mocking hack] note in # conftest.py + class AudioEncoder: def __init__(self, data, sample_rate): self.data = data diff --git a/tools/setup_helpers/extension.py b/tools/setup_helpers/extension.py index a440572a02..66f816aa43 100644 --- a/tools/setup_helpers/extension.py +++ b/tools/setup_helpers/extension.py @@ -135,8 +135,8 @@ def build_extension(self, ext): python_version = sys.version_info - cxx_compiler = os.environ.get('CXX', 'cl') - c_compiler = os.environ.get('CC', 'cl') + cxx_compiler = os.environ.get("CXX", "cl") + c_compiler = os.environ.get("CC", "cl") cmake_args += [ f"-DCMAKE_C_COMPILER={c_compiler}", From 36f5bed40369eabab0175e88cf055b24a5f5fa1d Mon Sep 17 00:00:00 2001 From: Pearu Peterson Date: Tue, 9 Sep 2025 11:19:13 +0300 Subject: [PATCH 2/2] Switch to python 3.11. Remove mypy section. --- .github/workflows/lint.yml | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 329c2b5f0c..986aeb9f08 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -19,7 +19,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.12'] + python-version: ['3.11'] steps: - name: Check out repo uses: actions/checkout@v3 @@ -41,36 +41,3 @@ jobs: - name: Check to see what files pre-commit modified run: | git diff - - mypy: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ['3.12'] - steps: - - name: Check out repo - uses: actions/checkout@v3 - - name: Setup conda env - uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - miniconda-version: "latest" - activate-environment: test - python-version: ${{ matrix.python-version }} - - name: Update pip - run: python -m pip install --upgrade pip - - name: Install dependencies and FFmpeg - run: | - python -m pip install --pre torch torchcodec --index-url https://download.pytorch.org/whl/nightly/cpu - conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge - ffmpeg -version - - name: Build and install torchaudio - run: | - python -m pip install -e ".[dev]" --no-build-isolation -vvv - - name: Install mypy - run: | - python -m pip install mypy - - name: Run mypy - run: | - mypy --install-types --non-interactive --config-file mypy.ini