Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 39 additions & 39 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -1,43 +1,43 @@
# name: Lint
name: Lint

# on:
# pull_request:
# push:
# branches:
# - nightly
# - main
# - release/*
# workflow_dispatch:
on:
push:
branches: [ main ]
pull_request:

# jobs:
# python-source-and-configs:
# uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
# permissions:
# id-token: write
# contents: read
# with:
# repository: pytorch/audio
# script: |
# set -euo pipefail
concurrency:
group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
cancel-in-progress: true

# echo '::group::Setup environment'
# eval "$("$(which conda)" shell.bash hook)"
# # libcst does not have 3.11 pre-built binaries yet. Use python 3.10
# conda create -y --name env python=3.10
# conda activate env
# pip3 install --progress-bar=off pre-commit
# echo '::endgroup::'
defaults:
run:
shell: bash -l -eo pipefail {0}

# set +e
# pre-commit run --all-files --show-diff-on-failure
# status=$?

# echo '::group::Add Summary'
# if [ $status -ne 0 ]; then
# echo '### Lint failure' >> $GITHUB_STEP_SUMMARY
# echo '```diff' >> $GITHUB_STEP_SUMMARY
# git --no-pager diff >> $GITHUB_STEP_SUMMARY
# echo '```' >> $GITHUB_STEP_SUMMARY
# fi
# echo '::endgroup::'
# exit $status
jobs:
pre-commit-checks:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ['3.11']
steps:
- name: Check out repo
uses: actions/checkout@v3
- name: Setup conda env
uses: conda-incubator/setup-miniconda@v2
with:
auto-update-conda: true
miniconda-version: "latest"
activate-environment: test
python-version: ${{ matrix.python-version }}
- name: Update pip
run: python -m pip install --upgrade pip
- name: Install pre-commit
run: |
python -m pip install pre-commit
- name: Run pre-commit checks
run: |
pre-commit run --all-files
- name: Check to see what files pre-commit modified
run: |
git diff
1 change: 0 additions & 1 deletion cmake/LoadHIP.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -267,4 +267,3 @@ if(HIP_FOUND)
set(roctracer_INCLUDE_DIRS ${ROCTRACER_PATH}/include)
endif()
endif()

2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
nbsphinx_requirejs_path = ""

autodoc_member_order = "bysource"
autodoc_mock_imports = ['torchaudio.models.decoder']
autodoc_mock_imports = ["torchaudio.models.decoder"]

# katex options
#
Expand Down
1 change: 1 addition & 0 deletions examples/tutorials/audio_feature_augmentation_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
def _get_sample(path):
return torchaudio.load(path)


def get_speech_sample():
return _get_sample(SAMPLE_WAV_SPEECH_PATH)

Expand Down
3 changes: 1 addition & 2 deletions examples/tutorials/audio_resampling_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import math
import timeit

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
from IPython.display import Audio
import numpy as np
Expand Down Expand Up @@ -427,7 +426,7 @@ def plot(data, cols, rows):
x_data = np.arange(len(rows))
bar_width = 0.8 / len(cols)
for (i, (c, d)) in enumerate(zip(cols, data.T)):
x_pos = x_data + (i - len(cols)/2 + 0.5) * bar_width
x_pos = x_data + (i - len(cols) / 2 + 0.5) * bar_width
ax.bar(x_pos, d, bar_width, label=c)
ax.legend()
ax.set_xticks(x_data)
Expand Down
1 change: 1 addition & 0 deletions examples/tutorials/mvdr_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def generate_mixture(waveform_clean, waveform_noise, target_snr):
waveform_noise *= 10 ** (-(target_snr - current_snr) / 20)
return waveform_clean + waveform_noise


# If you have mir_eval installed, you can use it to evaluate the separation quality of the estimated sources.
# You can also evaluate the intelligibility of the speech with the Short-Time Objective Intelligibility (STOI) metric
# available in the `pystoi` package, or the Perceptual Evaluation of Speech Quality (PESQ) metric available in the `pesq` package.
Expand Down
8 changes: 4 additions & 4 deletions examples/tutorials/squim_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,8 @@ def plot(waveform, title, sample_rate=16000):
# ```
# These values are precomputed and hard-coded below.
print(f"Reference metrics for distorted speech at {snr_dbs[0]}dB are\n")
print(f"STOI: 0.9670831113894452")
print(f"PESQ: 2.7961528301239014")
print("STOI: 0.9670831113894452")
print("PESQ: 2.7961528301239014")
si_sdr_ref = si_snr(WAVEFORM_DISTORTED[0:1], WAVEFORM_SPEECH)
print(f"SI-SDR: {si_sdr_ref}")

Expand All @@ -304,8 +304,8 @@ def plot(waveform, title, sample_rate=16000):
si_sdr_ref = si_snr(WAVEFORM_DISTORTED[1:2], WAVEFORM_SPEECH)
# STOI and PESQ metrics are precomputed and hardcoded below.
print(f"Reference metrics for distorted speech at {snr_dbs[1]}dB are\n")
print(f"STOI: 0.5743247866630554")
print(f"PESQ: 1.1112866401672363")
print("STOI: 0.5743247866630554")
print("PESQ: 1.1112866401672363")
print(f"SI-SDR: {si_sdr_ref}")


Expand Down
38 changes: 19 additions & 19 deletions src/libtorchaudio/forced_align/cpu/compute.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#include <torch/script.h>
#include <torch/torch.h>
#include <torch/csrc/inductor/aoti_torch/c/shim.h>
#include <torch/csrc/stable/library.h>
#include <torch/csrc/stable/tensor.h>
#include <torch/csrc/stable/ops.h>
#include <torch/csrc/inductor/aoti_torch/c/shim.h>
#include <torch/csrc/stable/tensor.h>
#include <torch/script.h>
#include <torch/torch.h>

using namespace std;

Expand Down Expand Up @@ -81,18 +81,21 @@ void forced_align_impl(
auto curIdxOffset = t % 2;
auto prevIdxOffset = (t - 1) % 2;
for (auto j = 0; j < S; ++j) {
alphas_a[curIdxOffset * S + j] = -std::numeric_limits<scalar_t>::infinity(); // alphas_a[curIdxOffset][j]
alphas_a[curIdxOffset * S + j] = -std::numeric_limits<
scalar_t>::infinity(); // alphas_a[curIdxOffset][j]
}
if (start == 0) {
alphas_a[curIdxOffset * S] =
alphas_a[prevIdxOffset * S] + logProbs_a[batchIndex][t][blank]; // alphas_a[curIdxOffset][0]
alphas_a[curIdxOffset * S] = alphas_a[prevIdxOffset * S] +
logProbs_a[batchIndex][t][blank]; // alphas_a[curIdxOffset][0]
backPtr_a[S * t] = 0; // backPtr_a[t][0] = 0
startloop += 1;
}

for (auto i = startloop; i < end; i++) {
auto x0 = alphas_a[prevIdxOffset * S + i]; // alphas_a[prevIdxOffset][i];
auto x1 = alphas_a[prevIdxOffset * S + i - 1]; // alphas_a[prevIdxOffset][i - 1];
auto x1 =
alphas_a[prevIdxOffset * S + i - 1]; // alphas_a[prevIdxOffset][i
// - 1];
auto x2 = -std::numeric_limits<scalar_t>::infinity();

auto labelIdx = (i % 2 == 0) ? blank : targets_a[batchIndex][i / 2];
Expand All @@ -103,7 +106,8 @@ void forced_align_impl(
// (i != 1) just ensures we don't read targets_a[batchIndex][i / 2 - 1] at index -1, which would happen when i == 1
if (i % 2 != 0 && i != 1 &&
targets_a[batchIndex][i / 2] != targets_a[batchIndex][i / 2 - 1]) {
x2 = alphas_a[prevIdxOffset * S + i - 2]; // alphas_a[prevIdxOffset][i - 2];
x2 = alphas_a[prevIdxOffset * S + i - 2]; // alphas_a[prevIdxOffset][i -
// 2];
}
scalar_t result = 0.0;
if (x2 > x1 && x2 > x0) {
Expand All @@ -116,12 +120,14 @@ void forced_align_impl(
result = x0;
backPtr_a[t * S + i] = 0; // backPtr_a[t][i] = 0
}
alphas_a[curIdxOffset * S + i] = result + logProbs_a[batchIndex][t][labelIdx]; // alphas_a[curIdxOffset][i]
alphas_a[curIdxOffset * S + i] = result +
logProbs_a[batchIndex][t][labelIdx]; // alphas_a[curIdxOffset][i]
}
}
auto idx1 = (T - 1) % 2;
auto ltrIdx = alphas_a[S * idx1 + S - 1] >
alphas_a[S * idx1 + S - 2] ? S - 1 : S - 2; // alphas_a[idx1][S - 1], alphas_a[idx1][S - 2]
auto ltrIdx = alphas_a[S * idx1 + S - 1] > alphas_a[S * idx1 + S - 2]
? S - 1
: S - 2; // alphas_a[idx1][S - 1], alphas_a[idx1][S - 2]
delete[] alphas_a;
// path stores the token index for each time step after forced alignment.
for (auto t = T - 1; t > -1; t--) {
Expand Down Expand Up @@ -194,15 +200,9 @@ std::tuple<torch::Tensor, torch::Tensor> compute(
logProbs, targets, blank, paths);
}
});
return std::make_tuple(
paths,
logProbs
);
return std::make_tuple(paths, logProbs);
}




TORCH_LIBRARY_IMPL(torchaudio, CPU, m) {
m.impl("forced_align", &compute);
}
Expand Down
6 changes: 3 additions & 3 deletions src/libtorchaudio/lfilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ void lfilter_core_generic_loop(
auto coeff = a_coeff_flipped.unsqueeze(2);
for (int64_t i_sample = 0; i_sample < n_samples_input; i_sample++) {
auto windowed_output_signal =
torch::narrow(padded_output_waveform, 2, i_sample, i_sample + n_order).transpose(0, 1);
auto o0 =
torch::select(input_signal_windows, 2, i_sample) -
torch::narrow(padded_output_waveform, 2, i_sample, i_sample + n_order)
.transpose(0, 1);
auto o0 = torch::select(input_signal_windows, 2, i_sample) -
at::matmul(windowed_output_signal, coeff).squeeze(2).transpose(0, 1);
padded_output_waveform.index_put_(
{torch::indexing::Slice(),
Expand Down
28 changes: 12 additions & 16 deletions src/torchaudio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,13 @@
from torchaudio._internal.module_utils import dropping_io_support, dropping_class_io_support
from typing import Union, BinaryIO, Optional, Tuple
import os
from typing import BinaryIO, Optional, Tuple, Union

import torch
import sys

# Initialize extension and backend first
from . import _extension # noqa # usort: skip
from . import compliance, datasets, functional, models, pipelines, transforms, utils # noqa: F401
from ._torchcodec import load_with_torchcodec, save_with_torchcodec

from . import ( # noqa: F401
compliance,
datasets,
functional,
models,
pipelines,
transforms,
utils,
)


try:
from .version import __version__, git_version # noqa: F401
Expand Down Expand Up @@ -100,9 +90,10 @@ def load(
channels_first=channels_first,
format=format,
buffer_size=buffer_size,
backend=backend
backend=backend,
)


def save(
uri: Union[str, os.PathLike],
src: torch.Tensor,
Expand Down Expand Up @@ -182,14 +173,19 @@ def save(
- The output format is determined by the file extension in the uri.
- TorchCodec uses FFmpeg under the hood for encoding.
"""
return save_with_torchcodec(uri, src, sample_rate,
return save_with_torchcodec(
uri,
src,
sample_rate,
channels_first=channels_first,
format=format,
encoding=encoding,
bits_per_sample=bits_per_sample,
buffer_size=buffer_size,
backend=backend,
compression=compression)
compression=compression,
)


__all__ = [
"load",
Expand Down
3 changes: 1 addition & 2 deletions src/torchaudio/_extension/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from torchaudio._internal.module_utils import fail_with_message, is_module_available, no_op

from .utils import _check_cuda_version, _init_dll_path, _LazyImporter, _load_lib
from .utils import _check_cuda_version, _init_dll_path, _load_lib

_LG = logging.getLogger(__name__)

Expand Down Expand Up @@ -43,7 +43,6 @@
_IS_ALIGN_AVAILABLE = torchaudio.lib._torchaudio.is_align_available()



fail_if_no_rir = (
no_op
if _IS_RIR_AVAILABLE
Expand Down
2 changes: 0 additions & 2 deletions src/torchaudio/_extension/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@
They should not depend on external state.
Anything that depends on external state should happen in __init__.py
"""
import importlib
import logging
import os
import types
from pathlib import Path

import torch
from torchaudio._internal.module_utils import eval_env

_LG = logging.getLogger(__name__)
_LIB_DIR = Path(__file__).parent.parent / "lib"
Expand Down
Loading
Loading