Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for #1152, #1153, #1154, and #1156

Merged
merged 10 commits into from
Sep 18, 2023
14 changes: 10 additions & 4 deletions lhotse/audio/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ def read_opus_ffmpeg(
if duration is not None:
cmd += f" -t {duration}"
# Add the input specifier after offset and duration.
cmd += f" -i {path}"
cmd += f" -i '{path}'"
# Optionally resample the output.
if force_opus_sampling_rate is not None:
cmd += f" -ar {force_opus_sampling_rate}"
Expand Down Expand Up @@ -1028,22 +1028,28 @@ def read_audio(


def info(
path: Pathlike,
path: Union[Pathlike, BytesIO],
force_opus_sampling_rate: Optional[int] = None,
force_read_audio: bool = False,
) -> LibsndfileCompatibleAudioInfo:

is_path = isinstance(path, (Path, str))

if force_read_audio:
# This is a reliable fallback for situations when the user knows that audio files do not
# have duration metadata in their headers.
# We will use "audioread" backend that spawns an ffmpeg process, reads the audio,
# and computes the duration.
assert (
is_path
), f"info(obj, force_read_audio=True) is not supported for object of type: {type(path)}"
return audioread_info(str(path))

if path.suffix.lower() == ".opus":
if is_path and Path(path).suffix.lower() == ".opus":
# We handle OPUS as a special case because we might need to force a certain sampling rate.
return opus_info(path, force_opus_sampling_rate=force_opus_sampling_rate)

elif path.suffix.lower() == ".sph":
if is_path and Path(path).suffix.lower() == ".sph":
# We handle SPHERE as another special case because some old codecs (e.g. the "shorten" codec)
# can't be handled by either pysoundfile or pyaudioread.
return sph_info(path)
Expand Down
36 changes: 30 additions & 6 deletions lhotse/bin/modes/manipulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,22 @@ def copy_feats_worker(
@click.option(
"--pad/--no-pad",
default=True,
help="Whether to pad the split output idx with zeros (e.g. 01, 02, .., 10).",
help="Whether to pad the split output idx with zeros (e.g. 00, 01, 02, .., 10).",
)
@click.option(
"-i",
"--start-idx",
type=int,
default=0,
help="Count splits starting from this index.",
)
def split(
num_splits: int, manifest: Pathlike, output_dir: Pathlike, shuffle: bool, pad: bool
num_splits: int,
manifest: Pathlike,
output_dir: Pathlike,
shuffle: bool,
pad: bool,
start_idx: int,
):
"""
Load MANIFEST, split it into NUM_SPLITS equal parts and save as separate manifests in OUTPUT_DIR.
Expand All @@ -161,8 +173,8 @@ def split(
parts = any_set.split(num_splits=num_splits, shuffle=shuffle)
output_dir.mkdir(parents=True, exist_ok=True)
num_digits = len(str(num_splits))
for idx, part in enumerate(parts):
idx = f"{idx + 1}".zfill(num_digits) if pad else str(idx + 1)
for idx, part in enumerate(parts, start=start_idx):
idx = f"{idx}".zfill(num_digits) if pad else str(idx)
part.to_file((output_dir / manifest.stem).with_suffix(f".{idx}{suffix}"))


Expand All @@ -172,7 +184,16 @@ def split(
)
@click.argument("output_dir", type=click.Path(allow_dash=True))
@click.argument("chunk_size", type=int)
def split_lazy(manifest: Pathlike, output_dir: Pathlike, chunk_size: int):
@click.option(
"-i",
"--start-idx",
type=int,
default=0,
help="Count splits starting from this index.",
)
def split_lazy(
manifest: Pathlike, output_dir: Pathlike, chunk_size: int, start_idx: int
):
"""
Load MANIFEST (lazily if in JSONL format) and split it into parts,
each with CHUNK_SIZE items.
Expand All @@ -187,7 +208,10 @@ def split_lazy(manifest: Pathlike, output_dir: Pathlike, chunk_size: int):
manifest = Path(manifest)
any_set = load_manifest_lazy_or_eager(manifest)
any_set.split_lazy(
output_dir=output_dir, chunk_size=chunk_size, prefix=manifest.stem
output_dir=output_dir,
chunk_size=chunk_size,
prefix=manifest.stem,
start_idx=start_idx,
)


Expand Down
13 changes: 11 additions & 2 deletions lhotse/cut/set.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,7 +984,10 @@ def total_duration_(segments: List[TimeSpan]) -> float:
print(tabulate(speaker_stats, headers="firstrow", tablefmt="fancy_grid"))

def split(
self, num_splits: int, shuffle: bool = False, drop_last: bool = False
self,
num_splits: int,
shuffle: bool = False,
drop_last: bool = False,
) -> List["CutSet"]:
"""
Split the :class:`~lhotse.CutSet` into ``num_splits`` pieces of equal size.
Expand All @@ -1000,7 +1003,10 @@ def split(
return [
CutSet.from_cuts(subset)
for subset in split_sequence(
self, num_splits=num_splits, shuffle=shuffle, drop_last=drop_last
self,
num_splits=num_splits,
shuffle=shuffle,
drop_last=drop_last,
)
]

Expand All @@ -1010,6 +1016,7 @@ def split_lazy(
chunk_size: int,
prefix: str = "",
num_digits: int = 8,
start_idx: int = 0,
) -> List["CutSet"]:
"""
Splits a manifest (either lazily or eagerly opened) into chunks, each
Expand All @@ -1027,6 +1034,7 @@ def split_lazy(
:param chunk_size: the number of items in each chunk.
:param prefix: the prefix of each manifest.
:param num_digits: the width of ``split_idx``, which will be left padded with zeros to achieve it.
:param start_idx: The split index to start counting from (default is ``0``).
:return: a list of lazily opened chunk manifests.
"""
return split_manifest_lazy(
Expand All @@ -1035,6 +1043,7 @@ def split_lazy(
chunk_size=chunk_size,
prefix=prefix,
num_digits=num_digits,
start_idx=start_idx,
)

def subset(
Expand Down
4 changes: 3 additions & 1 deletion lhotse/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ def split_manifest_lazy(
chunk_size: int,
prefix: str = "",
num_digits: int = 8,
start_idx: int = 0,
) -> List:
"""
Splits a manifest (either lazily or eagerly opened) into chunks, each
Expand All @@ -297,6 +298,7 @@ def split_manifest_lazy(
:param chunk_size: the number of items in each chunk.
:param prefix: the prefix of each manifest.
:param num_digits: the width of ``split_idx``, which will be left padded with zeros to achieve it.
:param start_idx: The split index to start counting from (default is ``0``).
:return: a list of lazily opened chunk manifests.
"""
from lhotse.serialization import SequentialJsonlWriter
Expand All @@ -308,7 +310,7 @@ def split_manifest_lazy(
prefix = "split"

items = iter(it)
split_idx = 0
split_idx = start_idx
splits = []
while True:
try:
Expand Down
34 changes: 32 additions & 2 deletions test/audio/test_audio_reads.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from tempfile import NamedTemporaryFile
import shutil
from io import BytesIO
from pathlib import Path
from tempfile import NamedTemporaryFile, TemporaryDirectory

import numpy as np
import pytest
Expand All @@ -7,7 +10,12 @@

import lhotse
from lhotse import AudioSource, Recording
from lhotse.audio.backend import read_opus_ffmpeg, read_opus_torchaudio, torchaudio_load
from lhotse.audio.backend import (
info,
read_opus_ffmpeg,
read_opus_torchaudio,
torchaudio_load,
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -78,6 +86,14 @@ def test_resample_opus():
r1.load_audio()


def test_opus_name_with_whitespaces():
    """Check that an OPUS file whose name contains a space can be loaded."""
    with TemporaryDirectory() as tmp_dir:
        # Copy the fixture under a file name that contains a space.
        opus_copy = Path(tmp_dir) / "white space.opus"
        shutil.copy("test/fixtures/mono_c0.opus", opus_copy)
        recording = Recording.from_file(opus_copy)
        recording.load_audio()  # does not raise


@pytest.mark.parametrize(
"path",
[
Expand Down Expand Up @@ -223,3 +239,17 @@ def test_audio_loading_optimization_returns_expected_num_samples():
cut.duration = reduced_num_samples / cut.sampling_rate
audio = cut.load_audio()
assert audio.shape[1] == reduced_num_samples


def test_audio_info_from_bytes_io():
    """Check that ``info`` accepts an in-memory file-like object.

    Also checks that combining a file-like object with ``force_read_audio=True``
    raises ``AssertionError``.
    """
    # Use a context manager so the fixture's file handle is closed right away
    # instead of being left for the garbage collector.
    with open("test/fixtures/mono_c0.wav", "rb") as f:
        audio_filelike = BytesIO(f.read())

    meta = info(audio_filelike)
    assert meta.duration == 0.5
    assert meta.frames == 4000
    assert meta.samplerate == 8000
    assert meta.channels == 1

    with pytest.raises(AssertionError):
        # force_read_audio won't work with a filelike object.
        # The call is deliberately not wrapped in ``assert``: otherwise a falsy
        # return value would raise AssertionError too and mask a missing check.
        info(audio_filelike, force_read_audio=True)
Loading