Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Minor bug fix for eval2000 recipe #1127

Merged
merged 16 commits into from
Oct 16, 2023
24 changes: 22 additions & 2 deletions lhotse/bin/modes/recipes/eval2000.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,28 @@
from lhotse.recipes import prepare_eval2000
from lhotse.utils import Pathlike

EVAL2000_TRANSCRIPT_DIR = "LDC2002T43"


@prepare.command(context_settings=dict(show_default=True))
@click.argument("corpus-dir", type=click.Path(exists=True, file_okay=False))
@click.argument("output-dir", type=click.Path())
@click.argument(
JinZr marked this conversation as resolved.
Show resolved Hide resolved
"transcript-dir",
type=click.Path(exists=True, file_okay=False),
default=None,
)
@click.option(
"--absolute-paths",
default=False,
help="Whether to return absolute or relative (to the corpus dir) paths for recordings.",
)
def eval2000(corpus_dir: Pathlike, output_dir: Pathlike, absolute_paths: bool):
def eval2000(
corpus_dir: Pathlike,
output_dir: Pathlike,
absolute_paths: bool,
transcript_dir: Pathlike,
):
"""
The Eval2000 corpus preparation.

Expand All @@ -25,6 +37,14 @@ def eval2000(corpus_dir: Pathlike, output_dir: Pathlike, absolute_paths: bool):

This data is not available for free - your institution needs to have an LDC subscription.
"""
transcript_dir = (
corpus_dir / EVAL2000_TRANSCRIPT_DIR / "reference" / "english"
if transcript_dir is None
else transcript_dir
)
prepare_eval2000(
corpus_dir=corpus_dir, output_dir=output_dir, absolute_paths=absolute_paths
corpus_dir=corpus_dir,
output_dir=output_dir,
absolute_paths=absolute_paths,
transcript_path=transcript_dir,
)
3 changes: 2 additions & 1 deletion lhotse/recipes/eval2000.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
def prepare_eval2000(
corpus_dir: Pathlike,
output_dir: Pathlike,
transcript_path: Pathlike,
JinZr marked this conversation as resolved.
Show resolved Hide resolved
absolute_paths: bool = False,
num_jobs: int = 1,
) -> Dict[str, Union[RecordingSet, SupervisionSet]]:
Expand All @@ -48,7 +49,7 @@ def prepare_eval2000(
assert (
audio_partition_dir_path.is_dir()
), f"No such directory:{audio_partition_dir_path}"
transcript_dir_path = corpus_dir / EVAL2000_TRANSCRIPT_DIR / "reference" / "english"
transcript_dir_path = transcript_path
JinZr marked this conversation as resolved.
Show resolved Hide resolved
assert transcript_dir_path.is_dir(), f"No such directory:{transcript_dir_path}"
groups = []
for path in (audio_partition_dir_path).rglob("*.sph"):
Expand Down
Loading