Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for manifest validation and fixing #1284

Merged
merged 1 commit into from
Feb 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions lhotse/bin/modes/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
from lhotse import load_manifest, validate

data = load_manifest(manifest)
validate(data, read_data=read_data)
try:
validate(data, read_data=read_data)
except AssertionError as e:
click.echo(f"Validation failed: {e}")
return 1

Check warning on line 26 in lhotse/bin/modes/validate.py

View check run for this annotation

Codecov / codecov/patch

lhotse/bin/modes/validate.py#L22-L26

Added lines #L22 - L26 were not covered by tests


@cli.command(name="validate-pair")
Expand All @@ -40,9 +44,13 @@

recs = load_manifest(recordings)
sups = load_manifest(supervisions)
validate_recordings_and_supervisions(
recordings=recs, supervisions=sups, read_data=read_data
)
try:
validate_recordings_and_supervisions(

Check warning on line 48 in lhotse/bin/modes/validate.py

View check run for this annotation

Codecov / codecov/patch

lhotse/bin/modes/validate.py#L47-L48

Added lines #L47 - L48 were not covered by tests
recordings=recs, supervisions=sups, read_data=read_data
)
except AssertionError as e:
click.echo(f"Validation failed: {e}")
return 1

Check warning on line 53 in lhotse/bin/modes/validate.py

View check run for this annotation

Codecov / codecov/patch

lhotse/bin/modes/validate.py#L51-L53

Added lines #L51 - L53 were not covered by tests


@cli.command(name="fix")
Expand Down
40 changes: 27 additions & 13 deletions lhotse/qa.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from collections import defaultdict
from collections import Counter, defaultdict
from math import isclose
from typing import Any, Callable, Dict, Iterable, Optional, Tuple, Union

Expand Down Expand Up @@ -88,20 +88,22 @@
These items will be discarded by default when creating a CutSet.
"""
if isinstance(recordings, Recording):
recordings = RecordingSet.from_recordings([recordings])
recordings = RecordingSet([recordings])

Check warning on line 91 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L91

Added line #L91 was not covered by tests
if isinstance(supervisions, SupervisionSegment):
supervisions = SupervisionSet.from_segments([supervisions])
supervisions = SupervisionSet([supervisions])

Check warning on line 93 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L93

Added line #L93 was not covered by tests

if recordings.is_lazy:
recordings = RecordingSet.from_recordings(iter(recordings))
if supervisions.is_lazy:
supervisions = SupervisionSet.from_segments(iter(supervisions))
recordings = recordings.to_eager()
supervisions = supervisions.to_eager()

Check warning on line 96 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L95-L96

Added lines #L95 - L96 were not covered by tests

validate(recordings, read_data=read_data)
validate(supervisions)
# Errors
id2rec = {r.id: r for r in recordings}

Check warning on line 101 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L101

Added line #L101 was not covered by tests
for s in supervisions:
r = recordings[s.recording_id]
r = id2rec.get(s.recording_id)
assert (

Check warning on line 104 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L103-L104

Added lines #L103 - L104 were not covered by tests
r is not None
), f"Supervision {s.id} references non-existent recording {s.recording_id}"
assert -1e-3 <= s.start <= s.end <= r.duration + 1e-3, (
f"Supervision {s.id}: exceeded the bounds of its corresponding recording "
f"(supervision spans [{s.start}, {s.end}]; recording spans [0, {r.duration}])"
Expand All @@ -111,7 +113,7 @@
f"(recording channels: {r.channel_ids})"
)
# Warnings
recording_ids = frozenset(r.id for r in recordings)
recording_ids = id2rec.keys()

Check warning on line 116 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L116

Added line #L116 was not covered by tests
recording_ids_in_sups = frozenset(s.recording_id for s in supervisions)
only_in_recordings = recording_ids - recording_ids_in_sups
if only_in_recordings:
Expand Down Expand Up @@ -172,15 +174,14 @@
not exceeding the duration of their corresponding :class:`~lhotse.audio.Recording`.
"""
if isinstance(recordings, Recording):
recordings = RecordingSet.from_recordings([recordings])
if recordings.is_lazy:
recordings = RecordingSet.from_recordings(iter(recordings))
recordings = RecordingSet([recordings])

Check warning on line 177 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L177

Added line #L177 was not covered by tests

id2rec = {r.id: r for r in recordings}

Check warning on line 179 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L179

Added line #L179 was not covered by tests
sups = []
removed = 0
trimmed = 0
for s in supervisions:
end = recordings[s.recording_id].duration
end = id2rec[s.recording_id].duration

Check warning on line 184 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L184

Added line #L184 was not covered by tests
if s.start > end:
removed += 1
continue
Expand Down Expand Up @@ -438,20 +439,30 @@
@register_validator
def validate_recording_set(recordings: RecordingSet, read_data: bool = False) -> None:
rates = set()
ids = Counter()

Check warning on line 442 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L442

Added line #L442 was not covered by tests
for r in recordings:
validate_recording(r, read_data=read_data)
rates.add(r.sampling_rate)
ids[r.id] += 1

Check warning on line 446 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L446

Added line #L446 was not covered by tests
if len(rates) > 1:
logging.warning(
f"RecordingSet contains recordings with different sampling rates ({rates}). "
f"Make sure that this was intended."
)
assert (

Check warning on line 452 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L452

Added line #L452 was not covered by tests
ids.most_common(1)[0][1] <= 1
), "RecordingSet has recordings with duplicated IDs."


@register_validator
def validate_supervision_set(supervisions: SupervisionSet, **kwargs) -> None:
ids = Counter()

Check warning on line 459 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L459

Added line #L459 was not covered by tests
for s in supervisions:
validate_supervision(s)
ids[s.id] += 1
assert (

Check warning on line 463 in lhotse/qa.py

View check run for this annotation

Codecov / codecov/patch

lhotse/qa.py#L462-L463

Added lines #L462 - L463 were not covered by tests
ids.most_common(1)[0][1] <= 1
), "SupervisionSet has supervisions with duplicated IDs."

# Catch errors in data preparation:
# - more than one supervision for a given recording starts at 0 (in a given channel)
Expand Down Expand Up @@ -494,5 +505,8 @@

@register_validator
def validate_cut_set(cuts: CutSet, read_data: bool = False) -> None:
    """Validate every cut in ``cuts`` and check that cut IDs are unique.

    Each cut is passed to ``validate_cut`` (forwarding ``read_data``), and the
    number of occurrences of each cut ID is tallied along the way.

    :param cuts: the collection of cuts to validate; it is iterated exactly once.
    :param read_data: forwarded unchanged to ``validate_cut`` for every cut.
    :raises AssertionError: when at least one cut ID occurs more than once.
    """
    ids = Counter()
    for c in cuts:
        validate_cut(c, read_data=read_data)
        ids[c.id] += 1
    # Inspect the counts directly rather than indexing ``most_common(1)[0]``:
    # for an empty CutSet ``most_common(1)`` returns ``[]`` and the indexing
    # would raise IndexError, even though an empty set has no duplicates.
    assert all(
        count <= 1 for count in ids.values()
    ), "CutSet has cuts with duplicated IDs."
Loading