Skip to content

Commit

Permalink
Add VAD to Supervisions in LibriLight Recipe (#1280)
Browse files Browse the repository at this point in the history
* Add vad info to supervisions

* Use add_durations

* Update librilight.py

---------

Co-authored-by: yifanyeung <yifanyeung@yifanyeung.local>
  • Loading branch information
yfyeung and yifanyeung committed Feb 3, 2024
1 parent 455b20e commit 00abc09
Showing 1 changed file with 30 additions and 14 deletions.
44 changes: 30 additions & 14 deletions lhotse/recipes/librilight.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
This dataset is very large - please download it manually from LIBRILIGHT_URL.
"""

import json
import logging
import os
from collections import defaultdict
Expand All @@ -26,7 +27,7 @@
from lhotse.qa import fix_manifests, validate_recordings_and_supervisions
from lhotse.recipes.utils import manifests_exist
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike
from lhotse.utils import Pathlike, add_durations

LIBRILIGHT = ("small", "medium", "large")

Expand All @@ -42,28 +43,43 @@ def _parse_utterance(
audio_path: Pathlike,
) -> Optional[Tuple[Recording, SupervisionSegment]]:
file_name = str(audio_path).replace(".flac", "").replace(str(corpus_dir) + "/", "")
speaker = str(audio_path).split("/")[-3]
audio_path = audio_path.resolve()

if not audio_path.is_file():
logging.warning(f"No such file: {audio_path}")
return None

audio_info_path = str(audio_path).replace("flac", "json")
with open(audio_info_path) as f:
audio_infos = json.load(f)
speaker = audio_infos["speaker"]
vad_infos = audio_infos["voice_activity"]

recording = Recording.from_file(
path=audio_path,
recording_id=file_name,
)
segment = SupervisionSegment(
id=file_name,
recording_id=file_name,
start=0.0,
duration=recording.duration,
channel=0,
language="English",
speaker=speaker,
)

return recording, segment
segments = []
segment_seq = 0
sampling_rate = 16000
for vad_info in vad_infos:
segments.append(
SupervisionSegment(
id=file_name + "_" + str(segment_seq),
recording_id=file_name,
start=vad_info[0],
duration=add_durations(
vad_info[1], -vad_info[0], sampling_rate=sampling_rate
),
channel=0,
language="English",
speaker=speaker,
)
)
segment_seq += 1

return recording, segments


def _prepare_subset(
Expand Down Expand Up @@ -92,9 +108,9 @@ def _prepare_subset(
result = future.result()
if result is None:
continue
recording, segment = result
recording, segments = result
recordings.append(recording)
supervisions.append(segment)
supervisions.extend(segments)

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)
Expand Down

0 comments on commit 00abc09

Please sign in to comment.