Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

save sdm files into a single mdm file to do gss #1221

Merged
merged 3 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion lhotse/bin/modes/recipes/icmcasr.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
help="How many threads to use (can give good speed-ups with slow disks).",
)
@click.option(
"--mic", type=click.Choice(["ihm", "sdm"]), default="ihm", help="Microphone type."
"--mic",
type=click.Choice(["ihm", "sdm", "mdm"]),
default="ihm",
help="Microphone type.",
)
def icmcasr(
corpus_dir: Pathlike,
Expand Down
61 changes: 51 additions & 10 deletions lhotse/recipes/icmcasr.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,19 @@

from tqdm.auto import tqdm

from lhotse.audio import Recording, RecordingSet
from lhotse.audio import AudioSource, Recording, RecordingSet
from lhotse.audio.backend import info
from lhotse.qa import fix_manifests, validate_recordings_and_supervisions
from lhotse.recipes.utils import manifests_exist, normalize_text_alimeeting
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, is_module_available

ICMCASR = ("train", "dev") # TODO: Support all subsets when released
POSITION = ("DA01", "DA02", "DA03", "DA04")
SDM_POSITION = ("DX01C01", "DX02C01", "DX03C01", "DX04C01", "DX05C01", "DX06C01")
# "DX05C01" and "DX06C01" are intentionally left out of SDM_POSITION:
# they are 2-channel reference signals for AEC (acoustic echo cancellation).
# See https://github.com/MrSupW/ICMC-ASR_Baseline/tree/main
SDM_POSITION = ("DX01C01", "DX02C01", "DX03C01", "DX04C01")
yuekaizhang marked this conversation as resolved.
Show resolved Hide resolved


def _parse_utterance(
Expand Down Expand Up @@ -60,18 +64,55 @@ def _parse_utterance(
+ f"-{position}"
for sdm_position in SDM_POSITION
]
elif mic == "mdm":
audio_paths = ["fake_audio_path_for_mdm"]
recording_ids = [
str(section_path / "DXmixC01")
.replace(str(corpus_dir) + "/", "")
.replace("/", "-")
+ f"-{position}"
]
else:
raise ValueError(f"Unsupported mic type: {mic}")

for audio_path, recording_id in zip(audio_paths, recording_ids):
if mic == "mdm":
channel_paths = [
(section_path / (position + ".wav")).resolve()
for position in SDM_POSITION
]
audio_info = info(
channel_paths[0],
force_opus_sampling_rate=None,
force_read_audio=False,
)
recordings.append(
Recording(
id=recording_id,
sources=[
AudioSource(
type="file",
channels=[idx],
source=str(audio_path),
)
for idx, audio_path in enumerate(channel_paths)
],
sampling_rate=16000,
num_samples=audio_info.frames,
duration=audio_info.duration,
)
)
# check if audio_path exists, if not, then skip
if not audio_path.is_file():
# give some warning
logging.warning(f"Audio file {audio_path} does not exist - skipping.")
continue
recordings.append(
Recording.from_file(path=audio_path, recording_id=recording_id)
)
else:
if not audio_path.is_file():
# give some warning
logging.warning(
f"Audio file {audio_path} does not exist - skipping."
)
continue
recordings.append(
Recording.from_file(path=audio_path, recording_id=recording_id)
)

tg = textgrid.TextGrid.fromFile(str(text_path))
assert len(tg.tiers) == 1, f"Expected 1 tier, found {len(tg.tiers)} tiers."
Expand All @@ -87,7 +128,7 @@ def _parse_utterance(
recording_id=recording_id,
start=start,
duration=round(end - start, 4),
channel=0,
channel=0 if mic in ["sdm", "ihm"] else list(range(4)),
language="Chinese",
speaker=speaker,
text=normalize_text_alimeeting(text),
Expand Down
Loading