From 9491bdbc035000bfcfad468c85f4953d3027c0ec Mon Sep 17 00:00:00 2001
From: root <zhangyuekai@foxmail.com>
Date: Fri, 24 Nov 2023 11:44:00 +0000
Subject: [PATCH 1/3] Merge SDM channel files into one MDM file for GSS

---
 lhotse/bin/modes/recipes/icmcasr.py |  5 ++++-
 lhotse/recipes/icmcasr.py           | 22 ++++++++++++++++++++--
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/lhotse/bin/modes/recipes/icmcasr.py b/lhotse/bin/modes/recipes/icmcasr.py
index a7c137c20..a2b5594c6 100644
--- a/lhotse/bin/modes/recipes/icmcasr.py
+++ b/lhotse/bin/modes/recipes/icmcasr.py
@@ -18,7 +18,10 @@
     help="How many threads to use (can give good speed-ups with slow disks).",
 )
 @click.option(
-    "--mic", type=click.Choice(["ihm", "sdm"]), default="ihm", help="Microphone type."
+    "--mic",
+    type=click.Choice(["ihm", "sdm", "mdm"]),
+    default="ihm",
+    help="Microphone type.",
 )
 def icmcasr(
     corpus_dir: Pathlike,
diff --git a/lhotse/recipes/icmcasr.py b/lhotse/recipes/icmcasr.py
index 1f211e68b..0d1c70234 100644
--- a/lhotse/recipes/icmcasr.py
+++ b/lhotse/recipes/icmcasr.py
@@ -6,6 +6,7 @@
 
 import logging
 import os
+import subprocess
 from collections import defaultdict
 from concurrent.futures.thread import ThreadPoolExecutor
 from pathlib import Path
@@ -21,7 +22,8 @@
 
 ICMCASR = ("train", "dev")  # TODO: Support all subsets when released
 POSITION = ("DA01", "DA02", "DA03", "DA04")
-SDM_POSITION = ("DX01C01", "DX02C01", "DX03C01", "DX04C01", "DX05C01", "DX06C01")
+# ignore "DX05C01", "DX06C01"
+SDM_POSITION = ("DX01C01", "DX02C01", "DX03C01", "DX04C01")
 
 
 def _parse_utterance(
@@ -60,6 +62,22 @@ def _parse_utterance(
                 + f"-{position}"
                 for sdm_position in SDM_POSITION
             ]
+        elif mic == "mdm":
+            wav_path_stereo = section_path / "DXmixC01.wav"
+            if not wav_path_stereo.is_file():
+                audio_paths = [
+                    (section_path / (sdm_position + ".wav")).resolve()
+                    for sdm_position in SDM_POSITION
+                ]
+                cmd = f"sox -M -c 1 {audio_paths[0]} -c 1 {audio_paths[1]} -c 1 {audio_paths[2]} -c 1 {audio_paths[3]} {wav_path_stereo.resolve()}"
+                subprocess.run(cmd, shell=True, check=True)
+            audio_paths = [wav_path_stereo.resolve()]
+            recording_ids = [
+                str(section_path / "DXmixC01")
+                .replace(str(corpus_dir) + "/", "")
+                .replace("/", "-")
+                + f"-{position}"
+            ]
         else:
             raise ValueError(f"Unsupported mic type: {mic}")
 
@@ -87,7 +105,7 @@ def _parse_utterance(
                         recording_id=recording_id,
                         start=start,
                         duration=round(end - start, 4),
-                        channel=0,
+                        channel=0 if mic in ["sdm", "ihm"] else list(range(4)),
                         language="Chinese",
                         speaker=speaker,
                         text=normalize_text_alimeeting(text),

From 4eb2fdc0f952f808cb4a50e32a6e723cab75edf6 Mon Sep 17 00:00:00 2001
From: root <zhangyuekai@foxmail.com>
Date: Mon, 27 Nov 2023 06:16:50 +0000
Subject: [PATCH 2/3] Stop saving merged MDM files; use per-channel sources

---
 lhotse/recipes/icmcasr.py | 56 +++++++++++++++++++++++++++------------
 1 file changed, 39 insertions(+), 17 deletions(-)

diff --git a/lhotse/recipes/icmcasr.py b/lhotse/recipes/icmcasr.py
index 0d1c70234..adb89c97a 100644
--- a/lhotse/recipes/icmcasr.py
+++ b/lhotse/recipes/icmcasr.py
@@ -14,7 +14,8 @@
 
 from tqdm.auto import tqdm
 
-from lhotse.audio import Recording, RecordingSet
+from lhotse.audio import AudioSource, Recording, RecordingSet
+from lhotse.audio.backend import info
 from lhotse.qa import fix_manifests, validate_recordings_and_supervisions
 from lhotse.recipes.utils import manifests_exist, normalize_text_alimeeting
 from lhotse.supervision import SupervisionSegment, SupervisionSet
@@ -63,15 +64,7 @@ def _parse_utterance(
                 for sdm_position in SDM_POSITION
             ]
         elif mic == "mdm":
-            wav_path_stereo = section_path / "DXmixC01.wav"
-            if not wav_path_stereo.is_file():
-                audio_paths = [
-                    (section_path / (sdm_position + ".wav")).resolve()
-                    for sdm_position in SDM_POSITION
-                ]
-                cmd = f"sox -M -c 1 {audio_paths[0]} -c 1 {audio_paths[1]} -c 1 {audio_paths[2]} -c 1 {audio_paths[3]} {wav_path_stereo.resolve()}"
-                subprocess.run(cmd, shell=True, check=True)
-            audio_paths = [wav_path_stereo.resolve()]
+            audio_paths = ["fake_audio_path_for_mdm"]
             recording_ids = [
                 str(section_path / "DXmixC01")
                 .replace(str(corpus_dir) + "/", "")
@@ -82,14 +75,43 @@ def _parse_utterance(
             raise ValueError(f"Unsupported mic type: {mic}")
 
         for audio_path, recording_id in zip(audio_paths, recording_ids):
+            if mic == "mdm":
+                channel_paths = [
+                    (section_path / (position + ".wav")).resolve()
+                    for position in SDM_POSITION
+                ]
+                audio_info = info(
+                    channel_paths[0],
+                    force_opus_sampling_rate=None,
+                    force_read_audio=False,
+                )
+                recordings.append(
+                    Recording(
+                        id=recording_id,
+                        sources=[
+                            AudioSource(
+                                type="file",
+                                channels=[idx],
+                                source=str(audio_path),
+                            )
+                            for idx, audio_path in enumerate(channel_paths)
+                        ],
+                        sampling_rate=16000,
+                        num_samples=audio_info.frames,
+                        duration=audio_info.duration,
+                    )
+                )
             # check if audio_path exists, if not, then skip
-            if not audio_path.is_file():
-                # give some warning
-                logging.warning(f"Audio file {audio_path} does not exist - skipping.")
-                continue
-            recordings.append(
-                Recording.from_file(path=audio_path, recording_id=recording_id)
-            )
+            else:
+                if not audio_path.is_file():
+                    # give some warning
+                    logging.warning(
+                        f"Audio file {audio_path} does not exist - skipping."
+                    )
+                    continue
+                recordings.append(
+                    Recording.from_file(path=audio_path, recording_id=recording_id)
+                )
 
             tg = textgrid.TextGrid.fromFile(str(text_path))
             assert len(tg.tiers) == 1, f"Expected 1 tier, found {len(tg.tiers)} tiers."

From 39d48e86ff0a51c833bc47d54964c4ab0313d01f Mon Sep 17 00:00:00 2001
From: root <zhangyuekai@foxmail.com>
Date: Tue, 28 Nov 2023 12:00:53 +0000
Subject: [PATCH 3/3] add comments for ignored signals

---
 lhotse/recipes/icmcasr.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lhotse/recipes/icmcasr.py b/lhotse/recipes/icmcasr.py
index adb89c97a..20f25112f 100644
--- a/lhotse/recipes/icmcasr.py
+++ b/lhotse/recipes/icmcasr.py
@@ -6,7 +6,6 @@
 
 import logging
 import os
-import subprocess
 from collections import defaultdict
 from concurrent.futures.thread import ThreadPoolExecutor
 from pathlib import Path
@@ -23,7 +22,9 @@
 
 ICMCASR = ("train", "dev")  # TODO: Support all subsets when released
 POSITION = ("DA01", "DA02", "DA03", "DA04")
-# ignore "DX05C01", "DX06C01"
+# "DX05C01" and "DX06C01" are intentionally excluded: they are the
+# 2-channel reference signals for acoustic echo cancellation (AEC).
+# See https://github.com/MrSupW/ICMC-ASR_Baseline/tree/main
 SDM_POSITION = ("DX01C01", "DX02C01", "DX03C01", "DX04C01")