From 69ab31dbd67f4aafa4c87cad43b98eeaf6efbba2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20=C5=BBelasko?=
Date: Tue, 23 Jan 2024 09:13:54 -0500
Subject: [PATCH] Fix duplication issues in CutSet.mix() (#1268)

---
Review note (this region is ignored when the patch is applied):
The added `continue` makes the pass-through branch skip the rest of the
loop body, so a cut that was yielded unchanged does not also reach the
mixing code that follows it. The new test checks that mixing with
mix_prob=0.1 produces exactly as many cuts as the input CutSet has.

 lhotse/cut/set.py           |  1 +
 test/cut/test_cut_mixing.py | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/lhotse/cut/set.py b/lhotse/cut/set.py
index 09789f67f..267ee16e8 100644
--- a/lhotse/cut/set.py
+++ b/lhotse/cut/set.py
@@ -3391,6 +3391,7 @@ def __iter__(self):
             # or pass it through unchanged.
             if rng.uniform(0.0, 1.0) > self.mix_prob:
                 yield cut
+                continue
             to_mix = next(mix_in_cuts)
             # Determine the SNR - either it's specified or we need to sample one.
             cut_snr = (
diff --git a/test/cut/test_cut_mixing.py b/test/cut/test_cut_mixing.py
index 89ffa4928..f1f31b04d 100644
--- a/test/cut/test_cut_mixing.py
+++ b/test/cut/test_cut_mixing.py
@@ -474,3 +474,20 @@ def test_cut_set_mix_is_lazy():
     mixed = cuts.mix(cuts, snr=10, mix_prob=1.0, seed=0)
 
     assert mixed.is_lazy
+
+
+def test_cut_set_mix_size_is_not_growing():
+    cuts = DummyManifest(CutSet, begin_id=0, end_id=100)
+    noise_cuts = DummyManifest(CutSet, begin_id=10, end_id=20)
+
+    mixed_cuts = cuts.mix(
+        cuts=noise_cuts,
+        duration=None,
+        snr=10,
+        mix_prob=0.1,
+        preserve_id=None,
+        seed=42,
+        random_mix_offset=True,
+    ).to_eager()
+
+    assert len(mixed_cuts) == len(cuts)