Fixing some multichannel bugs when switching to in-memory ops. (#118)

* Fixing some multichannel bugs.
* Updating the test case, sidestepping multi-channel regression data for now.
* Pinning pyristent to support Python 2.7, 3.4.
* Pinning jsonschema instead.
* Pinning pyristent to a working version, hopefully.
* pyristent -> pyrsistent *facepalm*
* Fix for Issue #113, trimming with isolated events. (#115) Fixes a bug that happens when you trim an event, then generate from the trimmed JAMS file, with saving of isolated sources enabled.
* Updating test for trimming soundscapes to take into account saving isolated sources
* Pushing the actual fix to generate_from_jams.
* Updating changelog and bumping version
* Using TemporaryDirectory in the test case now, rather than relying on the whims of tempfile
* Raising atol in a test from 1e-5 to 1e-4.
* Adding subtype to soundfile.write to get rid of precision issues.

Co-authored-by: pseeth <prem@descript.com>
justinsalamon · Sep 18, 2020 · 34e9831 · 34e9831
1 parent 65f3212
commit 34e9831
Show file tree

Hide file tree

Showing 5 changed files with 142 additions and 68 deletions.
diff --git a/docs/changes.rst b/docs/changes.rst
@@ -2,6 +2,10 @@
 
 Changelog
 ---------
+v1.3.9
+~~~~~~
+- Fixed a bug where trimming a soundscape before generating it from a JAMS file, with saving of isolated events enabled, resulted in incorrect soundscape audio.
+
 v1.3.8
 ~~~~~~
 - Fixed a bug where _sample_trunc_norm returned an array in Scipy 1.5.1, but returns a scalar in Scipy 1.4.0.

diff --git a/scaper/core.py b/scaper/core.py
@@ -194,7 +194,7 @@ def generate_from_jams(jams_infile, audio_outfile, fg_path=None, bg_path=None,
                     tfm.trim(sliceop['slice_start'], sliceop['slice_end'])
                     tfm.build(audio_file, tmpfiles[-1].name)
                     # Copy result back to original file
-                    shutil.copyfile(tmpfiles[-1].name, audio_outfile)
+                    shutil.copyfile(tmpfiles[-1].name, audio_file)
 
     # Optionally save new jams file
     if jams_outfile is not None:
@@ -1748,9 +1748,13 @@ def _generate_audio(self, audio_path, ann, reverb=None,
                             input_array=event_audio,
                             sample_rate_in=event_sr
                         )
+                        event_audio = event_audio.reshape(-1, self.n_channels)
                         # Write event_audio_array to disk so we can compute LUFS using ffmpeg
                         soundfile.write(
-                            tmpfiles_internal[-1].name, event_audio.T, self.sr)
+                            tmpfiles_internal[-1].name, 
+                            event_audio,
+                            self.sr
+                        )
                         # NOW compute LUFS
                         bg_lufs = get_integrated_lufs(
                             tmpfiles_internal[-1].name)
@@ -1806,8 +1810,13 @@ def _generate_audio(self, audio_path, ann, reverb=None,
                             input_array=event_audio,
                             sample_rate_in=event_sr
                         )
+                        event_audio = event_audio.reshape(-1, self.n_channels)
+
                         soundfile.write(
-                            tmpfiles_internal[-1].name, event_audio.T, self.sr)
+                            tmpfiles_internal[-1].name, 
+                            event_audio,
+                            self.sr
+                        )
                         # NOW compute LUFS
                         fg_lufs = get_integrated_lufs(
                             tmpfiles_internal[-1].name)
@@ -1821,8 +1830,8 @@ def _generate_audio(self, audio_path, ann, reverb=None,
                         # (avoid unnatural sound onsets/offsets)
                         fade_in_samples =  int(self.fade_in_len * self.sr)
                         fade_out_samples = int(self.fade_out_len * self.sr)
-                        fade_in_window = np.sin(np.linspace(0, np.pi / 2, fade_in_samples))
-                        fade_out_window = np.sin(np.linspace(np.pi / 2, 0, fade_out_samples))
+                        fade_in_window = np.sin(np.linspace(0, np.pi / 2, fade_in_samples))[..., None]
+                        fade_out_window = np.sin(np.linspace(np.pi / 2, 0, fade_out_samples))[..., None]
 
                         event_audio[:fade_in_samples] *= fade_in_window
                         event_audio[-fade_out_samples:] *= fade_out_window
@@ -1831,8 +1840,8 @@ def _generate_audio(self, audio_path, ann, reverb=None,
                         # soundscape duration
                         prepad = int(self.sr * e.value['event_time'])
                         postpad = max(0, duration_in_samples - (event_audio.shape[0] + prepad))
-                        event_audio = np.pad(event_audio, ((prepad, postpad)), mode='constant',
-                            constant_values=(0, 0))
+                        event_audio = np.pad(event_audio, ((prepad, postpad), (0, 0)), 
+                            mode='constant', constant_values=(0, 0))
                         event_audio = event_audio[:duration_in_samples]
 
                         event_audio_list.append(event_audio[:duration_in_samples])
@@ -1860,7 +1869,7 @@ def _generate_audio(self, audio_path, ann, reverb=None,
                         # os.makedirs(..., exist_ok=True) but we test back to
                         # Python 2.7.
                         os.makedirs(event_folder)
-                    soundfile.write(event_audio_path, event_audio_list[-1].T, self.sr)
+                    soundfile.write(event_audio_path, event_audio_list[-1], self.sr, subtype='PCM_32')
                     isolated_events_audio_path.append(event_audio_path)
 
                     #TODO what do we do in this case? for now throw a warning
@@ -1883,13 +1892,13 @@ def _generate_audio(self, audio_path, ann, reverb=None,
                 if reverb is not None:
                     tfm.reverb(reverberance=reverb * 100)
                 # TODO: do we want to normalize the final output?
-
                 soundscape_audio = sum(event_audio_list)
                 soundscape_audio = tfm.build_array(
                     input_array=soundscape_audio,
                     sample_rate_in=self.sr,
                 )
-                soundfile.write(audio_path, soundscape_audio, self.sr)
+                soundscape_audio = soundscape_audio.reshape(-1, self.n_channels)
+                soundfile.write(audio_path, soundscape_audio, self.sr, subtype='PCM_32')
 
         ann.sandbox.scaper.soundscape_audio_path = audio_path
         ann.sandbox.scaper.isolated_events_audio_path = isolated_events_audio_path

diff --git a/scaper/version.py b/scaper/version.py
@@ -3,4 +3,4 @@
 """Version info"""
 
 short_version = '1.3'
-version = '1.3.8'
+version = '1.3.9'
diff --git a/setup.py b/setup.py
@@ -37,9 +37,10 @@
         ],
     install_requires=[
         'sox==1.4.0b0',
+        'pyrsistent==0.15.4',
         'jams>=0.3.2',
         'numpy>=1.13.3',
-        'soundfile'
+        'soundfile',
     ],
     extras_require={
         'docs': [

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -165,7 +165,7 @@ def test_generate_from_jams(atol=1e-5, rtol=1e-8):
         pytest.raises(ScaperError, scaper.generate_from_jams, jam_file.name,
                       gen_file.name)
 
-    # Test for valid jams files
+    # Test for valid jams file
     tmpfiles = []
     with _close_temp_files(tmpfiles):
 
@@ -199,63 +199,113 @@ def test_generate_from_jams(atol=1e-5, rtol=1e-8):
                          pitch_shift=('uniform', -1, 1),
                          time_stretch=('uniform', 0.8, 1.2))
 
-        # generate, then generate from the jams and compare audio files
-        # repeat 5 times
-        for _ in range(5):
-            sc.generate(orig_wav_file.name, orig_jam_file.name,
-                        disable_instantiation_warnings=True)
-            scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name)
 
+        def _validate_soundscape_and_event_audio(orig_wav_file, gen_wav_file, 
+                                                 gen_events_path, orig_events_path):
             # validate audio
             orig_wav, sr = soundfile.read(orig_wav_file.name)
             gen_wav, sr = soundfile.read(gen_wav_file.name)
             assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol)
 
-        # Now add in trimming!
+            # validate that the sum of event audio sums to trimmed soundscape
+            gen_event_files = [
+                os.path.join(gen_events_path, x)
+                for x in sorted(os.listdir(gen_events_path))
+            ]
+            gen_audio = [soundfile.read(x)[0] for x in gen_event_files]
+
+            # Trim does not currently support trimming isolated events, but if/when
+            # we add that functionality, this test should be updated to test that
+            # as well, using the files in orig_events_path (currently unused).
+            # atol = 1e-8, to match test_generate_isolated_events
+            assert np.allclose(gen_wav, sum(gen_audio), atol=1e-8, rtol=rtol)
+
+        # generate, then generate from the jams and compare audio files
+        # repeat 5 times
         for _ in range(5):
             sc.generate(orig_wav_file.name, orig_jam_file.name,
                         disable_instantiation_warnings=True)
-            scaper.trim(orig_wav_file.name, orig_jam_file.name,
-                        orig_wav_file.name, orig_jam_file.name,
-                        np.random.uniform(0, 5), np.random.uniform(5, 10))
             scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name)
 
             # validate audio
             orig_wav, sr = soundfile.read(orig_wav_file.name)
             gen_wav, sr = soundfile.read(gen_wav_file.name)
             assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol)
 
+        # Now add in trimming!
+        for _ in range(5):
+            with backports.tempfile.TemporaryDirectory() as isolated_events_path:
+                orig_events_path = os.path.join(isolated_events_path, 'original')
+                gen_events_path = os.path.join(isolated_events_path, 'generated')
+                os.makedirs(orig_events_path)
+                os.makedirs(gen_events_path)
+
+                sc.generate(orig_wav_file.name, orig_jam_file.name,
+                            disable_instantiation_warnings=True,
+                            save_isolated_events=True, 
+                            isolated_events_path=orig_events_path)
+                scaper.trim(orig_wav_file.name, orig_jam_file.name,
+                            orig_wav_file.name, orig_jam_file.name,
+                            np.random.uniform(0, 5), np.random.uniform(5, 10))
+                scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name,
+                                          save_isolated_events=True, 
+                                          isolated_events_path=gen_events_path)
+
+                _validate_soundscape_and_event_audio(orig_wav_file, gen_wav_file, 
+                    gen_events_path, orig_events_path)
+
         # Double trimming
         for _ in range(2):
-            sc.generate(orig_wav_file.name, orig_jam_file.name,
-                        disable_instantiation_warnings=True)
-            scaper.trim(orig_wav_file.name, orig_jam_file.name,
-                        orig_wav_file.name, orig_jam_file.name,
-                        np.random.uniform(0, 2), np.random.uniform(8, 10))
-            scaper.trim(orig_wav_file.name, orig_jam_file.name,
-                        orig_wav_file.name, orig_jam_file.name,
-                        np.random.uniform(0, 2), np.random.uniform(4, 6))
-            scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name)
+            with backports.tempfile.TemporaryDirectory() as isolated_events_path:
+                orig_events_path = os.path.join(isolated_events_path, 'original')
+                gen_events_path = os.path.join(isolated_events_path, 'generated')
+                os.makedirs(orig_events_path)
+                os.makedirs(gen_events_path)
+
+                sc.generate(orig_wav_file.name, orig_jam_file.name,
+                            disable_instantiation_warnings=True,
+                            save_isolated_events=True, 
+                            isolated_events_path=orig_events_path)
+                scaper.trim(orig_wav_file.name, orig_jam_file.name,
+                            orig_wav_file.name, orig_jam_file.name,
+                            np.random.uniform(0, 2), np.random.uniform(8, 10))
+                scaper.trim(orig_wav_file.name, orig_jam_file.name,
+                            orig_wav_file.name, orig_jam_file.name,
+                            np.random.uniform(0, 2), np.random.uniform(4, 6))
+                scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name,
+                                          save_isolated_events=True, 
+                                          isolated_events_path=gen_events_path)
+
+                _validate_soundscape_and_event_audio(orig_wav_file, gen_wav_file, 
+                    gen_events_path, orig_events_path)
 
         # Triple trimming
         for _ in range(2):
-            sc.generate(orig_wav_file.name, orig_jam_file.name,
-                        disable_instantiation_warnings=True)
-            scaper.trim(orig_wav_file.name, orig_jam_file.name,
-                        orig_wav_file.name, orig_jam_file.name,
-                        np.random.uniform(0, 2), np.random.uniform(8, 10))
-            scaper.trim(orig_wav_file.name, orig_jam_file.name,
-                        orig_wav_file.name, orig_jam_file.name,
-                        np.random.uniform(0, 1), np.random.uniform(5, 6))
-            scaper.trim(orig_wav_file.name, orig_jam_file.name,
-                        orig_wav_file.name, orig_jam_file.name,
-                        np.random.uniform(0, 1), np.random.uniform(3, 4))
-            scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name)
-
-            # validate audio
-            orig_wav, sr = soundfile.read(orig_wav_file.name)
-            gen_wav, sr = soundfile.read(gen_wav_file.name)
-            assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol)
+            with backports.tempfile.TemporaryDirectory() as isolated_events_path:
+                orig_events_path = os.path.join(isolated_events_path, 'original')
+                gen_events_path = os.path.join(isolated_events_path, 'generated')
+                os.makedirs(orig_events_path)
+                os.makedirs(gen_events_path)
+
+                sc.generate(orig_wav_file.name, orig_jam_file.name,
+                            disable_instantiation_warnings=True,
+                            save_isolated_events=True, 
+                            isolated_events_path=orig_events_path)
+                scaper.trim(orig_wav_file.name, orig_jam_file.name,
+                            orig_wav_file.name, orig_jam_file.name,
+                            np.random.uniform(0, 2), np.random.uniform(8, 10))
+                scaper.trim(orig_wav_file.name, orig_jam_file.name,
+                            orig_wav_file.name, orig_jam_file.name,
+                            np.random.uniform(0, 1), np.random.uniform(5, 6))
+                scaper.trim(orig_wav_file.name, orig_jam_file.name,
+                            orig_wav_file.name, orig_jam_file.name,
+                            np.random.uniform(0, 1), np.random.uniform(3, 4))
+                scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name,
+                                          save_isolated_events=True, 
+                                          isolated_events_path=gen_events_path)
+
+                _validate_soundscape_and_event_audio(orig_wav_file, gen_wav_file, 
+                    gen_events_path, orig_events_path)
 
         # Test with new FG and BG paths
         for _ in range(5):
@@ -1268,21 +1318,23 @@ def _create_scaper_with_random_seed(seed):
 
 def test_generate_audio():
     for sr in SAMPLE_RATES:
-        REG_WAV_PATH = TEST_PATHS[sr]['REG'].wav
-        REG_BGONLY_WAV_PATH = TEST_PATHS[sr]['REG_BGONLY'].wav
-        REG_REVERB_WAV_PATH = TEST_PATHS[sr]['REG_REVERB'].wav
-        _test_generate_audio(sr, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_PATH)
+        for n_ch in range(1, 3):
+            REG_WAV_PATH = TEST_PATHS[sr]['REG'].wav
+            REG_BGONLY_WAV_PATH = TEST_PATHS[sr]['REG_BGONLY'].wav
+            REG_REVERB_WAV_PATH = TEST_PATHS[sr]['REG_REVERB'].wav
+            _test_generate_audio(sr, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_PATH, n_ch)
 
 
-def _test_generate_audio(SR, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_PATH, atol=1e-4, rtol=1e-8):
+def _test_generate_audio(SR, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_PATH, N_CHANNELS, atol=1e-4, rtol=1e-8):
     # Regression test: same spec, same audio (note this will fail if we update
     # any of the audio processing techniques used, e.g. change the time
     # stretching algorithm).
     sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
     sc.ref_db = -50
     sc.sr = SR
+    sc.n_channels = N_CHANNELS
 
-    print("TEST SR: {}".format(SR))
+    print("TEST SR: {}, # OF CHANNELS: {}".format(SR, N_CHANNELS))
 
     # background
     sc.add_background(
@@ -1340,24 +1392,30 @@ def _test_generate_audio(SR, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_P
         sc._generate_audio(wav_file.name, jam.annotations[0])
 
         # validate audio
-        wav, sr = soundfile.read(wav_file.name)
-        regwav, sr = soundfile.read(REG_WAV_PATH)
-        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
+        wav, sr = soundfile.read(wav_file.name, always_2d=True)
+        regwav, sr = soundfile.read(REG_WAV_PATH, always_2d=True)
+        # TODO: Add multi-channel regression data.
+        if N_CHANNELS == 1:
+            assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
 
         # with reverb
         sc._generate_audio(wav_file.name, jam.annotations[0], reverb=0.2)
         # validate audio
-        wav, sr = soundfile.read(wav_file.name)
-        regwav, sr = soundfile.read(REG_REVERB_WAV_PATH)
-        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
+        wav, sr = soundfile.read(wav_file.name, always_2d=True)
+        regwav, sr = soundfile.read(REG_REVERB_WAV_PATH, always_2d=True)
+        # TODO: Add multi-channel regression data.
+        if N_CHANNELS == 1:
+            assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
 
         # Don't disable sox warnings (just to cover line)
         sc._generate_audio(wav_file.name, jam.annotations[0],
                            disable_sox_warnings=False)
         # validate audio
-        wav, sr = soundfile.read(wav_file.name)
-        regwav, sr = soundfile.read(REG_WAV_PATH)
-        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
+        wav, sr = soundfile.read(wav_file.name, always_2d=True)
+        regwav, sr = soundfile.read(REG_WAV_PATH, always_2d=True)
+        # TODO: Add multi-channel regression data.
+        if N_CHANNELS == 1:
+            assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
 
         # namespace must be scaper
         jam.annotations[0].namespace = 'tag_open'
@@ -1393,9 +1451,11 @@ def _test_generate_audio(SR, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_P
         jam = sc._instantiate(disable_instantiation_warnings=True, reverb=reverb)
         sc._generate_audio(wav_file.name, jam.annotations[0], reverb=reverb)
         # validate audio
-        wav, sr = soundfile.read(wav_file.name)
-        regwav, sr = soundfile.read(REG_BGONLY_WAV_PATH)
-        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
+        wav, sr = soundfile.read(wav_file.name, always_2d=True)
+        regwav, sr = soundfile.read(REG_BGONLY_WAV_PATH, always_2d=True)
+        # TODO: Add multi-channel regression data.
+        if N_CHANNELS == 1:
+            assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
 
 
 def create_scaper_scene_without_random_seed():
@@ -1518,7 +1578,7 @@ def _delete_files(mix_file, directory):
             isolated_audio.append(_isolated_sandbox_audio)
 
         # the sum of the isolated audio should sum to the soundscape
-        assert np.allclose(sum(isolated_audio), soundscape_audio, atol=1e-4, rtol=1e-8)
+        assert np.allclose(sum(isolated_audio), soundscape_audio, atol=1e-8, rtol=1e-8)
 
         jam = sc._instantiate(disable_instantiation_warnings=True)