Skip to content

Commit

Permalink
Fixing some multichannel bugs when switching to in-memory ops. (#118)
Browse files Browse the repository at this point in the history
* Fixing some multichannel bugs.

* Updating the test case, sidestepping multi-channel regression data for now.

* Pinning pyristent to support Python 2.7, 3.4.

* Pinning jsonschema instead.

* Pinning pyristent to a working version, hopefully.

* pyristent -> pyrsistent *facepalm*

* Fix for Issue #113, trimming with isolated events. (#115)

Fixes a bug that occurs when you trim a soundscape and then generate from the trimmed JAMS file with saving of isolated sources enabled.

* Updating test for trimming soundscapes to take into account saving isolated sources

* Pushing the actual fix to generate_from_jams.

* Updating changelog and bumping version

* Using TemporaryDirectory in the test case now, rather than relying on the whims of tempfile

* Raising atol in a test from 1e-5 to 1e-4.

* Adding subtype to soundfile.write to get rid of precision issues.

Co-authored-by: pseeth <prem@descript.com>
  • Loading branch information
pseeth and pseeth committed Sep 18, 2020
1 parent 65f3212 commit 34e9831
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 68 deletions.
4 changes: 4 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

Changelog
---------
v1.3.9
~~~~~~
- Fixed a bug where trimming a soundscape and then generating it from the resulting JAMS file, with saving of isolated events enabled, produced incorrect soundscape audio.

v1.3.8
~~~~~~
- Fixed a bug where _sample_trunc_norm returned an array in Scipy 1.5.1, but returns a scalar in Scipy 1.4.0.
Expand Down
29 changes: 19 additions & 10 deletions scaper/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def generate_from_jams(jams_infile, audio_outfile, fg_path=None, bg_path=None,
tfm.trim(sliceop['slice_start'], sliceop['slice_end'])
tfm.build(audio_file, tmpfiles[-1].name)
# Copy result back to original file
shutil.copyfile(tmpfiles[-1].name, audio_outfile)
shutil.copyfile(tmpfiles[-1].name, audio_file)

# Optionally save new jams file
if jams_outfile is not None:
Expand Down Expand Up @@ -1748,9 +1748,13 @@ def _generate_audio(self, audio_path, ann, reverb=None,
input_array=event_audio,
sample_rate_in=event_sr
)
event_audio = event_audio.reshape(-1, self.n_channels)
# Write event_audio_array to disk so we can compute LUFS using ffmpeg
soundfile.write(
tmpfiles_internal[-1].name, event_audio.T, self.sr)
tmpfiles_internal[-1].name,
event_audio,
self.sr
)
# NOW compute LUFS
bg_lufs = get_integrated_lufs(
tmpfiles_internal[-1].name)
Expand Down Expand Up @@ -1806,8 +1810,13 @@ def _generate_audio(self, audio_path, ann, reverb=None,
input_array=event_audio,
sample_rate_in=event_sr
)
event_audio = event_audio.reshape(-1, self.n_channels)

soundfile.write(
tmpfiles_internal[-1].name, event_audio.T, self.sr)
tmpfiles_internal[-1].name,
event_audio,
self.sr
)
# NOW compute LUFS
fg_lufs = get_integrated_lufs(
tmpfiles_internal[-1].name)
Expand All @@ -1821,8 +1830,8 @@ def _generate_audio(self, audio_path, ann, reverb=None,
# (avoid unnatural sound onsets/offsets)
fade_in_samples = int(self.fade_in_len * self.sr)
fade_out_samples = int(self.fade_out_len * self.sr)
fade_in_window = np.sin(np.linspace(0, np.pi / 2, fade_in_samples))
fade_out_window = np.sin(np.linspace(np.pi / 2, 0, fade_out_samples))
fade_in_window = np.sin(np.linspace(0, np.pi / 2, fade_in_samples))[..., None]
fade_out_window = np.sin(np.linspace(np.pi / 2, 0, fade_out_samples))[..., None]

event_audio[:fade_in_samples] *= fade_in_window
event_audio[-fade_out_samples:] *= fade_out_window
Expand All @@ -1831,8 +1840,8 @@ def _generate_audio(self, audio_path, ann, reverb=None,
# soundscape duration
prepad = int(self.sr * e.value['event_time'])
postpad = max(0, duration_in_samples - (event_audio.shape[0] + prepad))
event_audio = np.pad(event_audio, ((prepad, postpad)), mode='constant',
constant_values=(0, 0))
event_audio = np.pad(event_audio, ((prepad, postpad), (0, 0)),
mode='constant', constant_values=(0, 0))
event_audio = event_audio[:duration_in_samples]

event_audio_list.append(event_audio[:duration_in_samples])
Expand Down Expand Up @@ -1860,7 +1869,7 @@ def _generate_audio(self, audio_path, ann, reverb=None,
# os.makedirs(..., exist_ok=True) but we test back to
# Python 2.7.
os.makedirs(event_folder)
soundfile.write(event_audio_path, event_audio_list[-1].T, self.sr)
soundfile.write(event_audio_path, event_audio_list[-1], self.sr, subtype='PCM_32')
isolated_events_audio_path.append(event_audio_path)

#TODO what do we do in this case? for now throw a warning
Expand All @@ -1883,13 +1892,13 @@ def _generate_audio(self, audio_path, ann, reverb=None,
if reverb is not None:
tfm.reverb(reverberance=reverb * 100)
# TODO: do we want to normalize the final output?

soundscape_audio = sum(event_audio_list)
soundscape_audio = tfm.build_array(
input_array=soundscape_audio,
sample_rate_in=self.sr,
)
soundfile.write(audio_path, soundscape_audio, self.sr)
soundscape_audio = soundscape_audio.reshape(-1, self.n_channels)
soundfile.write(audio_path, soundscape_audio, self.sr, subtype='PCM_32')

ann.sandbox.scaper.soundscape_audio_path = audio_path
ann.sandbox.scaper.isolated_events_audio_path = isolated_events_audio_path
Expand Down
2 changes: 1 addition & 1 deletion scaper/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
"""Version info"""

short_version = '1.3'
version = '1.3.8'
version = '1.3.9'
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@
],
install_requires=[
'sox==1.4.0b0',
'pyrsistent==0.15.4',
'jams>=0.3.2',
'numpy>=1.13.3',
'soundfile'
'soundfile',
],
extras_require={
'docs': [
Expand Down
172 changes: 116 additions & 56 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def test_generate_from_jams(atol=1e-5, rtol=1e-8):
pytest.raises(ScaperError, scaper.generate_from_jams, jam_file.name,
gen_file.name)

# Test for valid jams files
# Test for valid jams file
tmpfiles = []
with _close_temp_files(tmpfiles):

Expand Down Expand Up @@ -199,63 +199,113 @@ def test_generate_from_jams(atol=1e-5, rtol=1e-8):
pitch_shift=('uniform', -1, 1),
time_stretch=('uniform', 0.8, 1.2))

# generate, then generate from the jams and compare audio files
# repeat 5 times
for _ in range(5):
sc.generate(orig_wav_file.name, orig_jam_file.name,
disable_instantiation_warnings=True)
scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name)

def _validate_soundscape_and_event_audio(orig_wav_file, gen_wav_file,
gen_events_path, orig_events_path):
# validate audio
orig_wav, sr = soundfile.read(orig_wav_file.name)
gen_wav, sr = soundfile.read(gen_wav_file.name)
assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol)

# Now add in trimming!
# validate that the sum of event audio sums to trimmed soundscape
gen_event_files = [
os.path.join(gen_events_path, x)
for x in sorted(os.listdir(gen_events_path))
]
gen_audio = [soundfile.read(x)[0] for x in gen_event_files]

# Trim does not currently support trimming isolated events, but if/when
# we add that functionality, this test should be updated to test that
# as well, using the files in orig_events_path (currently unused).
# atol = 1e-8, to match test_generate_isolated_events
assert np.allclose(gen_wav, sum(gen_audio), atol=1e-8, rtol=rtol)

# generate, then generate from the jams and compare audio files
# repeat 5 times
for _ in range(5):
sc.generate(orig_wav_file.name, orig_jam_file.name,
disable_instantiation_warnings=True)
scaper.trim(orig_wav_file.name, orig_jam_file.name,
orig_wav_file.name, orig_jam_file.name,
np.random.uniform(0, 5), np.random.uniform(5, 10))
scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name)

# validate audio
orig_wav, sr = soundfile.read(orig_wav_file.name)
gen_wav, sr = soundfile.read(gen_wav_file.name)
assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol)

# Now add in trimming!
for _ in range(5):
with backports.tempfile.TemporaryDirectory() as isolated_events_path:
orig_events_path = os.path.join(isolated_events_path, 'original')
gen_events_path = os.path.join(isolated_events_path, 'generated')
os.makedirs(orig_events_path)
os.makedirs(gen_events_path)

sc.generate(orig_wav_file.name, orig_jam_file.name,
disable_instantiation_warnings=True,
save_isolated_events=True,
isolated_events_path=orig_events_path)
scaper.trim(orig_wav_file.name, orig_jam_file.name,
orig_wav_file.name, orig_jam_file.name,
np.random.uniform(0, 5), np.random.uniform(5, 10))
scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name,
save_isolated_events=True,
isolated_events_path=gen_events_path)

_validate_soundscape_and_event_audio(orig_wav_file, gen_wav_file,
gen_events_path, orig_events_path)

# Double trimming
for _ in range(2):
sc.generate(orig_wav_file.name, orig_jam_file.name,
disable_instantiation_warnings=True)
scaper.trim(orig_wav_file.name, orig_jam_file.name,
orig_wav_file.name, orig_jam_file.name,
np.random.uniform(0, 2), np.random.uniform(8, 10))
scaper.trim(orig_wav_file.name, orig_jam_file.name,
orig_wav_file.name, orig_jam_file.name,
np.random.uniform(0, 2), np.random.uniform(4, 6))
scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name)
with backports.tempfile.TemporaryDirectory() as isolated_events_path:
orig_events_path = os.path.join(isolated_events_path, 'original')
gen_events_path = os.path.join(isolated_events_path, 'generated')
os.makedirs(orig_events_path)
os.makedirs(gen_events_path)

sc.generate(orig_wav_file.name, orig_jam_file.name,
disable_instantiation_warnings=True,
save_isolated_events=True,
isolated_events_path=orig_events_path)
scaper.trim(orig_wav_file.name, orig_jam_file.name,
orig_wav_file.name, orig_jam_file.name,
np.random.uniform(0, 2), np.random.uniform(8, 10))
scaper.trim(orig_wav_file.name, orig_jam_file.name,
orig_wav_file.name, orig_jam_file.name,
np.random.uniform(0, 2), np.random.uniform(4, 6))
scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name,
save_isolated_events=True,
isolated_events_path=gen_events_path)

_validate_soundscape_and_event_audio(orig_wav_file, gen_wav_file,
gen_events_path, orig_events_path)

# Triple trimming
for _ in range(2):
sc.generate(orig_wav_file.name, orig_jam_file.name,
disable_instantiation_warnings=True)
scaper.trim(orig_wav_file.name, orig_jam_file.name,
orig_wav_file.name, orig_jam_file.name,
np.random.uniform(0, 2), np.random.uniform(8, 10))
scaper.trim(orig_wav_file.name, orig_jam_file.name,
orig_wav_file.name, orig_jam_file.name,
np.random.uniform(0, 1), np.random.uniform(5, 6))
scaper.trim(orig_wav_file.name, orig_jam_file.name,
orig_wav_file.name, orig_jam_file.name,
np.random.uniform(0, 1), np.random.uniform(3, 4))
scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name)

# validate audio
orig_wav, sr = soundfile.read(orig_wav_file.name)
gen_wav, sr = soundfile.read(gen_wav_file.name)
assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol)
with backports.tempfile.TemporaryDirectory() as isolated_events_path:
orig_events_path = os.path.join(isolated_events_path, 'original')
gen_events_path = os.path.join(isolated_events_path, 'generated')
os.makedirs(orig_events_path)
os.makedirs(gen_events_path)

sc.generate(orig_wav_file.name, orig_jam_file.name,
disable_instantiation_warnings=True,
save_isolated_events=True,
isolated_events_path=orig_events_path)
scaper.trim(orig_wav_file.name, orig_jam_file.name,
orig_wav_file.name, orig_jam_file.name,
np.random.uniform(0, 2), np.random.uniform(8, 10))
scaper.trim(orig_wav_file.name, orig_jam_file.name,
orig_wav_file.name, orig_jam_file.name,
np.random.uniform(0, 1), np.random.uniform(5, 6))
scaper.trim(orig_wav_file.name, orig_jam_file.name,
orig_wav_file.name, orig_jam_file.name,
np.random.uniform(0, 1), np.random.uniform(3, 4))
scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name,
save_isolated_events=True,
isolated_events_path=gen_events_path)

_validate_soundscape_and_event_audio(orig_wav_file, gen_wav_file,
gen_events_path, orig_events_path)

# Test with new FG and BG paths
for _ in range(5):
Expand Down Expand Up @@ -1268,21 +1318,23 @@ def _create_scaper_with_random_seed(seed):

def test_generate_audio():
for sr in SAMPLE_RATES:
REG_WAV_PATH = TEST_PATHS[sr]['REG'].wav
REG_BGONLY_WAV_PATH = TEST_PATHS[sr]['REG_BGONLY'].wav
REG_REVERB_WAV_PATH = TEST_PATHS[sr]['REG_REVERB'].wav
_test_generate_audio(sr, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_PATH)
for n_ch in range(1, 3):
REG_WAV_PATH = TEST_PATHS[sr]['REG'].wav
REG_BGONLY_WAV_PATH = TEST_PATHS[sr]['REG_BGONLY'].wav
REG_REVERB_WAV_PATH = TEST_PATHS[sr]['REG_REVERB'].wav
_test_generate_audio(sr, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_PATH, n_ch)


def _test_generate_audio(SR, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_PATH, atol=1e-4, rtol=1e-8):
def _test_generate_audio(SR, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_PATH, N_CHANNELS, atol=1e-4, rtol=1e-8):
# Regression test: same spec, same audio (note: this will fail if we update
# any of the audio processing techniques used, e.g. change the time stretching
# algorithm).
sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
sc.ref_db = -50
sc.sr = SR
sc.n_channels = N_CHANNELS

print("TEST SR: {}".format(SR))
print("TEST SR: {}, # OF CHANNELS: {}".format(SR, N_CHANNELS))

# background
sc.add_background(
Expand Down Expand Up @@ -1340,24 +1392,30 @@ def _test_generate_audio(SR, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_P
sc._generate_audio(wav_file.name, jam.annotations[0])

# validate audio
wav, sr = soundfile.read(wav_file.name)
regwav, sr = soundfile.read(REG_WAV_PATH)
assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
wav, sr = soundfile.read(wav_file.name, always_2d=True)
regwav, sr = soundfile.read(REG_WAV_PATH, always_2d=True)
# TODO: Add multi-channel regression data.
if N_CHANNELS == 1:
assert np.allclose(wav, regwav, atol=atol, rtol=rtol)

# with reverb
sc._generate_audio(wav_file.name, jam.annotations[0], reverb=0.2)
# validate audio
wav, sr = soundfile.read(wav_file.name)
regwav, sr = soundfile.read(REG_REVERB_WAV_PATH)
assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
wav, sr = soundfile.read(wav_file.name, always_2d=True)
regwav, sr = soundfile.read(REG_REVERB_WAV_PATH, always_2d=True)
# TODO: Add multi-channel regression data.
if N_CHANNELS == 1:
assert np.allclose(wav, regwav, atol=atol, rtol=rtol)

# Don't disable sox warnings (just to cover line)
sc._generate_audio(wav_file.name, jam.annotations[0],
disable_sox_warnings=False)
# validate audio
wav, sr = soundfile.read(wav_file.name)
regwav, sr = soundfile.read(REG_WAV_PATH)
assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
wav, sr = soundfile.read(wav_file.name, always_2d=True)
regwav, sr = soundfile.read(REG_WAV_PATH, always_2d=True)
# TODO: Add multi-channel regression data.
if N_CHANNELS == 1:
assert np.allclose(wav, regwav, atol=atol, rtol=rtol)

# namespace must be scaper
jam.annotations[0].namespace = 'tag_open'
Expand Down Expand Up @@ -1393,9 +1451,11 @@ def _test_generate_audio(SR, REG_WAV_PATH, REG_BGONLY_WAV_PATH, REG_REVERB_WAV_P
jam = sc._instantiate(disable_instantiation_warnings=True, reverb=reverb)
sc._generate_audio(wav_file.name, jam.annotations[0], reverb=reverb)
# validate audio
wav, sr = soundfile.read(wav_file.name)
regwav, sr = soundfile.read(REG_BGONLY_WAV_PATH)
assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
wav, sr = soundfile.read(wav_file.name, always_2d=True)
regwav, sr = soundfile.read(REG_BGONLY_WAV_PATH, always_2d=True)
# TODO: Add multi-channel regression data.
if N_CHANNELS == 1:
assert np.allclose(wav, regwav, atol=atol, rtol=rtol)


def create_scaper_scene_without_random_seed():
Expand Down Expand Up @@ -1518,7 +1578,7 @@ def _delete_files(mix_file, directory):
isolated_audio.append(_isolated_sandbox_audio)

# the sum of the isolated audio should sum to the soundscape
assert np.allclose(sum(isolated_audio), soundscape_audio, atol=1e-4, rtol=1e-8)
assert np.allclose(sum(isolated_audio), soundscape_audio, atol=1e-8, rtol=1e-8)

jam = sc._instantiate(disable_instantiation_warnings=True)

Expand Down

0 comments on commit 34e9831

Please sign in to comment.