justinsalamon · justinsalamon · Feb 3, 2020 · Apr 2, 2019 · Apr 3, 2019 · Feb 2, 2020
diff --git a/.gitignore b/.gitignore
@@ -8,4 +8,5 @@ build/*
 dist/*
 docs/_build/*
 scaper/bin/*
-.pytest_cache/*
+.pytest_cache/*
+**/.DS_Store
diff --git a/docs/changes.rst b/docs/changes.rst
@@ -3,6 +3,13 @@
 Changelog
 ---------
 
+v1.2.0
+~~~~~~
+- Added a random_state parameter to Scaper object, which allows all runs to be perfectly reproducible given the same audio and the same random seed.
+- Switched from numpydoc to napoleon for generating the documentation. Also switched Sphinx to the most recent version.
+- Added functions to Scaper object that allow one to reset the foreground and background event specifications independently. This allows users to reuse the same Scaper object and generate multiple soundscapes.
+- Added a function to Scaper that allows the user to set the random state after creation.
+
 v1.1.0
 ~~~~~~
 - Added functionality which modifies a source_time distribution tuple according to the duration of the source and the duration of the event.

diff --git a/docs/conf.py b/docs/conf.py
@@ -40,7 +40,7 @@
     'sphinx.ext.autosummary',
     'sphinx.ext.coverage',
     'sphinx.ext.viewcode',
-    'numpydoc',
+    'sphinx.ext.napoleon',
     'sphinx_issues'
 ]
 

diff --git a/docs/examples.rst b/docs/examples.rst
@@ -48,16 +48,24 @@ Example: synthesizing 1000 soundscapes in one go
     time_stretch_min = 0.8
     time_stretch_max = 1.2
 
+    # set a fixed random seed for this Scaper object
+    seed = 123
+
+    # create a scaper that will be used below
+    sc = scaper.Scaper(duration, fg_folder, bg_folder, random_state=seed)
+    sc.protected_labels = []
+    sc.ref_db = ref_db
+
     # Generate 1000 soundscapes using a truncated normal distribution of start times
 
     for n in range(n_soundscapes):
 
         print('Generating soundscape: {:d}/{:d}'.format(n+1, n_soundscapes))
 
-        # create a scaper
-        sc = scaper.Scaper(duration, fg_folder, bg_folder)
-        sc.protected_labels = []
-        sc.ref_db = ref_db
+        # reset the event specifications for foreground and background at the 
+        # beginning of each loop to clear all previously added events
+        sc.reset_bg_event_spec()
+        sc.reset_fg_event_spec()
 
         # add background
         sc.add_background(label=('const', 'noise'),

diff --git a/docs/tutorial.rst b/docs/tutorial.rst
@@ -100,6 +100,46 @@ when we add foreground events, we'll have to specify an ``snr``
 be louder (or softer) with respect to the background level specified by
 ``sc.ref_db``.
 
+Seeding the Scaper object for reproducibility
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A further argument can be specified to the ``Scaper`` object:
+
+- The random state: this can be either a numpy.random.RandomState object or an integer.
+  In the latter case, a random state will be constructed from that seed. The random state
+  is what will be used for drawing from any distributions. If the audio kept in all of the
+  folders is exactly the same and the random state is fixed between runs, the same soundscape
+  will be generated each time. If you don't define any random state, or set it to None, runs
+  will be random and not reproducible. In that case you can record the generator state with
+  np.random.get_state() beforehand, so that the run can be reproduced after the fact.
+
+This can be specified like so (e.g. for a random seed of 123):
+
+.. code-block:: python
+
+    import scaper
+    import os
+    soundscape_duration = 10.0
+    seed = 123
+    foreground_folder = os.path.expanduser('~/audio/foreground/')
+    background_folder = os.path.expanduser('~/audio/background/')
+    sc = scaper.Scaper(soundscape_duration, foreground_folder, background_folder, 
+                       random_state=seed)
+    sc.ref_db = -20
+
+If the random state is not specified, it defaults to the old behavior, which just uses
+the RandomState used by np.random. You can also set the random state after creation
+via ``Scaper.set_random_state``. Alternatively, you can pass a RandomState object directly:
+
+.. code-block:: python
+
+    import numpy as np
+    seed = np.random.RandomState(123)
+    sc = scaper.Scaper(soundscape_duration, foreground_folder, background_folder, 
+                       random_state=seed)
+    sc.ref_db = -20
+
+
 Adding a background and foreground sound events
 -----------------------------------------------
 

diff --git a/scaper/core.py b/scaper/core.py
@@ -1,5 +1,4 @@
 import sox
-import random
 import os
 import warnings
 import jams
@@ -17,18 +16,23 @@
 from .util import _get_sorted_files
 from .util import _validate_folder_path
 from .util import _populate_label_list
-from .util import _trunc_norm
+from .util import _check_random_state
+from .util import _sample_trunc_norm
+from .util import _sample_uniform
+from .util import _sample_choose
+from .util import _sample_normal
+from .util import _sample_const
 from .util import max_polyphony
 from .util import polyphony_gini
 from .util import is_real_number, is_real_array
 from .audio import get_integrated_lufs
 from .version import version as scaper_version
 
-SUPPORTED_DIST = {"const": lambda x: x,
-                  "choose": lambda x: random.choice(x),
-                  "uniform": random.uniform,
-                  "normal": random.normalvariate,
-                  "truncnorm": _trunc_norm}
+SUPPORTED_DIST = {"const": _sample_const,
+                  "choose": _sample_choose,
+                  "uniform": _sample_uniform,
+                  "normal": _sample_normal,
+                  "truncnorm": _sample_trunc_norm}
 
 # Define single event spec as namedtuple
 EventSpec = namedtuple(
@@ -245,8 +249,7 @@ def trim(audio_infile, jams_infile, audio_outfile, jams_outfile, start_time,
                 # Copy result back to original file
                 shutil.copyfile(tmpfiles[-1].name, audio_outfile)
 
-
-def _get_value_from_dist(dist_tuple):
+def _get_value_from_dist(dist_tuple, random_state):
     '''
     Sample a value from the provided distribution tuple.
 
@@ -274,7 +277,7 @@ def _get_value_from_dist(dist_tuple):
     '''
     # Make sure it's a valid distribution tuple
     _validate_distribution(dist_tuple)
-    return SUPPORTED_DIST[dist_tuple[0]](*dist_tuple[1:])
+    return SUPPORTED_DIST[dist_tuple[0]](*dist_tuple[1:], random_state=random_state)
 
 
 def _validate_distribution(dist_tuple):
@@ -825,22 +828,27 @@ class Scaper(object):
         Path to foreground folder.
     bg_path : str
         Path to background folder.
-    protected_labels : list
+    protected_labels : list 
         Provide a list of protected foreground labels. When a foreground
         label is in the protected list it means that when a sound event
         matching the label gets added to a soundscape instantiation the
         duration of the source audio file cannot be altered, and the
         duration value that was provided in the specification will be
-        ignored.
-
-        Adding labels to the protected list is useful for sound events
+        ignored. Adding labels to the protected list is useful for sound events
         whose semantic validity would be lost if the sound were trimmed
         before the sound event ends, for example an animal vocalization
         such as a dog bark.
-
+    random_state : int, RandomState instance or None, optional (default=None)
+        If int, random_state is the seed used by the random number 
+        generator; If RandomState instance, random_state is the random number 
+        generator; If None, the random number generator is the RandomState 
+        instance used by np.random. Note that if the random state is passed as a 
+        RandomState instance, it is passed by reference, not value. This will lead to
+        the Scaper object advancing the state of the random state object if you use
+        it elsewhere.
     '''
 
-    def __init__(self, duration, fg_path, bg_path, protected_labels=[]):
+    def __init__(self, duration, fg_path, bg_path, protected_labels=[], random_state=None):
         '''
         Create a Scaper object.
 
@@ -852,19 +860,24 @@ def __init__(self, duration, fg_path, bg_path, protected_labels=[]):
             Path to foreground folder.
         bg_path : str
             Path to background folder.
-        protected_labels : list
+        protected_labels : list 
             Provide a list of protected foreground labels. When a foreground
             label is in the protected list it means that when a sound event
             matching the label gets added to a soundscape instantiation the
             duration of the source audio file cannot be altered, and the
             duration value that was provided in the specification will be
-            ignored.
-
-            Adding labels to the protected list is useful for sound events
+            ignored. Adding labels to the protected list is useful for sound events
             whose semantic validity would be lost if the sound were trimmed
             before the sound event ends, for example an animal vocalization
             such as a dog bark.
-
+        random_state : int, RandomState instance or None, optional (default=None)
+            If int, random_state is the seed used by the random number 
+            generator; If RandomState instance, random_state is the random number 
+            generator; If None, the random number generator is the RandomState 
+            instance used by np.random. Note that if the random state is passed as a 
+            RandomState instance, it is passed by reference, not value. This will lead to
+            the Scaper object advancing the state of the random state object if you use
+            it elsewhere.
         '''
         # Duration must be a positive real number
         if np.isrealobj(duration) and duration > 0:
@@ -900,6 +913,51 @@ def __init__(self, duration, fg_path, bg_path, protected_labels=[]):
         # Copy list of protected labels
         self.protected_labels = protected_labels[:]
 
+        # Get random number generator
+        self.random_state = _check_random_state(random_state)
+
+    def reset_fg_event_spec(self):
+        '''
+        Resets the foreground event specification to be an empty list as it is when
+        the Scaper object is initialized in the first place. This allows the same
+        Scaper object to be used over and over again to generate new soundscapes
+        with the same underlying settings (e.g. `ref_db`, `num_channels`, and so on.)
+
+        See Also
+        --------
+        Scaper.reset_bg_event_spec : Same functionality but resets the background
+        event specification instead of the foreground specification.
+        '''
+        self.fg_spec = []
+
+    def reset_bg_event_spec(self):
+        '''
+        Resets the background event specification to be an empty list as it is when
+        the Scaper object is initialized in the first place. This allows the same
+        Scaper object to be used over and over again to generate new soundscapes
+        with the same underlying settings (e.g. `ref_db`, `num_channels`, and so on.)
+
+        See Also
+        --------
+        Scaper.reset_fg_event_spec : Same functionality but resets the foreground
+        event specification instead of the background specification.
+        '''
+        self.bg_spec = []
+
+    def set_random_state(self, random_state):
+        '''
+        Allows the user to set the random state after creating the Scaper object.
+
+        Parameters
+        ----------
+        random_state : int, RandomState instance or None
+            If int, random_state is the seed used by the random number 
+            generator; If RandomState instance, random_state is the random number 
+            generator; If None, the random number generator is the RandomState 
+            instance used by np.random.
+        '''
+        self.random_state = _check_random_state(random_state)
+
     def add_background(self, label, source_file, source_time):
         '''
         Add a background recording to the background specification.
@@ -1177,7 +1235,7 @@ def _instantiate_event(self, event, isbackground=False,
             label_tuple = tuple(label_tuple)
         else:
             label_tuple = event.label
-        label = _get_value_from_dist(label_tuple)
+        label = _get_value_from_dist(label_tuple, self.random_state)
 
         # Make sure we can use this label
         if (not allow_repeated_label) and (label in used_labels):
@@ -1189,7 +1247,7 @@ def _instantiate_event(self, event, isbackground=False,
                     "allow_repeated_label=False.".format(label))
             else:
                 while label in used_labels:
-                    label = _get_value_from_dist(label_tuple)
+                    label = _get_value_from_dist(label_tuple, self.random_state)
 
         # Update the used labels list
         if label not in used_labels:
@@ -1205,7 +1263,7 @@ def _instantiate_event(self, event, isbackground=False,
         else:
             source_file_tuple = event.source_file
 
-        source_file = _get_value_from_dist(source_file_tuple)
+        source_file = _get_value_from_dist(source_file_tuple, self.random_state)
 
         # Make sure we can use this source file
         if (not allow_repeated_source) and (source_file in used_source_files):
@@ -1218,7 +1276,7 @@ def _instantiate_event(self, event, isbackground=False,
                     "allow_repeated_source=False.".format(label))
             else:
                 while source_file in used_source_files:
-                    source_file = _get_value_from_dist(source_file_tuple)
+                    source_file = _get_value_from_dist(source_file_tuple, self.random_state)
 
         # Update the used source files list
         if source_file not in used_source_files:
@@ -1238,7 +1296,9 @@ def _instantiate_event(self, event, isbackground=False,
             # potentially be non-positive, hence the loop.
             event_duration = -np.Inf
             while event_duration <= 0:
-                event_duration = _get_value_from_dist(event.event_duration)
+                event_duration = _get_value_from_dist(
+                    event.event_duration, self.random_state
+                )
 
         # Check if chosen event duration is longer than the duration of the
         # selected source file, if so adjust the event duration.
@@ -1259,7 +1319,9 @@ def _instantiate_event(self, event, isbackground=False,
         else:
             time_stretch = -np.Inf
             while time_stretch <= 0:
-                time_stretch = _get_value_from_dist(event.time_stretch)
+                time_stretch = _get_value_from_dist(
+                    event.time_stretch, self.random_state
+                )
             # compute duration after stretching
             event_duration_stretched = event_duration * time_stretch
 
@@ -1306,7 +1368,8 @@ def _instantiate_event(self, event, isbackground=False,
             # if it happens again, just use the old method.
             source_time = -np.Inf
             while source_time < 0:
-                source_time = _get_value_from_dist(modified_source_time)
+                source_time = _get_value_from_dist(
+                    modified_source_time, self.random_state)
                 if source_time + event_duration > source_duration:
                     source_time = source_duration - event_duration
                     warn = True
@@ -1342,7 +1405,9 @@ def _instantiate_event(self, event, isbackground=False,
         # foreground events it's not.
         event_time = -np.Inf
         while event_time < 0:
-            event_time = _get_value_from_dist(event.event_time)
+            event_time = _get_value_from_dist(
+                event.event_time, self.random_state
+            )
 
         # Make sure the selected event time + event duration are is not greater
         # than the total duration of the soundscape, if it is adjust the event
@@ -1374,15 +1439,15 @@ def _instantiate_event(self, event, isbackground=False,
                         ScaperWarning)
 
         # determine snr
-        snr = _get_value_from_dist(event.snr)
+        snr = _get_value_from_dist(event.snr, self.random_state)
 
         # get role (which can only take "foreground" or "background" and
         # is set internally, not by the user).
         role = event.role
 
         # determine pitch_shift
         if event.pitch_shift is not None:
-            pitch_shift = _get_value_from_dist(event.pitch_shift)
+            pitch_shift = _get_value_from_dist(event.pitch_shift, self.random_state)
         else:
             pitch_shift = None
 
@@ -1396,6 +1461,7 @@ def _instantiate_event(self, event, isbackground=False,
                                        role=role,
                                        pitch_shift=pitch_shift,
                                        time_stretch=time_stretch)
+
         # Return
         return instantiated_event